src/gallium/drivers/llvmpipe/lp_bld_interp.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
   5  * All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * @file
  31  * Position and shader input interpolation.
  32  *
  33  * @author Jose Fonseca <jfonseca@vmware.com>
  34  */
  35
  36 #include "pipe/p_shader_tokens.h"
  37 #include "util/u_debug.h"
  38 #include "util/u_memory.h"
  39 #include "util/u_math.h"
  40 #include "tgsi/tgsi_scan.h"
  41 #include "gallivm/lp_bld_debug.h"
  42 #include "gallivm/lp_bld_const.h"
  43 #include "gallivm/lp_bld_arit.h"
  44 #include "gallivm/lp_bld_swizzle.h"
  45 #include "gallivm/lp_bld_flow.h"
  46 #include "lp_bld_interp.h"
  47
  48
  49 /*
  50  * The shader JIT function operates on blocks of quads.
  51  * Each block has 2x2 quads and each quad has 2x2 pixels.
  52  *
  53  * We iterate over the quads in order 0, 1, 2, 3:
  54  *
  55  * #################
  56  * #   |   #   |   #
  57  * #---0---#---1---#
  58  * #   |   #   |   #
  59  * #################
  60  * #   |   #   |   #
  61  * #---2---#---3---#
  62  * #   |   #   |   #
  63  * #################
  64  *
  65  * If we iterate over multiple quads at once, quads 01 and 23 are processed
  66  * together.
  67  *
  68  * Within each quad, we have four pixels which are represented in SOA
  69  * order:
  70  *
  71  * #########
  72  * # 0 | 1 #
  73  * #---+---#
  74  * # 2 | 3 #
  75  * #########
  76  *
  77  * So the green channel (for example) of the four pixels is stored in
  78  * a single vector register: {g0, g1, g2, g3}.
  79  * The order stays the same even with multiple quads:
  80  * 0 1 4 5
  81  * 2 3 6 7
  82  * is stored as g0..g7
  83  */
  84
  85
  86 /**
  87  * Do one perspective divide per quad.
  88  *
  89  * For perspective interpolation, the final attribute value is given
  90  *
  91  *  a' = a/w = a * oow
  92  *
  93  * where
  94  *
  95  *  a = a0 + dadx*x + dady*y
  96  *  w = w0 + dwdx*x + dwdy*y
  97  *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
  98  *
  99  * Instead of computing the division per pixel, with this macro we compute the
 100  * division on the upper left pixel of each quad, and use a linear
 101  * approximation in the remaining pixels, given by:
 102  *
 103  *  da'dx = (dadx - dwdx*a)*oow
 104  *  da'dy = (dady - dwdy*a)*oow
 105  *
 106  * Ironically, this actually makes things slower -- probably because the
 107  * divide hardware unit is rarely used, whereas the multiply unit is typically
 108  * already saturated.
 109  */
 110 #define PERSPECTIVE_DIVIDE_PER_QUAD 0
 111
 112
 113 static const unsigned char quad_offset_x[16] = {0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3};
 114 static const unsigned char quad_offset_y[16] = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3};
 115
 116
 117 static void
 118 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
 119 {
 120    if(attrib == 0)
 121       lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
 122    else
 123       lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
 124 }
 125
 126 static void
 127 calc_offsets(struct lp_build_context *coeff_bld,
 128              unsigned quad_start_index,
 129              LLVMValueRef *pixoffx,
 130              LLVMValueRef *pixoffy)
 131 {
 132    unsigned i;
 133    unsigned num_pix = coeff_bld->type.length;
 134    struct gallivm_state *gallivm = coeff_bld->gallivm;
 135    LLVMBuilderRef builder = coeff_bld->gallivm->builder;
 136    LLVMValueRef nr, pixxf, pixyf;
 137
 138    *pixoffx = coeff_bld->undef;
 139    *pixoffy = coeff_bld->undef;
 140
 141    for (i = 0; i < num_pix; i++) {
 142       nr = lp_build_const_int32(gallivm, i);
 143       pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
 144                                    (quad_start_index & 1) * 2);
 145       pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
 146                                    (quad_start_index & 2));
 147       *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
 148       *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
 149    }
 150 }
 151
 152
 153 /* Much easier, and significantly less instructions in the per-stamp
 154  * part (less than half) but overall more instructions so a loss if
 155  * most quads are active. Might be a win though with larger vectors.
 156  * No ability to do per-quad divide (doable but not implemented)
 157  * Could be made to work with passed in pixel offsets (i.e. active quad merging).
 158  */
 159 static void
 160 coeffs_init_simple(struct lp_build_interp_soa_context *bld,
 161                    LLVMValueRef a0_ptr,
 162                    LLVMValueRef dadx_ptr,
 163                    LLVMValueRef dady_ptr)
 164 {
 165    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 166    struct lp_build_context *setup_bld = &bld->setup_bld;
 167    struct gallivm_state *gallivm = coeff_bld->gallivm;
 168    LLVMBuilderRef builder = gallivm->builder;
 169    unsigned attrib;
 170
 171    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
 172       /*
 173        * always fetch all 4 values for performance/simplicity
 174        * Note: we do that here because it seems to generate better
 175        * code. It generates a lot of moves initially but less
 176        * moves later. As far as I can tell this looks like a
 177        * llvm issue, instead of simply reloading the values from
 178        * the passed in pointers it if it runs out of registers
 179        * it spills/reloads them. Maybe some optimization passes
 180        * would help.
 181        * Might want to investigate this again later.
 182        */
 183       const unsigned interp = bld->interp[attrib];
 184       LLVMValueRef index = lp_build_const_int32(gallivm,
 185                                 attrib * TGSI_NUM_CHANNELS);
 186       LLVMValueRef ptr;
 187       LLVMValueRef dadxaos = setup_bld->zero;
 188       LLVMValueRef dadyaos = setup_bld->zero;
 189       LLVMValueRef a0aos = setup_bld->zero;
 190
 191       switch (interp) {
 192       case LP_INTERP_PERSPECTIVE:
 193          /* fall-through */
 194
 195       case LP_INTERP_LINEAR:
 196          ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
 197          ptr = LLVMBuildBitCast(builder, ptr,
 198                LLVMPointerType(setup_bld->vec_type, 0), "");
 199          dadxaos = LLVMBuildLoad(builder, ptr, "");
 200
 201          ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
 202          ptr = LLVMBuildBitCast(builder, ptr,
 203                LLVMPointerType(setup_bld->vec_type, 0), "");
 204          dadyaos = LLVMBuildLoad(builder, ptr, "");
 205
 206          attrib_name(dadxaos, attrib, 0, ".dadxaos");
 207          attrib_name(dadyaos, attrib, 0, ".dadyaos");
 208          /* fall-through */
 209
 210       case LP_INTERP_CONSTANT:
 211       case LP_INTERP_FACING:
 212          ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
 213          ptr = LLVMBuildBitCast(builder, ptr,
 214                LLVMPointerType(setup_bld->vec_type, 0), "");
 215          a0aos = LLVMBuildLoad(builder, ptr, "");
 216          attrib_name(a0aos, attrib, 0, ".a0aos");
 217          break;
 218
 219       case LP_INTERP_POSITION:
 220          /* Nothing to do as the position coeffs are already setup in slot 0 */
 221          continue;
 222
 223       default:
 224          assert(0);
 225          break;
 226       }
 227       bld->a0aos[attrib] = a0aos;
 228       bld->dadxaos[attrib] = dadxaos;
 229       bld->dadyaos[attrib] = dadyaos;
 230    }
 231 }
 232
 233 /**
 234  * Interpolate the shader input attribute values.
 235  * This is called for each (group of) quad(s).
 236  */
 237 static void
 238 attribs_update_simple(struct lp_build_interp_soa_context *bld,
 239                       struct gallivm_state *gallivm,
 240                       int quad_start_index,
 241                       LLVMValueRef loop_iter,
 242                       int start,
 243                       int end)
 244 {
 245    LLVMBuilderRef builder = gallivm->builder;
 246    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 247    struct lp_build_context *setup_bld = &bld->setup_bld;
 248    LLVMValueRef oow = NULL;
 249    unsigned attrib;
 250    LLVMValueRef pixoffx;
 251    LLVMValueRef pixoffy;
 252
 253    /* could do this with code-generated passed in pixel offsets too */
 254    if (bld->dynamic_offsets) {
 255       LLVMValueRef ptr;
 256
 257       assert(loop_iter);
 258       ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
 259       pixoffx = LLVMBuildLoad(builder, ptr, "");
 260       ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
 261       pixoffy = LLVMBuildLoad(builder, ptr, "");
 262    }
 263    else {
 264       calc_offsets(coeff_bld, quad_start_index, &pixoffx, &pixoffy);
 265    }
 266
 267    pixoffx = LLVMBuildFAdd(builder, pixoffx,
 268                            lp_build_broadcast_scalar(coeff_bld, bld->x), "");
 269    pixoffy = LLVMBuildFAdd(builder, pixoffy,
 270                            lp_build_broadcast_scalar(coeff_bld, bld->y), "");
 271
 272    for (attrib = start; attrib < end; attrib++) {
 273       const unsigned mask = bld->mask[attrib];
 274       const unsigned interp = bld->interp[attrib];
 275       unsigned chan;
 276
 277       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 278          if (mask & (1 << chan)) {
 279             LLVMValueRef index;
 280             LLVMValueRef dadx = coeff_bld->zero;
 281             LLVMValueRef dady = coeff_bld->zero;
 282             LLVMValueRef a = coeff_bld->zero;
 283
 284             index = lp_build_const_int32(gallivm, chan);
 285             switch (interp) {
 286             case LP_INTERP_PERSPECTIVE:
 287                /* fall-through */
 288
 289             case LP_INTERP_LINEAR:
 290                if (attrib == 0 && chan == 0) {
 291                   dadx = coeff_bld->one;
 292                }
 293                else if (attrib == 0 && chan == 1) {
 294                   dady = coeff_bld->one;
 295                }
 296                else {
 297                   dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
 298                                                     coeff_bld->type, bld->dadxaos[attrib],
 299                                                     index);
 300                   dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
 301                                                     coeff_bld->type, bld->dadyaos[attrib],
 302                                                     index);
 303                   a = lp_build_extract_broadcast(gallivm, setup_bld->type,
 304                                                  coeff_bld->type, bld->a0aos[attrib],
 305                                                  index);
 306                }
 307                /*
 308                 * a = a0 + (x * dadx + y * dady)
 309                 */
 310                dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
 311                dady = LLVMBuildFMul(builder, dady, pixoffy, "");
 312                a = LLVMBuildFAdd(builder, a, dadx, "");
 313                a = LLVMBuildFAdd(builder, a, dady, "");
 314
 315                if (interp == LP_INTERP_PERSPECTIVE) {
 316                   if (oow == NULL) {
 317                      LLVMValueRef w = bld->attribs[0][3];
 318                      assert(attrib != 0);
 319                      assert(bld->mask[0] & TGSI_WRITEMASK_W);
 320                      oow = lp_build_rcp(coeff_bld, w);
 321                   }
 322                   a = lp_build_mul(coeff_bld, a, oow);
 323                }
 324                break;
 325
 326             case LP_INTERP_CONSTANT:
 327             case LP_INTERP_FACING:
 328                a = lp_build_extract_broadcast(gallivm, setup_bld->type,
 329                                               coeff_bld->type, bld->a0aos[attrib],
 330                                               index);
 331                break;
 332
 333             case LP_INTERP_POSITION:
 334                assert(attrib > 0);
 335                a = bld->attribs[0][chan];
 336                break;
 337
 338             default:
 339                assert(0);
 340                break;
 341             }
 342
 343             if ((attrib == 0) && (chan == 2)){
 344                /* FIXME: Depth values can exceed 1.0, due to the fact that
 345                 * setup interpolation coefficients refer to (0,0) which causes
 346                 * precision loss. So we must clamp to 1.0 here to avoid artifacts
 347                 */
 348                a = lp_build_min(coeff_bld, a, coeff_bld->one);
 349             }
 350             bld->attribs[attrib][chan] = a;
 351          }
 352       }
 353    }
 354 }
 355
 356 /**
 357  * Initialize the bld->a, dadq fields.  This involves fetching
 358  * those values from the arrays which are passed into the JIT function.
 359  */
 360 static void
 361 coeffs_init(struct lp_build_interp_soa_context *bld,
 362             LLVMValueRef a0_ptr,
 363             LLVMValueRef dadx_ptr,
 364             LLVMValueRef dady_ptr)
 365 {
 366    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 367    struct lp_build_context *setup_bld = &bld->setup_bld;
 368    struct gallivm_state *gallivm = coeff_bld->gallivm;
 369    LLVMBuilderRef builder = gallivm->builder;
 370    LLVMValueRef pixoffx, pixoffy;
 371    unsigned attrib;
 372    unsigned chan;
 373    unsigned i;
 374
 375    pixoffx = coeff_bld->undef;
 376    pixoffy = coeff_bld->undef;
 377    for (i = 0; i < coeff_bld->type.length; i++) {
 378       LLVMValueRef nr = lp_build_const_int32(gallivm, i);
 379       LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]);
 380       LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]);
 381       pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
 382       pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
 383    }
 384
 385
 386    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
 387       const unsigned mask = bld->mask[attrib];
 388       const unsigned interp = bld->interp[attrib];
 389       LLVMValueRef index = lp_build_const_int32(gallivm,
 390                                 attrib * TGSI_NUM_CHANNELS);
 391       LLVMValueRef ptr;
 392       LLVMValueRef dadxaos = setup_bld->zero;
 393       LLVMValueRef dadyaos = setup_bld->zero;
 394       LLVMValueRef a0aos = setup_bld->zero;
 395
 396       /* always fetch all 4 values for performance/simplicity */
 397       switch (interp) {
 398       case LP_INTERP_PERSPECTIVE:
 399          /* fall-through */
 400
 401       case LP_INTERP_LINEAR:
 402          ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
 403          ptr = LLVMBuildBitCast(builder, ptr,
 404                LLVMPointerType(setup_bld->vec_type, 0), "");
 405          dadxaos = LLVMBuildLoad(builder, ptr, "");
 406
 407          ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
 408          ptr = LLVMBuildBitCast(builder, ptr,
 409                LLVMPointerType(setup_bld->vec_type, 0), "");
 410          dadyaos = LLVMBuildLoad(builder, ptr, "");
 411
 412          attrib_name(dadxaos, attrib, 0, ".dadxaos");
 413          attrib_name(dadyaos, attrib, 0, ".dadyaos");
 414          /* fall-through */
 415
 416       case LP_INTERP_CONSTANT:
 417       case LP_INTERP_FACING:
 418          ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
 419          ptr = LLVMBuildBitCast(builder, ptr,
 420                LLVMPointerType(setup_bld->vec_type, 0), "");
 421          a0aos = LLVMBuildLoad(builder, ptr, "");
 422          attrib_name(a0aos, attrib, 0, ".a0aos");
 423          break;
 424
 425       case LP_INTERP_POSITION:
 426          /* Nothing to do as the position coeffs are already setup in slot 0 */
 427          continue;
 428
 429       default:
 430          assert(0);
 431          break;
 432       }
 433
 434       /*
 435        * a = a0 + (x * dadx + y * dady)
 436        * a0aos is the attrib value at top left corner of stamp
 437        */
 438       if (interp != LP_INTERP_CONSTANT &&
 439           interp != LP_INTERP_FACING) {
 440          LLVMValueRef axaos, ayaos;
 441          axaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->x),
 442                                dadxaos, "");
 443          ayaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->y),
 444                                dadyaos, "");
 445          a0aos = LLVMBuildFAdd(builder, a0aos, ayaos, "");
 446          a0aos = LLVMBuildFAdd(builder, a0aos, axaos, "");
 447       }
 448
 449       /*
 450        * dadq = {0, dadx, dady, dadx + dady}
 451        * for two quads (side by side) this is:
 452        * {0, dadx, dady, dadx+dady, 2*dadx, 2*dadx+dady, 3*dadx+dady}
 453        */
 454       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
 455          /* this generates a CRAPLOAD of shuffles... */
 456          if (mask & (1 << chan)) {
 457             LLVMValueRef dadx, dady;
 458             LLVMValueRef dadq, dadq2;
 459             LLVMValueRef a;
 460             LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan);
 461
 462             if (attrib == 0 && chan == 0) {
 463                a = lp_build_broadcast_scalar(coeff_bld, bld->x);
 464                dadx = coeff_bld->one;
 465                dady = coeff_bld->zero;
 466             }
 467             else if (attrib == 0 && chan == 1) {
 468                a = lp_build_broadcast_scalar(coeff_bld, bld->y);
 469                dady = coeff_bld->one;
 470                dadx = coeff_bld->zero;
 471             }
 472             else {
 473                dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
 474                                               coeff_bld->type, dadxaos, chan_index);
 475                dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
 476                                               coeff_bld->type, dadyaos, chan_index);
 477
 478                /*
 479                 * a = {a, a, a, a}
 480                 */
 481                a = lp_build_extract_broadcast(gallivm, setup_bld->type,
 482                                               coeff_bld->type, a0aos, chan_index);
 483             }
 484
 485             dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
 486             dady = LLVMBuildFMul(builder, dady, pixoffy, "");
 487             dadq = LLVMBuildFAdd(builder, dadx, dady, "");
 488
 489             /*
 490              * Compute the attrib values on the upper-left corner of each
 491              * group of quads.
 492              * Note that if we process 2 quads at once this doesn't
 493              * really exactly to what we want.
 494              * We need to access elem 0 and 2 respectively later if we process
 495              * 2 quads at once.
 496              */
 497
 498             if (interp != LP_INTERP_CONSTANT &&
 499                 interp != LP_INTERP_FACING) {
 500                dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
 501                a = LLVMBuildFAdd(builder, a, dadq2, "");
 502             }
 503
 504 #if PERSPECTIVE_DIVIDE_PER_QUAD
 505             /*
 506              * a *= 1 / w
 507              */
 508
 509             /*
 510              * XXX since we're only going to access elements 0,2 out of 8
 511              * if we have 8-wide vectors we should do the division only 4-wide.
 512              * a is really a 2-elements in a 4-wide vector disguised as 8-wide
 513              * in this case.
 514              */
 515             if (interp == LP_INTERP_PERSPECTIVE) {
 516                LLVMValueRef w = bld->a[0][3];
 517                assert(attrib != 0);
 518                assert(bld->mask[0] & TGSI_WRITEMASK_W);
 519                if (!bld->oow) {
 520                   bld->oow = lp_build_rcp(coeff_bld, w);
 521                   lp_build_name(bld->oow, "oow");
 522                }
 523                a = lp_build_mul(coeff_bld, a, bld->oow);
 524             }
 525 #endif
 526
 527             attrib_name(a, attrib, chan, ".a");
 528             attrib_name(dadq, attrib, chan, ".dadq");
 529
 530             if (bld->dynamic_offsets) {
 531                bld->a[attrib][chan] = lp_build_alloca(gallivm,
 532                                                       LLVMTypeOf(a), "");
 533                LLVMBuildStore(builder, a, bld->a[attrib][chan]);
 534             }
 535             else {
 536                bld->a[attrib][chan] = a;
 537             }
 538             bld->dadq[attrib][chan] = dadq;
 539          }
 540       }
 541    }
 542 }
 543
 544
 545 /**
 546  * Increment the shader input attribute values.
 547  * This is called when we move from one quad to the next.
 548  */
 549 static void
 550 attribs_update(struct lp_build_interp_soa_context *bld,
 551                struct gallivm_state *gallivm,
 552                int quad_start_index,
 553                LLVMValueRef loop_iter,
 554                int start,
 555                int end)
 556 {
 557    LLVMBuilderRef builder = gallivm->builder;
 558    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 559    LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_start_index);
 560    LLVMValueRef oow = NULL;
 561    unsigned attrib;
 562    unsigned chan;
 563
 564    assert(quad_start_index < 4);
 565
 566    for(attrib = start; attrib < end; ++attrib) {
 567       const unsigned mask = bld->mask[attrib];
 568       const unsigned interp = bld->interp[attrib];
 569       for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
 570          if(mask & (1 << chan)) {
 571             LLVMValueRef a;
 572             if (interp == LP_INTERP_CONSTANT ||
 573                 interp == LP_INTERP_FACING) {
 574                a = bld->a[attrib][chan];
 575                if (bld->dynamic_offsets) {
 576                   a = LLVMBuildLoad(builder, a, "");
 577                }
 578             }
 579             else if (interp == LP_INTERP_POSITION) {
 580                assert(attrib > 0);
 581                a = bld->attribs[0][chan];
 582             }
 583             else {
 584                LLVMValueRef dadq;
 585
 586                a = bld->a[attrib][chan];
 587
 588                /*
 589                 * Broadcast the attribute value for this quad into all elements
 590                 */
 591
 592                if (bld->dynamic_offsets) {
 593                   /* stored as vector load as float */
 594                   LLVMTypeRef ptr_type = LLVMPointerType(LLVMFloatTypeInContext(
 595                                                             gallivm->context), 0);
 596                   LLVMValueRef ptr;
 597                   a = LLVMBuildBitCast(builder, a, ptr_type, "");
 598                   ptr = LLVMBuildGEP(builder, a, &loop_iter, 1, "");
 599                   a = LLVMBuildLoad(builder, ptr, "");
 600                   a = lp_build_broadcast_scalar(&bld->coeff_bld, a);
 601                }
 602                else {
 603                   a = LLVMBuildShuffleVector(builder,
 604                                              a, coeff_bld->undef, shuffle, "");
 605                }
 606
 607                /*
 608                 * Get the derivatives.
 609                 */
 610
 611                dadq = bld->dadq[attrib][chan];
 612
 613 #if PERSPECTIVE_DIVIDE_PER_QUAD
 614                if (interp == LP_INTERP_PERSPECTIVE) {
 615                   LLVMValueRef dwdq = bld->dadq[0][3];
 616
 617                   if (oow == NULL) {
 618                      assert(bld->oow);
 619                      oow = LLVMBuildShuffleVector(coeff_bld->builder,
 620                                                   bld->oow, coeff_bld->undef,
 621                                                   shuffle, "");
 622                   }
 623
 624                   dadq = lp_build_sub(coeff_bld,
 625                                       dadq,
 626                                       lp_build_mul(coeff_bld, a, dwdq));
 627                   dadq = lp_build_mul(coeff_bld, dadq, oow);
 628                }
 629 #endif
 630
 631                /*
 632                 * Add the derivatives
 633                 */
 634
 635                a = lp_build_add(coeff_bld, a, dadq);
 636
 637 #if !PERSPECTIVE_DIVIDE_PER_QUAD
 638                if (interp == LP_INTERP_PERSPECTIVE) {
 639                   if (oow == NULL) {
 640                      LLVMValueRef w = bld->attribs[0][3];
 641                      assert(attrib != 0);
 642                      assert(bld->mask[0] & TGSI_WRITEMASK_W);
 643                      oow = lp_build_rcp(coeff_bld, w);
 644                   }
 645                   a = lp_build_mul(coeff_bld, a, oow);
 646                }
 647 #endif
 648
 649                if (attrib == 0 && chan == 2) {
 650                   /* FIXME: Depth values can exceed 1.0, due to the fact that
 651                    * setup interpolation coefficients refer to (0,0) which causes
 652                    * precision loss. So we must clamp to 1.0 here to avoid artifacts
 653                    */
 654                   a = lp_build_min(coeff_bld, a, coeff_bld->one);
 655                }
 656
 657                attrib_name(a, attrib, chan, "");
 658             }
 659             bld->attribs[attrib][chan] = a;
 660          }
 661       }
 662    }
 663 }
 664
 665
 666 /**
 667  * Generate the position vectors.
 668  *
 669  * Parameter x0, y0 are the integer values with upper left coordinates.
 670  */
 671 static void
 672 pos_init(struct lp_build_interp_soa_context *bld,
 673          LLVMValueRef x0,
 674          LLVMValueRef y0)
 675 {
 676    LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
 677    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 678
 679    bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
 680    bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
 681 }
 682
 683
 684 /**
 685  * Initialize fragment shader input attribute info.
 686  */
 687 void
 688 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
 689                          struct gallivm_state *gallivm,
 690                          unsigned num_inputs,
 691                          const struct lp_shader_input *inputs,
 692                          LLVMBuilderRef builder,
 693                          struct lp_type type,
 694                          boolean dynamic_offsets,
 695                          LLVMValueRef a0_ptr,
 696                          LLVMValueRef dadx_ptr,
 697                          LLVMValueRef dady_ptr,
 698                          LLVMValueRef x0,
 699                          LLVMValueRef y0)
 700 {
 701    struct lp_type coeff_type;
 702    struct lp_type setup_type;
 703    unsigned attrib;
 704    unsigned chan;
 705
 706    memset(bld, 0, sizeof *bld);
 707
 708    memset(&coeff_type, 0, sizeof coeff_type);
 709    coeff_type.floating = TRUE;
 710    coeff_type.sign = TRUE;
 711    coeff_type.width = 32;
 712    coeff_type.length = type.length;
 713
 714    memset(&setup_type, 0, sizeof setup_type);
 715    setup_type.floating = TRUE;
 716    setup_type.sign = TRUE;
 717    setup_type.width = 32;
 718    setup_type.length = TGSI_NUM_CHANNELS;
 719
 720
 721    /* XXX: we don't support interpolating into any other types */
 722    assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
 723
 724    lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
 725    lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
 726
 727    /* For convenience */
 728    bld->pos = bld->attribs[0];
 729    bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
 730
 731    /* Position */
 732    bld->mask[0] = TGSI_WRITEMASK_XYZW;
 733    bld->interp[0] = LP_INTERP_LINEAR;
 734
 735    /* Inputs */
 736    for (attrib = 0; attrib < num_inputs; ++attrib) {
 737       bld->mask[1 + attrib] = inputs[attrib].usage_mask;
 738       bld->interp[1 + attrib] = inputs[attrib].interp;
 739    }
 740    bld->num_attribs = 1 + num_inputs;
 741
 742    /* Ensure all masked out input channels have a valid value */
 743    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
 744       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
 745          bld->attribs[attrib][chan] = bld->coeff_bld.undef;
 746       }
 747    }
 748
 749    pos_init(bld, x0, y0);
 750
 751    if (coeff_type.length > 4) {
 752       bld->simple_interp = TRUE;
 753       if (dynamic_offsets) {
 754          /* XXX this should use a global static table */
 755          unsigned i;
 756          unsigned num_loops = 16 / type.length;
 757          LLVMValueRef pixoffx, pixoffy, index;
 758          LLVMValueRef ptr;
 759
 760          bld->dynamic_offsets = TRUE;
 761          bld->xoffset_store = lp_build_array_alloca(gallivm,
 762                                                     lp_build_vec_type(gallivm, type),
 763                                                     lp_build_const_int32(gallivm, num_loops),
 764                                                     "");
 765          bld->yoffset_store = lp_build_array_alloca(gallivm,
 766                                                     lp_build_vec_type(gallivm, type),
 767                                                     lp_build_const_int32(gallivm, num_loops),
 768                                                     "");
 769          for (i = 0; i < num_loops; i++) {
 770             index = lp_build_const_int32(gallivm, i);
 771             calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
 772             ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
 773             LLVMBuildStore(builder, pixoffx, ptr);
 774             ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
 775             LLVMBuildStore(builder, pixoffy, ptr);
 776          }
 777       }
 778       coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
 779    }
 780    else {
 781       bld->simple_interp = FALSE;
 782       if (dynamic_offsets) {
 783          bld->dynamic_offsets = TRUE;
 784       }
 785       coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
 786    }
 787
 788 }
 789
 790
 791 /**
 792  * Advance the position and inputs to the given quad within the block.
 793  */
 794 void
 795 lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
 796                                   struct gallivm_state *gallivm,
 797                                   int quad_start_index)
 798 {
 799    assert(quad_start_index < 4);
 800
 801    if (bld->simple_interp) {
 802       attribs_update_simple(bld, gallivm, quad_start_index, NULL, 1, bld->num_attribs);
 803    }
 804    else {
 805       attribs_update(bld, gallivm, quad_start_index, NULL, 1, bld->num_attribs);
 806    }
 807 }
 808
 809 void
 810 lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
 811                                struct gallivm_state *gallivm,
 812                                int quad_start_index)
 813 {
 814    assert(quad_start_index < 4);
 815
 816    if (bld->simple_interp) {
 817       attribs_update_simple(bld, gallivm, quad_start_index, NULL, 0, 1);
 818    }
 819    else {
 820       attribs_update(bld, gallivm, quad_start_index, NULL, 0, 1);
 821    }
 822 }
 823
 824 void
 825 lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
 826                                       struct gallivm_state *gallivm,
 827                                       LLVMValueRef quad_start_index)
 828 {
 829    if (bld->simple_interp) {
 830       attribs_update_simple(bld, gallivm, 0, quad_start_index, 1, bld->num_attribs);
 831    }
 832    else {
 833       attribs_update(bld, gallivm, 0, quad_start_index, 1, bld->num_attribs);
 834    }
 835 }
 836
 837 void
 838 lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
 839                                    struct gallivm_state *gallivm,
 840                                    LLVMValueRef quad_start_index)
 841 {
 842    if (bld->simple_interp) {
 843       attribs_update_simple(bld, gallivm, 0, quad_start_index, 0, 1);
 844    }
 845    else {
 846       attribs_update(bld, gallivm, 0, quad_start_index, 0, 1);
 847    }
 848 }
 849