src/gallium/drivers/llvmpipe/lp_bld_interp.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
   5  * All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * @file
  31  * Position and shader input interpolation.
  32  *
  33  * @author Jose Fonseca <jfonseca@vmware.com>
  34  */
  35
  36 #include "pipe/p_shader_tokens.h"
  37 #include "util/u_debug.h"
  38 #include "util/u_memory.h"
  39 #include "util/u_math.h"
  40 #include "tgsi/tgsi_scan.h"
  41 #include "gallivm/lp_bld_debug.h"
  42 #include "gallivm/lp_bld_const.h"
  43 #include "gallivm/lp_bld_arit.h"
  44 #include "gallivm/lp_bld_swizzle.h"
  45 #include "lp_bld_interp.h"
  46
  47
  48 /*
  49  * The shader JIT function operates on blocks of quads.
  50  * Each block has 2x2 quads and each quad has 2x2 pixels.
  51  *
  52  * We iterate over the quads in order 0, 1, 2, 3:
  53  *
  54  * #################
  55  * #   |   #   |   #
  56  * #---0---#---1---#
  57  * #   |   #   |   #
  58  * #################
  59  * #   |   #   |   #
  60  * #---2---#---3---#
  61  * #   |   #   |   #
  62  * #################
  63  *
  64  * Within each quad, we have four pixels which are represented in SOA
  65  * order:
  66  *
  67  * #########
  68  * # 0 | 1 #
  69  * #---+---#
  70  * # 2 | 3 #
  71  * #########
  72  *
  73  * So the green channel (for example) of the four pixels is stored in
  74  * a single vector register: {g0, g1, g2, g3}.
  75  */
  76
  77
  78 /**
  79  * Do one perspective divide per quad.
  80  *
  81  * For perspective interpolation, the final attribute value is given
  82  *
  83  *  a' = a/w = a * oow
  84  *
  85  * where
  86  *
  87  *  a = a0 + dadx*x + dady*y
  88  *  w = w0 + dwdx*x + dwdy*y
  89  *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
  90  *
  91  * Instead of computing the division per pixel, with this macro we compute the
  92  * division on the upper left pixel of each quad, and use a linear
  93  * approximation in the remaining pixels, given by:
  94  *
   95  *  da'dx = (dadx - dwdx*a')*oow
   96  *  da'dy = (dady - dwdy*a')*oow
  97  *
  98  * Ironically, this actually makes things slower -- probably because the
  99  * divide hardware unit is rarely used, whereas the multiply unit is typically
 100  * already saturated.
 101  */
 102 #define PERSPECTIVE_DIVIDE_PER_QUAD 0
 103
 104
 105 static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
 106 static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
 107
 108
 109 static void
 110 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
 111 {
 112    if(attrib == 0)
 113       lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
 114    else
 115       lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
 116 }
 117
 118
 119 /**
 120  * Initialize the bld->a0, dadx, dady fields.  This involves fetching
 121  * those values from the arrays which are passed into the JIT function.
 122  */
 123 static void
 124 coeffs_init(struct lp_build_interp_soa_context *bld,
 125             LLVMValueRef a0_ptr,
 126             LLVMValueRef dadx_ptr,
 127             LLVMValueRef dady_ptr)
 128 {
 129    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 130    LLVMBuilderRef builder = coeff_bld->builder;
 131    LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type);
 132    LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0);
 133    LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
 134    LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
 135    LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
 136    LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
 137    unsigned attrib;
 138    unsigned chan;
 139
 140    /* TODO: Use more vector operations */
 141
 142    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
 143       const unsigned mask = bld->mask[attrib];
 144       const unsigned interp = bld->interp[attrib];
 145       for (chan = 0; chan < NUM_CHANNELS; ++chan) {
 146          if (mask & (1 << chan)) {
 147             LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
 148             LLVMValueRef a0 = zero;
 149             LLVMValueRef dadx = zero;
 150             LLVMValueRef dady = zero;
 151             LLVMValueRef dadxy = zero;
 152             LLVMValueRef dadq;
 153             LLVMValueRef dadq2;
 154             LLVMValueRef a;
 155
 156             switch (interp) {
 157             case LP_INTERP_PERSPECTIVE:
 158                /* fall-through */
 159
 160             case LP_INTERP_LINEAR:
 161                if (attrib == 0 && chan == 0) {
 162                   dadxy = dadx = one;
 163                }
 164                else if (attrib == 0 && chan == 1) {
 165                   dadxy = dady = one;
 166                }
 167                else {
 168                   dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
 169                   dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
 170                   dadxy = LLVMBuildFAdd(builder, dadx, dady, "");
 171                   attrib_name(dadx, attrib, chan, ".dadx");
 172                   attrib_name(dady, attrib, chan, ".dady");
 173                   attrib_name(dadxy, attrib, chan, ".dadxy");
 174                }
 175                /* fall-through */
 176
 177             case LP_INTERP_CONSTANT:
 178             case LP_INTERP_FACING:
 179                a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
 180                attrib_name(a0, attrib, chan, ".a0");
 181                break;
 182
 183             case LP_INTERP_POSITION:
 184                /* Nothing to do as the position coeffs are already setup in slot 0 */
 185                continue;
 186
 187             default:
 188                assert(0);
 189                break;
 190             }
 191
 192             /*
 193              * dadq = {0, dadx, dady, dadx + dady}
 194              */
 195
 196             dadq = coeff_bld->undef;
 197             dadq = LLVMBuildInsertElement(builder, dadq, zero,  i0, "");
 198             dadq = LLVMBuildInsertElement(builder, dadq, dadx,  i1, "");
 199             dadq = LLVMBuildInsertElement(builder, dadq, dady,  i2, "");
 200             dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, "");
 201
 202             /*
 203              * dadq2 = 2 * dq
 204              */
 205
 206             dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
 207
 208             /*
 209              * a = a0 + (x * dadx + y * dady)
 210              */
 211
 212             if (attrib == 0 && chan == 0) {
 213                a = bld->x;
 214             }
 215             else if (attrib == 0 && chan == 1) {
 216                a = bld->y;
 217             }
 218             else {
 219                a = a0;
 220                if (interp != LP_INTERP_CONSTANT &&
 221                    interp != LP_INTERP_FACING) {
 222                   LLVMValueRef ax, ay, axy;
 223                   ax = LLVMBuildFMul(builder, bld->x, dadx, "");
 224                   ay = LLVMBuildFMul(builder, bld->y, dady, "");
 225                   axy = LLVMBuildFAdd(builder, ax, ay, "");
 226                   a = LLVMBuildFAdd(builder, a, axy, "");
 227                }
 228             }
 229
 230             /*
 231              * a = {a, a, a, a}
 232              */
 233
 234             a = lp_build_broadcast(builder, coeff_bld->vec_type, a);
 235
 236             /*
 237              * Compute the attrib values on the upper-left corner of each quad.
 238              */
 239
 240             a = LLVMBuildFAdd(builder, a, dadq2, "");
 241
 242 #if PERSPECTIVE_DIVIDE_PER_QUAD
 243             /*
 244              * a *= 1 / w
 245              */
 246
 247             if (interp == LP_INTERP_PERSPECTIVE) {
 248                LLVMValueRef w = bld->a[0][3];
 249                assert(attrib != 0);
 250                assert(bld->mask[0] & TGSI_WRITEMASK_W);
 251                if (!bld->oow) {
 252                   bld->oow = lp_build_rcp(coeff_bld, w);
 253                   lp_build_name(bld->oow, "oow");
 254                }
 255                a = lp_build_mul(coeff_bld, a, bld->oow);
 256             }
 257 #endif
 258
 259             attrib_name(a, attrib, chan, ".a");
 260             attrib_name(dadq, attrib, chan, ".dadq");
 261
 262             bld->a   [attrib][chan] = a;
 263             bld->dadq[attrib][chan] = dadq;
 264          }
 265       }
 266    }
 267 }
 268
 269
 270 /**
 271  * Increment the shader input attribute values.
 272  * This is called when we move from one quad to the next.
 273  */
 274 static void
 275 attribs_update(struct lp_build_interp_soa_context *bld,
 276                int quad_index,
 277                int start,
 278                int end)
 279 {
 280    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 281    LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
 282    LLVMValueRef oow = NULL;
 283    unsigned attrib;
 284    unsigned chan;
 285
 286    assert(quad_index < 4);
 287
 288    for(attrib = start; attrib < end; ++attrib) {
 289       const unsigned mask = bld->mask[attrib];
 290       const unsigned interp = bld->interp[attrib];
 291       for(chan = 0; chan < NUM_CHANNELS; ++chan) {
 292          if(mask & (1 << chan)) {
 293             LLVMValueRef a;
 294             if (interp == LP_INTERP_CONSTANT ||
 295                 interp == LP_INTERP_FACING) {
 296                a = bld->a[attrib][chan];
 297             }
 298             else if (interp == LP_INTERP_POSITION) {
 299                assert(attrib > 0);
 300                a = bld->attribs[0][chan];
 301             }
 302             else {
 303                LLVMValueRef dadq;
 304
 305                a = bld->a[attrib][chan];
 306
 307                /*
 308                 * Broadcast the attribute value for this quad into all elements
 309                 */
 310
 311                a = LLVMBuildShuffleVector(coeff_bld->builder,
 312                                           a, coeff_bld->undef, shuffle, "");
 313
 314                /*
 315                 * Get the derivatives.
 316                 */
 317
 318                dadq = bld->dadq[attrib][chan];
 319
 320 #if PERSPECTIVE_DIVIDE_PER_QUAD
 321                if (interp == LP_INTERP_PERSPECTIVE) {
 322                   LLVMValueRef dwdq = bld->dadq[0][3];
 323
 324                   if (oow == NULL) {
 325                      assert(bld->oow);
 326                      oow = LLVMBuildShuffleVector(coeff_bld->builder,
 327                                                   bld->oow, coeff_bld->undef,
 328                                                   shuffle, "");
 329                   }
 330
 331                   dadq = lp_build_sub(coeff_bld,
 332                                       dadq,
 333                                       lp_build_mul(coeff_bld, a, dwdq));
 334                   dadq = lp_build_mul(coeff_bld, dadq, oow);
 335                }
 336 #endif
 337
 338                /*
 339                 * Add the derivatives
 340                 */
 341
 342                a = lp_build_add(coeff_bld, a, dadq);
 343
 344 #if !PERSPECTIVE_DIVIDE_PER_QUAD
 345                if (interp == LP_INTERP_PERSPECTIVE) {
 346                   if (oow == NULL) {
 347                      LLVMValueRef w = bld->attribs[0][3];
 348                      assert(attrib != 0);
 349                      assert(bld->mask[0] & TGSI_WRITEMASK_W);
 350                      oow = lp_build_rcp(coeff_bld, w);
 351                   }
 352                   a = lp_build_mul(coeff_bld, a, oow);
 353                }
 354 #endif
 355
 356                if (attrib == 0 && chan == 2) {
 357                   /* FIXME: Depth values can exceed 1.0, due to the fact that
 358                    * setup interpolation coefficients refer to (0,0) which causes
 359                    * precision loss. So we must clamp to 1.0 here to avoid artifacts
 360                    */
 361                   a = lp_build_min(coeff_bld, a, coeff_bld->one);
 362                }
 363
 364                attrib_name(a, attrib, chan, "");
 365             }
 366             bld->attribs[attrib][chan] = a;
 367          }
 368       }
 369    }
 370 }
 371
 372
 373 /**
 374  * Generate the position vectors.
 375  *
 376  * Parameter x0, y0 are the integer values with upper left coordinates.
 377  */
 378 static void
 379 pos_init(struct lp_build_interp_soa_context *bld,
 380          LLVMValueRef x0,
 381          LLVMValueRef y0)
 382 {
 383    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 384
 385    bld->x = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->elem_type, "");
 386    bld->y = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->elem_type, "");
 387 }
 388
 389
 390 /**
 391  * Initialize fragment shader input attribute info.
 392  */
 393 void
 394 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
 395                          unsigned num_inputs,
 396                          const struct lp_shader_input *inputs,
 397                          LLVMBuilderRef builder,
 398                          struct lp_type type,
 399                          LLVMValueRef a0_ptr,
 400                          LLVMValueRef dadx_ptr,
 401                          LLVMValueRef dady_ptr,
 402                          LLVMValueRef x0,
 403                          LLVMValueRef y0)
 404 {
 405    struct lp_type coeff_type;
 406    unsigned attrib;
 407    unsigned chan;
 408
 409    memset(bld, 0, sizeof *bld);
 410
 411    memset(&coeff_type, 0, sizeof coeff_type);
 412    coeff_type.floating = TRUE;
 413    coeff_type.sign = TRUE;
 414    coeff_type.width = 32;
 415    coeff_type.length = QUAD_SIZE;
 416
 417    /* XXX: we don't support interpolating into any other types */
 418    assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
 419
 420    lp_build_context_init(&bld->coeff_bld, builder, coeff_type);
 421
 422    /* For convenience */
 423    bld->pos = bld->attribs[0];
 424    bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
 425
 426    /* Position */
 427    bld->num_attribs = 1;
 428    bld->mask[0] = TGSI_WRITEMASK_XYZW;
 429    bld->interp[0] = LP_INTERP_LINEAR;
 430
 431    /* Inputs */
 432    for (attrib = 0; attrib < num_inputs; ++attrib) {
 433       bld->mask[1 + attrib] = inputs[attrib].usage_mask;
 434       bld->interp[1 + attrib] = inputs[attrib].interp;
 435    }
 436    bld->num_attribs = 1 + num_inputs;
 437
 438    /* Ensure all masked out input channels have a valid value */
 439    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
 440       for (chan = 0; chan < NUM_CHANNELS; ++chan) {
 441          bld->attribs[attrib][chan] = bld->coeff_bld.undef;
 442       }
 443    }
 444
 445    pos_init(bld, x0, y0);
 446
 447    coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
 448 }
 449
 450
 451 /**
 452  * Advance the position and inputs to the given quad within the block.
 453  */
 454 void
 455 lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
 456                                   int quad_index)
 457 {
 458    assert(quad_index < 4);
 459
 460    attribs_update(bld, quad_index, 1, bld->num_attribs);
 461 }
 462
 463 void
 464 lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
 465                                   int quad_index)
 466 {
 467    assert(quad_index < 4);
 468
 469    attribs_update(bld, quad_index, 0, 1);
 470 }
 471