src/gallium/drivers/llvmpipe/lp_setup_tri.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /*
  29  * Binning code for triangles
  30  */
  31
  32 #include "lp_setup_context.h"
  33 #include "lp_rast.h"
  34 #include "util/u_math.h"
  35 #include "util/u_memory.h"
  36
/* Number of channels per attribute slot that receive interpolation
 * coefficients; used as the per-slot loop bound in setup_tri_coefficients().
 */
#define NUM_CHANNELS 4
  38
  39 /**
  40  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
  41  */
  42 static void constant_coef( struct lp_rast_triangle *tri,
  43                            unsigned slot,
  44                            const float value,
  45                            unsigned i )
  46 {
  47    tri->inputs.a0[slot][i] = value;
  48    tri->inputs.dadx[slot][i] = 0;
  49    tri->inputs.dady[slot][i] = 0;
  50 }
  51
  52 /**
  53  * Compute a0, dadx and dady for a linearly interpolated coefficient,
  54  * for a triangle.
  55  */
  56 static void linear_coef( struct lp_rast_triangle *tri,
  57                          float oneoverarea,
  58                          unsigned slot,
  59                          const float (*v1)[4],
  60                          const float (*v2)[4],
  61                          const float (*v3)[4],
  62                          unsigned vert_attr,
  63                          unsigned i)
  64 {
  65    float a1 = v1[vert_attr][i];
  66    float a2 = v2[vert_attr][i];
  67    float a3 = v3[vert_attr][i];
  68
  69    float da12 = a1 - a2;
  70    float da31 = a3 - a1;
  71    float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
  72    float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
  73
  74    tri->inputs.dadx[slot][i] = dadx;
  75    tri->inputs.dady[slot][i] = dady;
  76
  77    /* calculate a0 as the value which would be sampled for the
  78     * fragment at (0,0), taking into account that we want to sample at
  79     * pixel centers, in other words (0.5, 0.5).
  80     *
  81     * this is neat but unfortunately not a good way to do things for
  82     * triangles with very large values of dadx or dady as it will
  83     * result in the subtraction and re-addition from a0 of a very
  84     * large number, which means we'll end up loosing a lot of the
  85     * fractional bits and precision from a0.  the way to fix this is
  86     * to define a0 as the sample at a pixel center somewhere near vmin
  87     * instead - i'll switch to this later.
  88     */
  89    tri->inputs.a0[slot][i] = (v1[vert_attr][i] -
  90                               (dadx * (v1[0][0] - 0.5f) +
  91                                dady * (v1[0][1] - 0.5f)));
  92 }
  93
  94
  95 /**
  96  * Compute a0, dadx and dady for a perspective-corrected interpolant,
  97  * for a triangle.
  98  * We basically multiply the vertex value by 1/w before computing
  99  * the plane coefficients (a0, dadx, dady).
 100  * Later, when we compute the value at a particular fragment position we'll
 101  * divide the interpolated value by the interpolated W at that fragment.
 102  */
 103 static void perspective_coef( struct lp_rast_triangle *tri,
 104                               float oneoverarea,
 105                               unsigned slot,
 106                               const float (*v1)[4],
 107                               const float (*v2)[4],
 108                               const float (*v3)[4],
 109                               unsigned vert_attr,
 110                               unsigned i)
 111 {
 112    /* premultiply by 1/w  (v[0][3] is always 1/w):
 113     */
 114    float a1 = v1[vert_attr][i] * v1[0][3];
 115    float a2 = v2[vert_attr][i] * v2[0][3];
 116    float a3 = v3[vert_attr][i] * v3[0][3];
 117    float da12 = a1 - a2;
 118    float da31 = a3 - a1;
 119    float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
 120    float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
 121
 122    tri->inputs.dadx[slot][i] = dadx;
 123    tri->inputs.dady[slot][i] = dady;
 124    tri->inputs.a0[slot][i] = (a1 -
 125                               (dadx * (v1[0][0] - 0.5f) +
 126                                dady * (v1[0][1] - 0.5f)));
 127 }
 128
 129
 130 /**
 131  * Special coefficient setup for gl_FragCoord.
 132  * X and Y are trivial, though Y has to be inverted for OpenGL.
 133  * Z and W are copied from position_coef which should have already been computed.
 134  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
 135  */
 136 static void
 137 setup_fragcoord_coef(struct lp_rast_triangle *tri,
 138                      float oneoverarea,
 139                      unsigned slot,
 140                      const float (*v1)[4],
 141                      const float (*v2)[4],
 142                      const float (*v3)[4])
 143 {
 144    /*X*/
 145    tri->inputs.a0[slot][0] = 0.0;
 146    tri->inputs.dadx[slot][0] = 1.0;
 147    tri->inputs.dady[slot][0] = 0.0;
 148    /*Y*/
 149    tri->inputs.a0[slot][1] = 0.0;
 150    tri->inputs.dadx[slot][1] = 0.0;
 151    tri->inputs.dady[slot][1] = 1.0;
 152    /*Z*/
 153    linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 2);
 154    /*W*/
 155    linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 3);
 156 }
 157
 158
 159 static void setup_facing_coef( struct lp_rast_triangle *tri,
 160                                unsigned slot,
 161                                boolean frontface )
 162 {
 163    constant_coef( tri, slot, 1.0f - frontface, 0 );
 164    constant_coef( tri, slot, 0.0f, 1 ); /* wasted */
 165    constant_coef( tri, slot, 0.0f, 2 ); /* wasted */
 166    constant_coef( tri, slot, 0.0f, 3 ); /* wasted */
 167 }
 168
 169
 170 /**
 171  * Compute the tri->coef[] array dadx, dady, a0 values.
 172  */
 173 static void setup_tri_coefficients( struct setup_context *setup,
 174                                     struct lp_rast_triangle *tri,
 175                                     float oneoverarea,
 176                                     const float (*v1)[4],
 177                                     const float (*v2)[4],
 178                                     const float (*v3)[4],
 179                                     boolean frontface)
 180 {
 181    unsigned slot;
 182
 183    /* Allocate space for the a0, dadx and dady arrays
 184     */
 185    {
 186       unsigned bytes;
 187       bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float);
 188       tri->inputs.a0   = lp_bin_alloc_aligned( &setup->data, bytes, 16 );
 189       tri->inputs.dadx = lp_bin_alloc_aligned( &setup->data, bytes, 16 );
 190       tri->inputs.dady = lp_bin_alloc_aligned( &setup->data, bytes, 16 );
 191    }
 192
 193    /* The internal position input is in slot zero:
 194     */
 195    setup_fragcoord_coef(tri, oneoverarea, 0, v1, v2, v3);
 196
 197    /* setup interpolation for all the remaining attributes:
 198     */
 199    for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
 200       unsigned vert_attr = setup->fs.input[slot].src_index;
 201       unsigned i;
 202
 203       switch (setup->fs.input[slot].interp) {
 204       case LP_INTERP_CONSTANT:
 205          for (i = 0; i < NUM_CHANNELS; i++)
 206             constant_coef(tri, slot+1, v3[vert_attr][i], i);
 207          break;
 208
 209       case LP_INTERP_LINEAR:
 210          for (i = 0; i < NUM_CHANNELS; i++)
 211             linear_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
 212          break;
 213
 214       case LP_INTERP_PERSPECTIVE:
 215          for (i = 0; i < NUM_CHANNELS; i++)
 216             perspective_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
 217          break;
 218
 219       case LP_INTERP_POSITION:
 220          /* XXX: fix me - duplicates the values in slot zero.
 221           */
 222          setup_fragcoord_coef(tri, oneoverarea, slot+1, v1, v2, v3);
 223          break;
 224
 225       case LP_INTERP_FACING:
 226          setup_facing_coef(tri, slot+1, frontface);
 227          break;
 228
 229       default:
 230          assert(0);
 231       }
 232    }
 233 }
 234
 235
 236
 237 static inline int subpixel_snap( float a )
 238 {
 239    return util_iround(FIXED_ONE * a);
 240 }
 241
 242
/* Three-way minimum/maximum built from the two-argument MIN2/MAX2 macros.
 * NOTE(review): arguments may be evaluated more than once, depending on how
 * MIN2/MAX2 are defined -- pass side-effect-free expressions.
 */
#define MIN3(a,b,c) MIN2(MIN2(a,b),c)
#define MAX3(a,b,c) MAX2(MAX2(a,b),c)
 245
 246 /**
 247  * Do basic setup for triangle rasterization and determine which
 248  * framebuffer tiles are touched.  Put the triangle in the bins for the
 249  * tiles which we overlap.
 250  */
 251 static void
 252 do_triangle_ccw(struct setup_context *setup,
 253                 const float (*v1)[4],
 254                 const float (*v2)[4],
 255                 const float (*v3)[4],
 256                 boolean frontfacing )
 257 {
 258    /* x/y positions in fixed point */
 259    const int x1 = subpixel_snap(v1[0][0]);
 260    const int x2 = subpixel_snap(v2[0][0]);
 261    const int x3 = subpixel_snap(v3[0][0]);
 262    const int y1 = subpixel_snap(v1[0][1]);
 263    const int y2 = subpixel_snap(v2[0][1]);
 264    const int y3 = subpixel_snap(v3[0][1]);
 265
 266    struct lp_rast_triangle *tri = lp_bin_alloc( &setup->data, sizeof *tri );
 267    float area, oneoverarea;
 268    int minx, maxx, miny, maxy;
 269
 270    tri->dx12 = x1 - x2;
 271    tri->dx23 = x2 - x3;
 272    tri->dx31 = x3 - x1;
 273
 274    tri->dy12 = y1 - y2;
 275    tri->dy23 = y2 - y3;
 276    tri->dy31 = y3 - y1;
 277
 278    area = (tri->dx12 * tri->dy31 -
 279            tri->dx31 * tri->dy12);
 280
 281    /* Cull non-ccw and zero-sized triangles.
 282     *
 283     * XXX: subject to overflow??
 284     */
 285    if (area <= 0) {
 286       lp_bin_putback_data( &setup->data, sizeof *tri );
 287       return;
 288    }
 289
 290    /* Bounding rectangle (in pixels) */
 291    tri->minx = (MIN3(x1, x2, x3) + 0xf) >> FIXED_ORDER;
 292    tri->maxx = (MAX3(x1, x2, x3) + 0xf) >> FIXED_ORDER;
 293    tri->miny = (MIN3(y1, y2, y3) + 0xf) >> FIXED_ORDER;
 294    tri->maxy = (MAX3(y1, y2, y3) + 0xf) >> FIXED_ORDER;
 295
 296    if (tri->miny == tri->maxy ||
 297        tri->minx == tri->maxx) {
 298       lp_bin_putback_data( &setup->data, sizeof *tri );
 299       return;
 300    }
 301
 302    /*
 303     */
 304    oneoverarea = ((float)FIXED_ONE) / (float)area;
 305
 306    /* Setup parameter interpolants:
 307     */
 308    setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing );
 309
 310    /* half-edge constants, will be interated over the whole
 311     * rendertarget.
 312     */
 313    tri->c1 = tri->dy12 * x1 - tri->dx12 * y1;
 314    tri->c2 = tri->dy23 * x2 - tri->dx23 * y2;
 315    tri->c3 = tri->dy31 * x3 - tri->dx31 * y3;
 316
 317    /* correct for top-left fill convention:
 318     */
 319    if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++;
 320    if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++;
 321    if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++;
 322
 323    tri->dy12 *= FIXED_ONE;
 324    tri->dy23 *= FIXED_ONE;
 325    tri->dy31 *= FIXED_ONE;
 326
 327    tri->dx12 *= FIXED_ONE;
 328    tri->dx23 *= FIXED_ONE;
 329    tri->dx31 *= FIXED_ONE;
 330
 331    /* find trivial reject offsets for each edge for a single-pixel
 332     * sized block.  These will be scaled up at each recursive level to
 333     * match the active blocksize.  Scaling in this way works best if
 334     * the blocks are square.
 335     */
 336    tri->eo1 = 0;
 337    if (tri->dy12 < 0) tri->eo1 -= tri->dy12;
 338    if (tri->dx12 > 0) tri->eo1 += tri->dx12;
 339
 340    tri->eo2 = 0;
 341    if (tri->dy23 < 0) tri->eo2 -= tri->dy23;
 342    if (tri->dx23 > 0) tri->eo2 += tri->dx23;
 343
 344    tri->eo3 = 0;
 345    if (tri->dy31 < 0) tri->eo3 -= tri->dy31;
 346    if (tri->dx31 > 0) tri->eo3 += tri->dx31;
 347
 348    /* Calculate trivial accept offsets from the above.
 349     */
 350    tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1;
 351    tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2;
 352    tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;
 353
 354    {
 355       int xstep1 = -tri->dy12;
 356       int xstep2 = -tri->dy23;
 357       int xstep3 = -tri->dy31;
 358
 359       int ystep1 = tri->dx12;
 360       int ystep2 = tri->dx23;
 361       int ystep3 = tri->dx31;
 362
 363       int ix, iy;
 364       int i = 0;
 365
 366       int c1 = 0;
 367       int c2 = 0;
 368       int c3 = 0;
 369
 370       for (iy = 0; iy < 4; iy++) {
 371          int cx1 = c1;
 372          int cx2 = c2;
 373          int cx3 = c3;
 374
 375          for (ix = 0; ix < 4; ix++, i++) {
 376             tri->step[0][i] = cx1;
 377             tri->step[1][i] = cx2;
 378             tri->step[2][i] = cx3;
 379             cx1 += xstep1;
 380             cx2 += xstep2;
 381             cx3 += xstep3;
 382          }
 383
 384          c1 += ystep1;
 385          c2 += ystep2;
 386          c3 += ystep3;
 387       }
 388    }
 389
 390    /*
 391     * All fields of 'tri' are now set.  The remaining code here is
 392     * concerned with binning.
 393     */
 394
 395    /* Convert to tile coordinates:
 396     */
 397    minx = tri->minx / TILE_SIZE;
 398    miny = tri->miny / TILE_SIZE;
 399    maxx = tri->maxx / TILE_SIZE;
 400    maxy = tri->maxy / TILE_SIZE;
 401
 402    /* Determine which tile(s) intersect the triangle's bounding box
 403     */
 404    if (miny == maxy && minx == maxx)
 405    {
 406       /* Triangle is contained in a single tile:
 407        */
 408       lp_bin_command( &setup->tile[minx][miny], lp_rast_triangle,
 409                    lp_rast_arg_triangle(tri) );
 410    }
 411    else
 412    {
 413       int c1 = (tri->c1 +
 414                 tri->dx12 * miny * TILE_SIZE -
 415                 tri->dy12 * minx * TILE_SIZE);
 416       int c2 = (tri->c2 +
 417                 tri->dx23 * miny * TILE_SIZE -
 418                 tri->dy23 * minx * TILE_SIZE);
 419       int c3 = (tri->c3 +
 420                 tri->dx31 * miny * TILE_SIZE -
 421                 tri->dy31 * minx * TILE_SIZE);
 422
 423       int ei1 = tri->ei1 << TILE_ORDER;
 424       int ei2 = tri->ei2 << TILE_ORDER;
 425       int ei3 = tri->ei3 << TILE_ORDER;
 426
 427       int eo1 = tri->eo1 << TILE_ORDER;
 428       int eo2 = tri->eo2 << TILE_ORDER;
 429       int eo3 = tri->eo3 << TILE_ORDER;
 430
 431       int xstep1 = -(tri->dy12 << TILE_ORDER);
 432       int xstep2 = -(tri->dy23 << TILE_ORDER);
 433       int xstep3 = -(tri->dy31 << TILE_ORDER);
 434
 435       int ystep1 = tri->dx12 << TILE_ORDER;
 436       int ystep2 = tri->dx23 << TILE_ORDER;
 437       int ystep3 = tri->dx31 << TILE_ORDER;
 438       int x, y;
 439
 440
 441       /* Trivially accept or reject blocks, else jump to per-pixel
 442        * examination above.
 443        */
 444       for (y = miny; y <= maxy; y++)
 445       {
 446          int cx1 = c1;
 447          int cx2 = c2;
 448          int cx3 = c3;
 449          int in = 0;
 450
 451          for (x = minx; x <= maxx; x++)
 452          {
 453             if (cx1 + eo1 < 0 ||
 454                 cx2 + eo2 < 0 ||
 455                 cx3 + eo3 < 0)
 456             {
 457                /* do nothing */
 458                if (in)
 459                   break;
 460             }
 461             else if (cx1 + ei1 > 0 &&
 462                      cx2 + ei2 > 0 &&
 463                      cx3 + ei3 > 0)
 464             {
 465                in = 1;
 466                /* triangle covers the whole tile- shade whole tile */
 467                lp_bin_command( &setup->tile[x][y],
 468                             lp_rast_shade_tile,
 469                             lp_rast_arg_inputs(&tri->inputs) );
 470             }
 471             else
 472             {
 473                in = 1;
 474                /* shade partial tile */
 475                lp_bin_command( &setup->tile[x][y],
 476                             lp_rast_triangle,
 477                             lp_rast_arg_triangle(tri) );
 478             }
 479
 480             /* Iterate cx values across the region:
 481              */
 482             cx1 += xstep1;
 483             cx2 += xstep2;
 484             cx3 += xstep3;
 485          }
 486
 487          /* Iterate c values down the region:
 488           */
 489          c1 += ystep1;
 490          c2 += ystep2;
 491          c3 += ystep3;
 492       }
 493    }
 494 }
 495
 496 static void triangle_cw( struct setup_context *setup,
 497                          const float (*v0)[4],
 498                          const float (*v1)[4],
 499                          const float (*v2)[4] )
 500 {
 501    do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface );
 502 }
 503
 504 static void triangle_ccw( struct setup_context *setup,
 505                          const float (*v0)[4],
 506                          const float (*v1)[4],
 507                          const float (*v2)[4] )
 508 {
 509    do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface );
 510 }
 511
 512 static void triangle_both( struct setup_context *setup,
 513                            const float (*v0)[4],
 514                            const float (*v1)[4],
 515                            const float (*v2)[4] )
 516 {
 517    /* edge vectors e = v0 - v2, f = v1 - v2 */
 518    const float ex = v0[0][0] - v2[0][0];
 519    const float ey = v0[0][1] - v2[0][1];
 520    const float fx = v1[0][0] - v2[0][0];
 521    const float fy = v1[0][1] - v2[0][1];
 522
 523    /* det = cross(e,f).z */
 524    if (ex * fy - ey * fx < 0)
 525       triangle_ccw( setup, v0, v1, v2 );
 526    else
 527       triangle_cw( setup, v0, v1, v2 );
 528 }
 529
 530 static void triangle_nop( struct setup_context *setup,
 531                           const float (*v0)[4],
 532                           const float (*v1)[4],
 533                           const float (*v2)[4] )
 534 {
 535 }
 536
 537
 538 void
 539 lp_setup_choose_triangle( struct setup_context *setup )
 540 {
 541    switch (setup->cullmode) {
 542    case PIPE_WINDING_NONE:
 543       setup->triangle = triangle_both;
 544       break;
 545    case PIPE_WINDING_CCW:
 546       setup->triangle = triangle_cw;
 547       break;
 548    case PIPE_WINDING_CW:
 549       setup->triangle = triangle_ccw;
 550       break;
 551    default:
 552       setup->triangle = triangle_nop;
 553       break;
 554    }
 555 }
 556
 557