src/gallium/drivers/llvmpipe/lp_setup_tri.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /*
  29  * Binning code for triangles
  30  */
  31
  32 #include "lp_setup_context.h"
  33 #include "lp_rast.h"
  34 #include "util/u_math.h"
  35 #include "util/u_memory.h"
  36
  37 #define NUM_CHANNELS 4
  38
  39
  40 /**
  41  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
  42  */
  43 static void constant_coef( struct lp_rast_triangle *tri,
  44                            unsigned slot,
  45                            const float value,
  46                            unsigned i )
  47 {
  48    tri->inputs.a0[slot][i] = value;
  49    tri->inputs.dadx[slot][i] = 0.0f;
  50    tri->inputs.dady[slot][i] = 0.0f;
  51 }
  52
  53
  54 /**
  55  * Compute a0, dadx and dady for a linearly interpolated coefficient,
  56  * for a triangle.
  57  */
  58 static void linear_coef( struct lp_rast_triangle *tri,
  59                          float oneoverarea,
  60                          unsigned slot,
  61                          const float (*v1)[4],
  62                          const float (*v2)[4],
  63                          const float (*v3)[4],
  64                          unsigned vert_attr,
  65                          unsigned i)
  66 {
  67    float a1 = v1[vert_attr][i];
  68    float a2 = v2[vert_attr][i];
  69    float a3 = v3[vert_attr][i];
  70
  71    float da12 = a1 - a2;
  72    float da31 = a3 - a1;
  73    float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
  74    float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
  75
  76    tri->inputs.dadx[slot][i] = dadx;
  77    tri->inputs.dady[slot][i] = dady;
  78
  79    /* calculate a0 as the value which would be sampled for the
  80     * fragment at (0,0), taking into account that we want to sample at
  81     * pixel centers, in other words (0.5, 0.5).
  82     *
  83     * this is neat but unfortunately not a good way to do things for
  84     * triangles with very large values of dadx or dady as it will
  85     * result in the subtraction and re-addition from a0 of a very
  86     * large number, which means we'll end up loosing a lot of the
  87     * fractional bits and precision from a0.  the way to fix this is
  88     * to define a0 as the sample at a pixel center somewhere near vmin
  89     * instead - i'll switch to this later.
  90     */
  91    tri->inputs.a0[slot][i] = (v1[vert_attr][i] -
  92                               (dadx * (v1[0][0] - 0.5f) +
  93                                dady * (v1[0][1] - 0.5f)));
  94 }
  95
  96
  97 /**
  98  * Compute a0, dadx and dady for a perspective-corrected interpolant,
  99  * for a triangle.
 100  * We basically multiply the vertex value by 1/w before computing
 101  * the plane coefficients (a0, dadx, dady).
 102  * Later, when we compute the value at a particular fragment position we'll
 103  * divide the interpolated value by the interpolated W at that fragment.
 104  */
 105 static void perspective_coef( struct lp_rast_triangle *tri,
 106                               float oneoverarea,
 107                               unsigned slot,
 108                               const float (*v1)[4],
 109                               const float (*v2)[4],
 110                               const float (*v3)[4],
 111                               unsigned vert_attr,
 112                               unsigned i)
 113 {
 114    /* premultiply by 1/w  (v[0][3] is always 1/w):
 115     */
 116    float a1 = v1[vert_attr][i] * v1[0][3];
 117    float a2 = v2[vert_attr][i] * v2[0][3];
 118    float a3 = v3[vert_attr][i] * v3[0][3];
 119    float da12 = a1 - a2;
 120    float da31 = a3 - a1;
 121    float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
 122    float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
 123
 124    tri->inputs.dadx[slot][i] = dadx;
 125    tri->inputs.dady[slot][i] = dady;
 126    tri->inputs.a0[slot][i] = (a1 -
 127                               (dadx * (v1[0][0] - 0.5f) +
 128                                dady * (v1[0][1] - 0.5f)));
 129 }
 130
 131
 132 /**
 133  * Special coefficient setup for gl_FragCoord.
 134  * X and Y are trivial
 135  * Z and W are copied from position_coef which should have already been computed.
 136  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
 137  */
 138 static void
 139 setup_fragcoord_coef(struct lp_rast_triangle *tri,
 140                      float oneoverarea,
 141                      unsigned slot,
 142                      const float (*v1)[4],
 143                      const float (*v2)[4],
 144                      const float (*v3)[4])
 145 {
 146    /*X*/
 147    tri->inputs.a0[slot][0] = 0.0;
 148    tri->inputs.dadx[slot][0] = 1.0;
 149    tri->inputs.dady[slot][0] = 0.0;
 150    /*Y*/
 151    tri->inputs.a0[slot][1] = 0.0;
 152    tri->inputs.dadx[slot][1] = 0.0;
 153    tri->inputs.dady[slot][1] = 1.0;
 154    /*Z*/
 155    linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 2);
 156    /*W*/
 157    linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 3);
 158 }
 159
 160
 161 static void setup_facing_coef( struct lp_rast_triangle *tri,
 162                                unsigned slot,
 163                                boolean frontface )
 164 {
 165    constant_coef( tri, slot, 1.0f - frontface, 0 );
 166    constant_coef( tri, slot, 0.0f, 1 ); /* wasted */
 167    constant_coef( tri, slot, 0.0f, 2 ); /* wasted */
 168    constant_coef( tri, slot, 0.0f, 3 ); /* wasted */
 169 }
 170
 171
 172 /**
 173  * Compute the tri->coef[] array dadx, dady, a0 values.
 174  */
 175 static void setup_tri_coefficients( struct setup_context *setup,
 176                                     struct lp_rast_triangle *tri,
 177                                     float oneoverarea,
 178                                     const float (*v1)[4],
 179                                     const float (*v2)[4],
 180                                     const float (*v3)[4],
 181                                     boolean frontface)
 182 {
 183    struct lp_scene *scene = lp_setup_get_current_scene(setup);
 184    unsigned slot;
 185
 186    /* Allocate space for the a0, dadx and dady arrays
 187     */
 188    {
 189       unsigned bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float);
 190       tri->inputs.a0   = lp_scene_alloc_aligned( scene, bytes, 16 );
 191       tri->inputs.dadx = lp_scene_alloc_aligned( scene, bytes, 16 );
 192       tri->inputs.dady = lp_scene_alloc_aligned( scene, bytes, 16 );
 193    }
 194
 195    /* The internal position input is in slot zero:
 196     */
 197    setup_fragcoord_coef(tri, oneoverarea, 0, v1, v2, v3);
 198
 199    /* setup interpolation for all the remaining attributes:
 200     */
 201    for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
 202       unsigned vert_attr = setup->fs.input[slot].src_index;
 203       unsigned i;
 204
 205       switch (setup->fs.input[slot].interp) {
 206       case LP_INTERP_CONSTANT:
 207          for (i = 0; i < NUM_CHANNELS; i++)
 208             constant_coef(tri, slot+1, v3[vert_attr][i], i);
 209          break;
 210
 211       case LP_INTERP_LINEAR:
 212          for (i = 0; i < NUM_CHANNELS; i++)
 213             linear_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
 214          break;
 215
 216       case LP_INTERP_PERSPECTIVE:
 217          for (i = 0; i < NUM_CHANNELS; i++)
 218             perspective_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
 219          break;
 220
 221       case LP_INTERP_POSITION:
 222          /* XXX: fix me - duplicates the values in slot zero.
 223           */
 224          setup_fragcoord_coef(tri, oneoverarea, slot+1, v1, v2, v3);
 225          break;
 226
 227       case LP_INTERP_FACING:
 228          setup_facing_coef(tri, slot+1, frontface);
 229          break;
 230
 231       default:
 232          assert(0);
 233       }
 234    }
 235 }
 236
 237
 238
 239 static inline int subpixel_snap( float a )
 240 {
 241    return util_iround(FIXED_ONE * a - (FIXED_ONE / 2));
 242 }
 243
 244
 245 /**
 246  * Do basic setup for triangle rasterization and determine which
 247  * framebuffer tiles are touched.  Put the triangle in the scene's
 248  * bins for the tiles which we overlap.
 249  */
 250 static void
 251 do_triangle_ccw(struct setup_context *setup,
 252                 const float (*v1)[4],
 253                 const float (*v2)[4],
 254                 const float (*v3)[4],
 255                 boolean frontfacing )
 256 {
 257    /* x/y positions in fixed point */
 258    const int x1 = subpixel_snap(v1[0][0]);
 259    const int x2 = subpixel_snap(v2[0][0]);
 260    const int x3 = subpixel_snap(v3[0][0]);
 261    const int y1 = subpixel_snap(v1[0][1]);
 262    const int y2 = subpixel_snap(v2[0][1]);
 263    const int y3 = subpixel_snap(v3[0][1]);
 264
 265    struct lp_scene *scene = lp_setup_get_current_scene(setup);
 266    struct lp_rast_triangle *tri = lp_scene_alloc_aligned( scene, sizeof *tri, 16 );
 267    float area, oneoverarea;
 268    int minx, maxx, miny, maxy;
 269
 270    tri->dx12 = x1 - x2;
 271    tri->dx23 = x2 - x3;
 272    tri->dx31 = x3 - x1;
 273
 274    tri->dy12 = y1 - y2;
 275    tri->dy23 = y2 - y3;
 276    tri->dy31 = y3 - y1;
 277
 278    area = (tri->dx12 * tri->dy31 -
 279            tri->dx31 * tri->dy12);
 280
 281    /* Cull non-ccw and zero-sized triangles.
 282     *
 283     * XXX: subject to overflow??
 284     */
 285    if (area <= 0.0f) {
 286       lp_scene_putback_data( scene, sizeof *tri );
 287       return;
 288    }
 289
 290    /* Bounding rectangle (in pixels) */
 291    minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
 292    maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
 293    miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
 294    maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
 295
 296    if (setup->scissor_test) {
 297       minx = MAX2(minx, setup->scissor.current.minx);
 298       maxx = MIN2(maxx, setup->scissor.current.maxx);
 299       miny = MAX2(miny, setup->scissor.current.miny);
 300       maxy = MIN2(maxy, setup->scissor.current.maxy);
 301    }
 302
 303    if (miny == maxy ||
 304        minx == maxx) {
 305       lp_scene_putback_data( scene, sizeof *tri );
 306       return;
 307    }
 308
 309    /*
 310     */
 311    oneoverarea = ((float)FIXED_ONE) / (float)area;
 312
 313    /* Setup parameter interpolants:
 314     */
 315    setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing );
 316
 317    /* half-edge constants, will be interated over the whole render target.
 318     */
 319    tri->c1 = tri->dy12 * x1 - tri->dx12 * y1;
 320    tri->c2 = tri->dy23 * x2 - tri->dx23 * y2;
 321    tri->c3 = tri->dy31 * x3 - tri->dx31 * y3;
 322
 323    /* correct for top-left fill convention:
 324     */
 325    if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++;
 326    if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++;
 327    if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++;
 328
 329    tri->dy12 *= FIXED_ONE;
 330    tri->dy23 *= FIXED_ONE;
 331    tri->dy31 *= FIXED_ONE;
 332
 333    tri->dx12 *= FIXED_ONE;
 334    tri->dx23 *= FIXED_ONE;
 335    tri->dx31 *= FIXED_ONE;
 336
 337    /* find trivial reject offsets for each edge for a single-pixel
 338     * sized block.  These will be scaled up at each recursive level to
 339     * match the active blocksize.  Scaling in this way works best if
 340     * the blocks are square.
 341     */
 342    tri->eo1 = 0;
 343    if (tri->dy12 < 0) tri->eo1 -= tri->dy12;
 344    if (tri->dx12 > 0) tri->eo1 += tri->dx12;
 345
 346    tri->eo2 = 0;
 347    if (tri->dy23 < 0) tri->eo2 -= tri->dy23;
 348    if (tri->dx23 > 0) tri->eo2 += tri->dx23;
 349
 350    tri->eo3 = 0;
 351    if (tri->dy31 < 0) tri->eo3 -= tri->dy31;
 352    if (tri->dx31 > 0) tri->eo3 += tri->dx31;
 353
 354    /* Calculate trivial accept offsets from the above.
 355     */
 356    tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1;
 357    tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2;
 358    tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;
 359
 360    {
 361       const int xstep1 = -tri->dy12;
 362       const int xstep2 = -tri->dy23;
 363       const int xstep3 = -tri->dy31;
 364
 365       const int ystep1 = tri->dx12;
 366       const int ystep2 = tri->dx23;
 367       const int ystep3 = tri->dx31;
 368
 369       int qx, qy, ix, iy;
 370       int i = 0;
 371
 372       for (qy = 0; qy < 2; qy++) {
 373          for (qx = 0; qx < 2; qx++) {
 374             for (iy = 0; iy < 2; iy++) {
 375                for (ix = 0; ix < 2; ix++, i++) {
 376                   int x = qx * 2 + ix;
 377                   int y = qy * 2 + iy;
 378                   tri->inputs.step[0][i] = x * xstep1 + y * ystep1;
 379                   tri->inputs.step[1][i] = x * xstep2 + y * ystep2;
 380                   tri->inputs.step[2][i] = x * xstep3 + y * ystep3;
 381                }
 382             }
 383          }
 384       }
 385    }
 386
 387    /*
 388     * All fields of 'tri' are now set.  The remaining code here is
 389     * concerned with binning.
 390     */
 391
 392    /* Convert to tile coordinates:
 393     */
 394    minx = minx / TILE_SIZE;
 395    miny = miny / TILE_SIZE;
 396    maxx = maxx / TILE_SIZE;
 397    maxy = maxy / TILE_SIZE;
 398
 399    /* Clamp maxx, maxy to framebuffer size
 400     */
 401    maxx = MIN2(maxx, scene->tiles_x - 1);
 402    maxy = MIN2(maxy, scene->tiles_y - 1);
 403
 404    /* Determine which tile(s) intersect the triangle's bounding box
 405     */
 406    if (miny == maxy && minx == maxx)
 407    {
 408       /* Triangle is contained in a single tile:
 409        */
 410       lp_scene_bin_command( scene, minx, miny, lp_rast_triangle,
 411                             lp_rast_arg_triangle(tri) );
 412    }
 413    else
 414    {
 415       int c1 = (tri->c1 +
 416                 tri->dx12 * miny * TILE_SIZE -
 417                 tri->dy12 * minx * TILE_SIZE);
 418       int c2 = (tri->c2 +
 419                 tri->dx23 * miny * TILE_SIZE -
 420                 tri->dy23 * minx * TILE_SIZE);
 421       int c3 = (tri->c3 +
 422                 tri->dx31 * miny * TILE_SIZE -
 423                 tri->dy31 * minx * TILE_SIZE);
 424
 425       int ei1 = tri->ei1 << TILE_ORDER;
 426       int ei2 = tri->ei2 << TILE_ORDER;
 427       int ei3 = tri->ei3 << TILE_ORDER;
 428
 429       int eo1 = tri->eo1 << TILE_ORDER;
 430       int eo2 = tri->eo2 << TILE_ORDER;
 431       int eo3 = tri->eo3 << TILE_ORDER;
 432
 433       int xstep1 = -(tri->dy12 << TILE_ORDER);
 434       int xstep2 = -(tri->dy23 << TILE_ORDER);
 435       int xstep3 = -(tri->dy31 << TILE_ORDER);
 436
 437       int ystep1 = tri->dx12 << TILE_ORDER;
 438       int ystep2 = tri->dx23 << TILE_ORDER;
 439       int ystep3 = tri->dx31 << TILE_ORDER;
 440       int x, y;
 441
 442
 443       /* Trivially accept or reject blocks, else jump to per-pixel
 444        * examination above.
 445        */
 446       for (y = miny; y <= maxy; y++)
 447       {
 448          int cx1 = c1;
 449          int cx2 = c2;
 450          int cx3 = c3;
 451          boolean in = FALSE;  /* are we inside the triangle? */
 452
 453          for (x = minx; x <= maxx; x++)
 454          {
 455             if (cx1 + eo1 < 0 ||
 456                 cx2 + eo2 < 0 ||
 457                 cx3 + eo3 < 0)
 458             {
 459                /* do nothing */
 460                if (in)
 461                   break;  /* exiting triangle, all done with this row */
 462             }
 463             else if (cx1 + ei1 > 0 &&
 464                      cx2 + ei2 > 0 &&
 465                      cx3 + ei3 > 0)
 466             {
 467                in = TRUE;
 468                /* triangle covers the whole tile- shade whole tile */
 469                if(setup->fs.current.opaque) {
 470                   lp_scene_bin_reset( scene, x, y );
 471                   lp_scene_bin_command( scene, x, y,
 472                                         lp_rast_set_state,
 473                                         lp_rast_arg_state(setup->fs.stored) );
 474                }
 475                lp_scene_bin_command( scene, x, y,
 476                                      lp_rast_shade_tile,
 477                                      lp_rast_arg_inputs(&tri->inputs) );
 478             }
 479             else
 480             {
 481                in = TRUE;
 482                /* shade partial tile */
 483                lp_scene_bin_command( scene, x, y,
 484                                      lp_rast_triangle,
 485                                      lp_rast_arg_triangle(tri) );
 486             }
 487
 488             /* Iterate cx values across the region:
 489              */
 490             cx1 += xstep1;
 491             cx2 += xstep2;
 492             cx3 += xstep3;
 493          }
 494
 495          /* Iterate c values down the region:
 496           */
 497          c1 += ystep1;
 498          c2 += ystep2;
 499          c3 += ystep3;
 500       }
 501    }
 502 }
 503
 504
 505 static void triangle_cw( struct setup_context *setup,
 506                          const float (*v0)[4],
 507                          const float (*v1)[4],
 508                          const float (*v2)[4] )
 509 {
 510    do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface );
 511 }
 512
 513
 514 static void triangle_ccw( struct setup_context *setup,
 515                          const float (*v0)[4],
 516                          const float (*v1)[4],
 517                          const float (*v2)[4] )
 518 {
 519    do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface );
 520 }
 521
 522
 523 static void triangle_both( struct setup_context *setup,
 524                            const float (*v0)[4],
 525                            const float (*v1)[4],
 526                            const float (*v2)[4] )
 527 {
 528    /* edge vectors e = v0 - v2, f = v1 - v2 */
 529    const float ex = v0[0][0] - v2[0][0];
 530    const float ey = v0[0][1] - v2[0][1];
 531    const float fx = v1[0][0] - v2[0][0];
 532    const float fy = v1[0][1] - v2[0][1];
 533
 534    /* det = cross(e,f).z */
 535    if (ex * fy - ey * fx < 0.0f)
 536       triangle_ccw( setup, v0, v1, v2 );
 537    else
 538       triangle_cw( setup, v0, v1, v2 );
 539 }
 540
 541
 542 static void triangle_nop( struct setup_context *setup,
 543                           const float (*v0)[4],
 544                           const float (*v1)[4],
 545                           const float (*v2)[4] )
 546 {
 547 }
 548
 549
 550 void
 551 lp_setup_choose_triangle( struct setup_context *setup )
 552 {
 553    switch (setup->cullmode) {
 554    case PIPE_WINDING_NONE:
 555       setup->triangle = triangle_both;
 556       break;
 557    case PIPE_WINDING_CCW:
 558       setup->triangle = triangle_cw;
 559       break;
 560    case PIPE_WINDING_CW:
 561       setup->triangle = triangle_ccw;
 562       break;
 563    default:
 564       setup->triangle = triangle_nop;
 565       break;
 566    }
 567 }