src/gallium/drivers/llvmpipe/lp_setup_tri.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /*
  29  * Binning code for triangles
  30  */
  31
  32 #include "lp_setup.h"
  33 #include "lp_state.h"
  34 #include "util/u_math.h"
  35 #include "util/u_memory.h"
  36
  37
  38 /**
  39  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
  40  */
  41 static void constant_coef( struct tgsi_interp_coef *coef,
  42                            const float (*v3)[4],
  43                            unsigned vert_attr,
  44                            unsigned i )
  45 {
  46    coef->a0[i] = v3[vert_attr][i];
  47    coef->dadx[i] = 0;
  48    coef->dady[i] = 0;
  49 }
  50
  51 /**
  52  * Compute a0, dadx and dady for a linearly interpolated coefficient,
  53  * for a triangle.
  54  */
  55 static void linear_coef( struct triangle *tri,
  56                          struct tgsi_interp_coef *coef,
  57                          const float (*v1)[4],
  58                          const float (*v2)[4],
  59                          const float (*v3)[4],
  60                          unsigned vert_attr,
  61                          unsigned i)
  62 {
  63    float a1 = v1[vert_attr][i];
  64    float a2 = v2[vert_attr][i];
  65    float a3 = v3[vert_attr][i];
  66
  67    float da12 = a1 - a2;
  68    float da31 = a3 - a1;
  69    float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
  70    float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
  71
  72    coef->dadx[i] = dadx;
  73    coef->dady[i] = dady;
  74
  75    /* calculate a0 as the value which would be sampled for the
  76     * fragment at (0,0), taking into account that we want to sample at
  77     * pixel centers, in other words (0.5, 0.5).
  78     *
  79     * this is neat but unfortunately not a good way to do things for
  80     * triangles with very large values of dadx or dady as it will
  81     * result in the subtraction and re-addition from a0 of a very
  82     * large number, which means we'll end up loosing a lot of the
  83     * fractional bits and precision from a0.  the way to fix this is
  84     * to define a0 as the sample at a pixel center somewhere near vmin
  85     * instead - i'll switch to this later.
  86     */
  87    coef->a0[i] = (v1[vert_attr][i] -
  88                   (dadx * (v1[0][0] - 0.5f) +
  89                    dady * (v1[0][1] - 0.5f)));
  90 }
  91
  92
  93 /**
  94  * Compute a0, dadx and dady for a perspective-corrected interpolant,
  95  * for a triangle.
  96  * We basically multiply the vertex value by 1/w before computing
  97  * the plane coefficients (a0, dadx, dady).
  98  * Later, when we compute the value at a particular fragment position we'll
  99  * divide the interpolated value by the interpolated W at that fragment.
 100  */
 101 static void perspective_coef( struct triangle *tri,
 102                               struct tgsi_interp_coef *coef,
 103                               const float (*v1)[4],
 104                               const float (*v2)[4],
 105                               const float (*v3)[4],
 106                               unsigned vert_attr,
 107                               unsigned i)
 108 {
 109    /* premultiply by 1/w  (v[0][3] is always 1/w):
 110     */
 111    float a1 = v1[vert_attr][i] * v1[0][3];
 112    float a2 = v2[vert_attr][i] * v2[0][3];
 113    float a3 = v3[vert_attr][i] * v3[0][3];
 114    float da12 = a1 - a2;
 115    float da31 = a3 - a1;
 116    float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
 117    float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
 118
 119
 120    coef->dadx[i] = dadx;
 121    coef->dady[i] = dady;
 122    coef->a0[i] = (a1 -
 123                   (dadx * (v1[0][0] - 0.5f) +
 124                    dady * (v1[0][1] - 0.5f)));
 125 }
 126
 127
 128 /**
 129  * Special coefficient setup for gl_FragCoord.
 130  * X and Y are trivial, though Y has to be inverted for OpenGL.
 131  * Z and W are copied from position_coef which should have already been computed.
 132  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
 133  */
 134 static void
 135 setup_fragcoord_coef(struct triangle *tri, unsigned slot)
 136 {
 137    /*X*/
 138    tri->coef[slot].a0[0] = 0.0;
 139    tri->coef[slot].dadx[0] = 1.0;
 140    tri->coef[slot].dady[0] = 0.0;
 141    /*Y*/
 142    tri->coef[slot].a0[1] = 0.0;
 143    tri->coef[slot].dadx[1] = 0.0;
 144    tri->coef[slot].dady[1] = 1.0;
 145    /*Z*/
 146    tri->coef[slot].a0[2] = tri->position_coef.a0[2];
 147    tri->coef[slot].dadx[2] = tri->position_coef.dadx[2];
 148    tri->coef[slot].dady[2] = tri->position_coef.dady[2];
 149    /*W*/
 150    tri->coef[slot].a0[3] = tri->position_coef.a0[3];
 151    tri->coef[slot].dadx[3] = tri->position_coef.dadx[3];
 152    tri->coef[slot].dady[3] = tri->position_coef.dady[3];
 153 }
 154
 155
 156
 157 /**
 158  * Compute the tri->coef[] array dadx, dady, a0 values.
 159  */
 160 static void setup_tri_coefficients( struct setup_context *setup,
 161                                     struct triangle *tri,
 162                                     const float (*v1)[4],
 163                                     const float (*v2)[4],
 164                                     const float (*v3)[4],
 165                                     boolean frontface )
 166 {
 167    const struct vertex_info *vinfo = setup->vinfo;
 168    unsigned input;
 169
 170    /* z and w are done by linear interpolation:
 171     */
 172    linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 2);
 173    linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 3);
 174
 175    /* setup interpolation for all the remaining attributes:
 176     */
 177    for (input = 0; input < vinfo->num_fs_inputs; input++) {
 178       unsigned vert_attr = vinfo->attrib[input].src_index;
 179       unsigned i;
 180
 181       switch (vinfo->attrib[input].interp_mode) {
 182       case INTERP_CONSTANT:
 183          for (i = 0; i < NUM_CHANNELS; i++)
 184             constant_coef(tri->coef[input], v3, vert_attr, i);
 185          break;
 186
 187       case INTERP_LINEAR:
 188          for (i = 0; i < NUM_CHANNELS; i++)
 189             linear_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i);
 190          break;
 191
 192       case INTERP_PERSPECTIVE:
 193          for (i = 0; i < NUM_CHANNELS; i++)
 194             perspective_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i);
 195          break;
 196
 197       case INTERP_POS:
 198          setup_fragcoord_coef(tri, input);
 199          break;
 200
 201       case INTERP_FACING:
 202          tri->coef[input].a0[0] = 1.0f - frontface;
 203          tri->coef[input].dadx[0] = 0.0;
 204          tri->coef[input].dady[0] = 0.0;
 205          break;
 206
 207       default:
 208          assert(0);
 209       }
 210    }
 211 }
 212
 213
 214
 215 /* XXX: do this by add/subtracting a large floating point number:
 216  */
 217 static inline float subpixel_snap( float a )
 218 {
 219    int i = a * 16;
 220    return (float)i * (1.0/16);
 221 }
 222
 223
 224
 225
 226
 227 /* to avoid having to allocate power-of-four, square render targets,
 228  * end up having a specialized version of the above that runs only at
 229  * the topmost level.
 230  *
 231  * at the topmost level there may be an arbitary number of steps on
 232  * either dimension, so this loop needs to be either separately
 233  * code-generated and unrolled for each render target size, or kept as
 234  * generic looping code:
 235  */
 236
 237 #define MIN3(a,b,c) MIN2(MIN2(a,b),c)
 238 #define MAX3(a,b,c) MAX2(MAX2(a,b),c)
 239
 240 static void
 241 do_triangle_ccw(struct lp_setup *setup,
 242                 const float (*v1)[4],
 243                 const float (*v2)[4],
 244                 const float (*v3)[4],
 245                 boolean frontfacing )
 246 {
 247    const int rt_width = setup->framebuffer.cbufs[0]->width;
 248    const int rt_height = setup->framebuffer.cbufs[0]->height;
 249
 250    const float y1 = subpixel_snap(v1[0][1]);
 251    const float y2 = subpixel_snap(v2[0][1]);
 252    const float y3 = subpixel_snap(v3[0][1]);
 253
 254    const float x1 = subpixel_snap(v1[0][0]);
 255    const float x2 = subpixel_snap(v2[0][0]);
 256    const float x3 = subpixel_snap(v3[0][0]);
 257
 258    struct triangle *tri = allocate_triangle( setup );
 259    float area;
 260    float c1, c2, c3;
 261    int i;
 262    int minx, maxx, miny, maxy;
 263
 264    tri->dx12 = x1 - x2;
 265    tri->dx23 = x2 - x3;
 266    tri->dx31 = x3 - x1;
 267
 268    tri->dy12 = y1 - y2;
 269    tri->dy23 = y2 - y3;
 270    tri->dy31 = y3 - y1;
 271
 272    area = (tri->dx12 * tri->dy31 -
 273            tri->dx31 * tri->dy12);
 274
 275    /* Cull non-ccw and zero-sized triangles.
 276     */
 277    if (area <= 0 || util_is_inf_or_nan(area))
 278       return;
 279
 280    // Bounding rectangle
 281    minx = util_iround(MIN3(x1, x2, x3) - .5);
 282    maxx = util_iround(MAX3(x1, x2, x3) + .5);
 283    miny = util_iround(MIN3(y1, y2, y3) - .5);
 284    maxy = util_iround(MAX3(y1, y2, y3) + .5);
 285
 286    /* Clamp to framebuffer (or tile) dimensions:
 287     */
 288    miny = MAX2(0, miny);
 289    minx = MAX2(0, minx);
 290    maxy = MIN2(rt_height, maxy);
 291    maxx = MIN2(rt_width, maxx);
 292
 293    if (miny == maxy || minx == maxx)
 294       return;
 295
 296    /* The only divide in this code.  Is it really needed?
 297     */
 298    tri->oneoverarea = 1.0f / area;
 299
 300    /* Setup parameter interpolants:
 301     */
 302    setup_tri_coefficients( setup, tri, v1, v2, v3, frontfacing );
 303
 304    /* half-edge constants, will be interated over the whole
 305     * rendertarget.
 306     */
 307    c1 = tri->dy12 * x1 - tri->dx12 * y1;
 308    c2 = tri->dy23 * x2 - tri->dx23 * y2;
 309    c3 = tri->dy31 * x3 - tri->dx31 * y3;
 310
 311    /* correct for top-left fill convention:
 312     */
 313    if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) c1++;
 314    if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) c2++;
 315    if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) c3++;
 316
 317    /* find trivial reject offsets for each edge for a single-pixel
 318     * sized block.  These will be scaled up at each recursive level to
 319     * match the active blocksize.  Scaling in this way works best if
 320     * the blocks are square.
 321     */
 322    tri->eo1 = 0;
 323    if (tri->dy12 < 0) tri->eo1 -= tri->dy12;
 324    if (tri->dx12 > 0) tri->eo1 += tri->dx12;
 325
 326    tri->eo2 = 0;
 327    if (tri->dy23 < 0) tri->eo2 -= tri->dy23;
 328    if (tri->dx23 > 0) tri->eo2 += tri->dx23;
 329
 330    tri->eo3 = 0;
 331    if (tri->dy31 < 0) tri->eo3 -= tri->dy31;
 332    if (tri->dx31 > 0) tri->eo3 += tri->dx31;
 333
 334    /* Calculate trivial accept offsets from the above.
 335     */
 336    tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1;
 337    tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2;
 338    tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;
 339
 340    minx &= ~(TILESIZE-1);               /* aligned blocks */
 341    miny &= ~(TILESIZE-1);               /* aligned blocks */
 342
 343    c1 += tri->dx12 * miny - tri->dy12 * minx;
 344    c2 += tri->dx23 * miny - tri->dy23 * minx;
 345    c3 += tri->dx31 * miny - tri->dy31 * minx;
 346
 347    /* Convert to tile coordinates:
 348     */
 349    minx /= TILESIZE;
 350    maxx /= TILESIZE;
 351    miny /= TILESIZE;
 352    maxy /= TILESIZE;
 353
 354    if (miny == maxy && minx == maxx)
 355    {
 356       /* Triangle is contained in a single tile:
 357        */
 358       bin_command(setup->tile[minx][miny], lp_rast_triangle, tri );
 359    }
 360    else
 361    {
 362       const int step = TILESIZE;
 363
 364       float ei1 = tri->ei1 * step;
 365       float ei2 = tri->ei2 * step;
 366       float ei3 = tri->ei3 * step;
 367
 368       float eo1 = tri->eo1 * step;
 369       float eo2 = tri->eo2 * step;
 370       float eo3 = tri->eo3 * step;
 371
 372       float xstep1 = -step * tri->dy12;
 373       float xstep2 = -step * tri->dy23;
 374       float xstep3 = -step * tri->dy31;
 375
 376       float ystep1 = step * tri->dx12;
 377       float ystep2 = step * tri->dx23;
 378       float ystep3 = step * tri->dx31;
 379       int x, y;
 380
 381
 382       /* Subdivide space into NxM blocks, where each block is square and
 383        * power-of-four in dimension.
 384        *
 385        * Trivially accept or reject blocks, else jump to per-pixel
 386        * examination above.
 387        */
 388       for (y = miny; y < maxy; y++)
 389       {
 390          float cx1 = c1;
 391          float cx2 = c2;
 392          float cx3 = c3;
 393
 394          for (x = minx; x < maxx; x++)
 395          {
 396             if (cx1 + eo1 < 0 ||
 397                 cx2 + eo2 < 0 ||
 398                 cx3 + eo3 < 0)
 399             {
 400                /* do nothing */
 401             }
 402             else if (cx1 + ei1 > 0 &&
 403                      cx2 + ei2 > 0 &&
 404                      cx3 + ei3 > 0)
 405             {
 406                /* shade whole tile */
 407                bin_command(setup->tile[x][y], lp_rast_shade_tile, &tri->inputs );
 408             }
 409             else
 410             {
 411                /* shade partial tile */
 412                bin_command(setup->tile[x][y], lp_rast_triangle, tri );
 413             }
 414
 415             /* Iterate cx values across the region:
 416              */
 417             cx1 += xstep1;
 418             cx2 += xstep2;
 419             cx3 += xstep3;
 420          }
 421
 422          /* Iterate c values down the region:
 423           */
 424          c1 += ystep1;
 425          c2 += ystep2;
 426          c3 += ystep3;
 427       }
 428    }
 429 }
 430
 431 static void triangle_cw( struct setup_context *setup,
 432                          const float (*v0)[4],
 433                          const float (*v1)[4],
 434                          const float (*v2)[4] )
 435 {
 436    do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface );
 437 }
 438
 439 static void triangle_ccw( struct setup_context *setup,
 440                          const float (*v0)[4],
 441                          const float (*v1)[4],
 442                          const float (*v2)[4] )
 443 {
 444    do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface );
 445 }
 446
 447 static void triangle_both( struct setup_context *setup,
 448                            const float (*v0)[4],
 449                            const float (*v1)[4],
 450                            const float (*v2)[4] )
 451 {
 452    /* edge vectors e = v0 - v2, f = v1 - v2 */
 453    const float ex = v0[0][0] - v2[0][0];
 454    const float ey = v0[0][1] - v2[0][1];
 455    const float fx = v1[0][0] - v2[0][0];
 456    const float fy = v1[0][1] - v2[0][1];
 457
 458    /* det = cross(e,f).z */
 459    if (ex * fy - ey * fx < 0)
 460       triangle_ccw( setup, v0, v1, v2 );
 461    else
 462       triangle_cw( setup, v0, v1, v2 );
 463 }
 464
 465 static void triangle_nop( struct setup_context *setup,
 466                           const float (*v0)[4],
 467                           const float (*v1)[4],
 468                           const float (*v2)[4] )
 469 {
 470 }
 471
 472 void setup_set_tri_state( struct setup_context *setup,
 473                           unsigned cull_mode,
 474                           boolean ccw_is_frontface)
 475 {
 476    setup->ccw_is_frontface = ccw_is_frontface;
 477
 478    switch (cull_mode) {
 479    case PIPE_WINDING_NONE:
 480       setup->triangle = triangle_both;
 481       break;
 482    case PIPE_WINDING_CCW:
 483       setup->triangle = triangle_cw;
 484       break;
 485    case PIPE_WINDING_CW:
 486       setup->triangle = triangle_ccw;
 487       break;
 488    default:
 489       setup->triangle = triangle_nop;
 490       break;
 491    }
 492 }
 493
 494