src/gallium/drivers/llvmpipe/lp_setup_tri.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /*
  29  * Binning code for triangles
  30  */
  31
  32 #include "lp_setup_context.h"
  33 #include "lp_rast.h"
  34 #include "util/u_math.h"
  35 #include "util/u_memory.h"
  36
  37 #define NUM_CHANNELS 4
  38
  39 /**
  40  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
  41  */
  42 static void constant_coef( struct lp_rast_triangle *tri,
  43                            unsigned slot,
  44                            const float value,
  45                            unsigned i )
  46 {
  47    tri->inputs.a0[slot][i] = value;
  48    tri->inputs.dadx[slot][i] = 0;
  49    tri->inputs.dady[slot][i] = 0;
  50 }
  51
  52 /**
  53  * Compute a0, dadx and dady for a linearly interpolated coefficient,
  54  * for a triangle.
  55  */
  56 static void linear_coef( struct lp_rast_triangle *tri,
  57                          unsigned slot,
  58                          const float (*v1)[4],
  59                          const float (*v2)[4],
  60                          const float (*v3)[4],
  61                          unsigned vert_attr,
  62                          unsigned i)
  63 {
  64    float a1 = v1[vert_attr][i];
  65    float a2 = v2[vert_attr][i];
  66    float a3 = v3[vert_attr][i];
  67
  68    float da12 = a1 - a2;
  69    float da31 = a3 - a1;
  70    float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
  71    float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
  72
  73    tri->inputs.dadx[slot][i] = dadx;
  74    tri->inputs.dady[slot][i] = dady;
  75
  76    /* calculate a0 as the value which would be sampled for the
  77     * fragment at (0,0), taking into account that we want to sample at
  78     * pixel centers, in other words (0.5, 0.5).
  79     *
  80     * this is neat but unfortunately not a good way to do things for
  81     * triangles with very large values of dadx or dady as it will
  82     * result in the subtraction and re-addition from a0 of a very
  83     * large number, which means we'll end up loosing a lot of the
  84     * fractional bits and precision from a0.  the way to fix this is
  85     * to define a0 as the sample at a pixel center somewhere near vmin
  86     * instead - i'll switch to this later.
  87     */
  88    tri->inputs.a0[slot][i] = (v1[vert_attr][i] -
  89                               (dadx * (v1[0][0] - 0.5f) +
  90                                dady * (v1[0][1] - 0.5f)));
  91 }
  92
  93
  94 /**
  95  * Compute a0, dadx and dady for a perspective-corrected interpolant,
  96  * for a triangle.
  97  * We basically multiply the vertex value by 1/w before computing
  98  * the plane coefficients (a0, dadx, dady).
  99  * Later, when we compute the value at a particular fragment position we'll
 100  * divide the interpolated value by the interpolated W at that fragment.
 101  */
 102 static void perspective_coef( struct lp_rast_triangle *tri,
 103                               unsigned slot,
 104                               const float (*v1)[4],
 105                               const float (*v2)[4],
 106                               const float (*v3)[4],
 107                               unsigned vert_attr,
 108                               unsigned i)
 109 {
 110    /* premultiply by 1/w  (v[0][3] is always 1/w):
 111     */
 112    float a1 = v1[vert_attr][i] * v1[0][3];
 113    float a2 = v2[vert_attr][i] * v2[0][3];
 114    float a3 = v3[vert_attr][i] * v3[0][3];
 115    float da12 = a1 - a2;
 116    float da31 = a3 - a1;
 117    float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
 118    float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
 119
 120
 121    tri->inputs.dadx[slot][i] = dadx;
 122    tri->inputs.dady[slot][i] = dady;
 123    tri->inputs.a0[slot][i] = (a1 -
 124                               (dadx * (v1[0][0] - 0.5f) +
 125                                dady * (v1[0][1] - 0.5f)));
 126 }
 127
 128
 129 /**
 130  * Special coefficient setup for gl_FragCoord.
 131  * X and Y are trivial, though Y has to be inverted for OpenGL.
 132  * Z and W are copied from position_coef which should have already been computed.
 133  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
 134  */
 135 static void
 136 setup_fragcoord_coef(struct lp_rast_triangle *tri,
 137                      unsigned slot,
 138                      const float (*v1)[4],
 139                      const float (*v2)[4],
 140                      const float (*v3)[4])
 141 {
 142    /*X*/
 143    tri->inputs.a0[slot][0] = 0.0;
 144    tri->inputs.dadx[slot][0] = 1.0;
 145    tri->inputs.dady[slot][0] = 0.0;
 146    /*Y*/
 147    tri->inputs.a0[slot][1] = 0.0;
 148    tri->inputs.dadx[slot][1] = 0.0;
 149    tri->inputs.dady[slot][1] = 1.0;
 150    /*Z*/
 151    linear_coef(tri, slot, v1, v2, v3, 0, 2);
 152    /*W*/
 153    linear_coef(tri, slot, v1, v2, v3, 0, 3);
 154 }
 155
 156
 157 static void setup_facing_coef( struct lp_rast_triangle *tri,
 158                                unsigned slot,
 159                                boolean frontface )
 160 {
 161    constant_coef( tri, slot, 1.0f - frontface, 0 );
 162    constant_coef( tri, slot, 0.0f, 1 ); /* wasted */
 163    constant_coef( tri, slot, 0.0f, 2 ); /* wasted */
 164    constant_coef( tri, slot, 0.0f, 3 ); /* wasted */
 165 }
 166
 167
 168 /**
 169  * Compute the tri->coef[] array dadx, dady, a0 values.
 170  */
 171 static void setup_tri_coefficients( struct setup_context *setup,
 172                                     struct lp_rast_triangle *tri,
 173                                     const float (*v1)[4],
 174                                     const float (*v2)[4],
 175                                     const float (*v3)[4],
 176                                     boolean frontface )
 177 {
 178    unsigned slot;
 179
 180    /* The internal position input is in slot zero:
 181     */
 182    setup_fragcoord_coef(tri, 0, v1, v2, v3);
 183
 184    /* setup interpolation for all the remaining attrbutes:
 185     */
 186    for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
 187       unsigned vert_attr = setup->fs.input[slot].src_index;
 188       unsigned i;
 189
 190       switch (setup->fs.input[slot].interp) {
 191       case LP_INTERP_CONSTANT:
 192          for (i = 0; i < NUM_CHANNELS; i++)
 193             constant_coef(tri, slot+1, v3[vert_attr][i], i);
 194          break;
 195
 196       case LP_INTERP_LINEAR:
 197          for (i = 0; i < NUM_CHANNELS; i++)
 198             linear_coef(tri, slot+1, v1, v2, v3, vert_attr, i);
 199          break;
 200
 201       case LP_INTERP_PERSPECTIVE:
 202          for (i = 0; i < NUM_CHANNELS; i++)
 203             perspective_coef(tri, slot+1, v1, v2, v3, vert_attr, i);
 204          break;
 205
 206       case LP_INTERP_POSITION:
 207          /* XXX: fix me - duplicates the values in slot zero.
 208           */
 209          setup_fragcoord_coef(tri, slot+1, v1, v2, v3);
 210          break;
 211
 212       case LP_INTERP_FACING:
 213          setup_facing_coef(tri, slot+1, frontface);
 214          break;
 215
 216       default:
 217          assert(0);
 218       }
 219    }
 220 }
 221
 222
 223
 224 /* XXX: do this by add/subtracting a large floating point number:
 225  */
 226 static inline float subpixel_snap( float a )
 227 {
 228    int i = a * 16;
 229    return (float)i * (1.0/16);
 230 }
 231
 232
/**
 * Placeholder: the body is empty, so calling this has no effect.
 * Nothing else in this file calls it.  Note that 'arg' is received by
 * value.
 */
static INLINE void bin_triangle( struct cmd_block_list *list,
                                 const struct lp_rast_triangle arg )
{
}
 237
 238
 239 /* to avoid having to allocate power-of-four, square render targets,
 240  * end up having a specialized version of the above that runs only at
 241  * the topmost level.
 242  *
 * at the topmost level there may be an arbitrary number of steps on
 244  * either dimension, so this loop needs to be either separately
 245  * code-generated and unrolled for each render target size, or kept as
 246  * generic looping code:
 247  */
 248
/* Three-way minimum / maximum, built by nesting the two-argument
 * MIN2 / MAX2 macros.
 * NOTE(review): MIN2/MAX2 are defined outside this file; if they
 * evaluate their arguments more than once then so do these, so avoid
 * passing expressions with side effects -- confirm against their
 * definition.
 */
#define MIN3(a,b,c) MIN2(MIN2(a,b),c)
#define MAX3(a,b,c) MAX2(MAX2(a,b),c)
 251
 252 static void
 253 do_triangle_ccw(struct setup_context *setup,
 254                 const float (*v1)[4],
 255                 const float (*v2)[4],
 256                 const float (*v3)[4],
 257                 boolean frontfacing )
 258 {
 259    const int rt_width = setup->fb.width;
 260    const int rt_height = setup->fb.height;
 261
 262    const float y1 = subpixel_snap(v1[0][1]);
 263    const float y2 = subpixel_snap(v2[0][1]);
 264    const float y3 = subpixel_snap(v3[0][1]);
 265
 266    const float x1 = subpixel_snap(v1[0][0]);
 267    const float x2 = subpixel_snap(v2[0][0]);
 268    const float x3 = subpixel_snap(v3[0][0]);
 269
 270    struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri );
 271    float area;
 272    int minx, maxx, miny, maxy;
 273    float c1, c2, c3;
 274
 275    tri->inputs.state = setup->fs.stored;
 276
 277    tri->dx12 = x1 - x2;
 278    tri->dx23 = x2 - x3;
 279    tri->dx31 = x3 - x1;
 280
 281    tri->dy12 = y1 - y2;
 282    tri->dy23 = y2 - y3;
 283    tri->dy31 = y3 - y1;
 284
 285    area = (tri->dx12 * tri->dy31 -
 286            tri->dx31 * tri->dy12);
 287
 288    /* Cull non-ccw and zero-sized triangles.
 289     */
 290    if (area <= 0 || util_is_inf_or_nan(area))
 291       return;
 292
 293    // Bounding rectangle
 294    minx = util_iround(MIN3(x1, x2, x3) - .5);
 295    maxx = util_iround(MAX3(x1, x2, x3) + .5);
 296    miny = util_iround(MIN3(y1, y2, y3) - .5);
 297    maxy = util_iround(MAX3(y1, y2, y3) + .5);
 298
 299    /* Clamp to framebuffer (or tile) dimensions:
 300     */
 301    miny = MAX2(0, miny);
 302    minx = MAX2(0, minx);
 303    maxy = MIN2(rt_height, maxy);
 304    maxx = MIN2(rt_width, maxx);
 305
 306    if (miny == maxy || minx == maxx)
 307       return;
 308
 309    tri->miny = miny;
 310    tri->minx = minx;
 311    tri->maxy = maxy;
 312    tri->maxx = maxx;
 313
 314    /* The only divide in this code.  Is it really needed?
 315     */
 316    tri->oneoverarea = 1.0f / area;
 317
 318    /* Setup parameter interpolants:
 319     */
 320    setup_tri_coefficients( setup, tri, v1, v2, v3, frontfacing );
 321
 322    /* half-edge constants, will be interated over the whole
 323     * rendertarget.
 324     */
 325    tri->c1 = tri->dy12 * x1 - tri->dx12 * y1;
 326    tri->c2 = tri->dy23 * x2 - tri->dx23 * y2;
 327    tri->c3 = tri->dy31 * x3 - tri->dx31 * y3;
 328
 329    /* correct for top-left fill convention:
 330     */
 331    if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1 += 1.0/16.0f;
 332    if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2 += 1.0/16.0f;
 333    if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3 += 1.0/16.0f;
 334
 335    /* find trivial reject offsets for each edge for a single-pixel
 336     * sized block.  These will be scaled up at each recursive level to
 337     * match the active blocksize.  Scaling in this way works best if
 338     * the blocks are square.
 339     */
 340    tri->eo1 = 0;
 341    if (tri->dy12 < 0) tri->eo1 -= tri->dy12;
 342    if (tri->dx12 > 0) tri->eo1 += tri->dx12;
 343
 344    tri->eo2 = 0;
 345    if (tri->dy23 < 0) tri->eo2 -= tri->dy23;
 346    if (tri->dx23 > 0) tri->eo2 += tri->dx23;
 347
 348    tri->eo3 = 0;
 349    if (tri->dy31 < 0) tri->eo3 -= tri->dy31;
 350    if (tri->dx31 > 0) tri->eo3 += tri->dx31;
 351
 352    /* Calculate trivial accept offsets from the above.
 353     */
 354    tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1;
 355    tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2;
 356    tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;
 357
 358    minx &= ~(TILESIZE-1);               /* aligned blocks */
 359    miny &= ~(TILESIZE-1);               /* aligned blocks */
 360
 361    c1 = tri->c1 + tri->dx12 * miny - tri->dy12 * minx;
 362    c2 = tri->c2 + tri->dx23 * miny - tri->dy23 * minx;
 363    c3 = tri->c3 + tri->dx31 * miny - tri->dy31 * minx;
 364
 365    minx /= TILESIZE;
 366    miny /= TILESIZE;
 367    maxx /= TILESIZE;
 368    maxy /= TILESIZE;
 369
 370    /* Convert to tile coordinates:
 371     */
 372    if (miny == maxy && minx == maxx)
 373    {
 374       /* Triangle is contained in a single tile:
 375        */
 376       bin_command( &setup->tile[minx][miny], lp_rast_triangle,
 377                    lp_rast_arg_triangle(tri) );
 378    }
 379    else
 380    {
 381       const int step = TILESIZE;
 382
 383       float ei1 = tri->ei1 * step;
 384       float ei2 = tri->ei2 * step;
 385       float ei3 = tri->ei3 * step;
 386
 387       float eo1 = tri->eo1 * step;
 388       float eo2 = tri->eo2 * step;
 389       float eo3 = tri->eo3 * step;
 390
 391       float xstep1 = -step * tri->dy12;
 392       float xstep2 = -step * tri->dy23;
 393       float xstep3 = -step * tri->dy31;
 394
 395       float ystep1 = step * tri->dx12;
 396       float ystep2 = step * tri->dx23;
 397       float ystep3 = step * tri->dx31;
 398       int x, y;
 399
 400
 401       /* Subdivide space into NxM blocks, where each block is square and
 402        * power-of-four in dimension.
 403        *
 404        * Trivially accept or reject blocks, else jump to per-pixel
 405        * examination above.
 406        */
 407       for (y = miny; y <= maxy; y++)
 408       {
 409          float cx1 = c1;
 410          float cx2 = c2;
 411          float cx3 = c3;
 412
 413          for (x = minx; x <= maxx; x++)
 414          {
 415             if (cx1 + eo1 < 0 ||
 416                 cx2 + eo2 < 0 ||
 417                 cx3 + eo3 < 0)
 418             {
 419                /* do nothing */
 420             }
 421             else if (cx1 + ei1 > 0 &&
 422                      cx2 + ei2 > 0 &&
 423                      cx3 + ei3 > 0)
 424             {
 425                /* shade whole tile */
 426                bin_command( &setup->tile[x][y], lp_rast_shade_tile,
 427                             lp_rast_arg_inputs(&tri->inputs) );
 428             }
 429             else
 430             {
 431                /* shade partial tile */
 432                bin_command( &setup->tile[x][y],
 433                             lp_rast_triangle,
 434                             lp_rast_arg_triangle(tri) );
 435             }
 436
 437             /* Iterate cx values across the region:
 438              */
 439             cx1 += xstep1;
 440             cx2 += xstep2;
 441             cx3 += xstep3;
 442          }
 443
 444          /* Iterate c values down the region:
 445           */
 446          c1 += ystep1;
 447          c2 += ystep2;
 448          c3 += ystep3;
 449       }
 450    }
 451 }
 452
 453 static void triangle_cw( struct setup_context *setup,
 454                          const float (*v0)[4],
 455                          const float (*v1)[4],
 456                          const float (*v2)[4] )
 457 {
 458    do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface );
 459 }
 460
 461 static void triangle_ccw( struct setup_context *setup,
 462                          const float (*v0)[4],
 463                          const float (*v1)[4],
 464                          const float (*v2)[4] )
 465 {
 466    do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface );
 467 }
 468
 469 static void triangle_both( struct setup_context *setup,
 470                            const float (*v0)[4],
 471                            const float (*v1)[4],
 472                            const float (*v2)[4] )
 473 {
 474    /* edge vectors e = v0 - v2, f = v1 - v2 */
 475    const float ex = v0[0][0] - v2[0][0];
 476    const float ey = v0[0][1] - v2[0][1];
 477    const float fx = v1[0][0] - v2[0][0];
 478    const float fy = v1[0][1] - v2[0][1];
 479
 480    /* det = cross(e,f).z */
 481    if (ex * fy - ey * fx < 0)
 482       triangle_ccw( setup, v0, v1, v2 );
 483    else
 484       triangle_cw( setup, v0, v1, v2 );
 485 }
 486
 487 static void triangle_nop( struct setup_context *setup,
 488                           const float (*v0)[4],
 489                           const float (*v1)[4],
 490                           const float (*v2)[4] )
 491 {
 492 }
 493
 494
 495 void
 496 lp_setup_choose_triangle( struct setup_context *setup )
 497 {
 498    switch (setup->cullmode) {
 499    case PIPE_WINDING_NONE:
 500       setup->triangle = triangle_both;
 501       break;
 502    case PIPE_WINDING_CCW:
 503       setup->triangle = triangle_cw;
 504       break;
 505    case PIPE_WINDING_CW:
 506       setup->triangle = triangle_ccw;
 507       break;
 508    default:
 509       setup->triangle = triangle_nop;
 510       break;
 511    }
 512 }
 513
 514