src/gallium/drivers/softpipe/sp_setup.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * \brief  Primitive rasterization/rendering (points, lines, triangles)
  30  *
  31  * \author  Keith Whitwell <keith@tungstengraphics.com>
  32  * \author  Brian Paul
  33  */
  34
  35 #include "sp_setup.h"
  36
  37 #include "sp_context.h"
  38 #include "sp_headers.h"
  39 #include "sp_quad.h"
  40 #include "sp_state.h"
  41 #include "sp_prim_setup.h"
  42 #include "draw/draw_private.h"
  43 #include "draw/draw_vertex.h"
  44 #include "pipe/p_util.h"
  45 #include "pipe/p_shader_tokens.h"
  46
/* Set to 1 to compile in print_vertex() and dump every triangle's vertices. */
#define DEBUG_VERTS 0
/* Set to 1 to count the fragments emitted/written for each triangle. */
#define DEBUG_FRAGS 0
  49
  50 /**
  51  * Triangle edge info
  52  */
  53 struct edge {
  54    float dx;            /**< X(v1) - X(v0), used only during setup */
  55    float dy;            /**< Y(v1) - Y(v0), used only during setup */
  56    float dxdy;          /**< dx/dy */
  57    float sx, sy;        /**< first sample point coord */
  58    int lines;           /**< number of lines on this edge */
  59 };
  60
  61
  62 /**
  63  * Triangle setup info (derived from draw_stage).
  64  * Also used for line drawing (taking some liberties).
  65  */
  66 struct setup_context {
  67    struct softpipe_context *softpipe;
  68
  69    /* Vertices are just an array of floats making up each attribute in
  70     * turn.  Currently fixed at 4 floats, but should change in time.
  71     * Codegen will help cope with this.
  72     */
  73    const float (*vmax)[4];
  74    const float (*vmid)[4];
  75    const float (*vmin)[4];
  76    const float (*vprovoke)[4];
  77
  78    struct edge ebot;
  79    struct edge etop;
  80    struct edge emaj;
  81
  82    float oneoverarea;
  83
  84    struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
  85    struct tgsi_interp_coef posCoef;  /* For Z, W */
  86    struct quad_header quad;
  87
  88    struct {
  89       int left[2];   /**< [0] = row0, [1] = row1 */
  90       int right[2];
  91       int y;
  92       unsigned y_flags;
  93       unsigned mask;     /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
  94    } span;
  95
  96 #if DEBUG_FRAGS
  97    uint numFragsEmitted;  /**< per primitive */
  98    uint numFragsWritten;  /**< per primitive */
  99 #endif
 100
 101    unsigned winding;            /* which winding to cull */
 102 };
 103
 104
 105
 106
 107
 108 static boolean cull_tri( struct setup_context *setup,
 109                       float det )
 110 {
 111    if (det != 0)
 112    {
 113       /* if (det < 0 then Z points toward camera and triangle is
 114        * counter-clockwise winding.
 115        */
 116       unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
 117
 118       if ((winding & setup->winding) == 0)
 119          return FALSE;
 120    }
 121
 122    /* Culled:
 123     */
 124    return TRUE;
 125 }
 126
 127
 128
 129 /**
 130  * Clip setup->quad against the scissor/surface bounds.
 131  */
 132 static INLINE void
 133 quad_clip(struct setup_context *setup)
 134 {
 135    const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
 136    const int minx = (int) cliprect->minx;
 137    const int maxx = (int) cliprect->maxx;
 138    const int miny = (int) cliprect->miny;
 139    const int maxy = (int) cliprect->maxy;
 140
 141    if (setup->quad.x0 >= maxx ||
 142        setup->quad.y0 >= maxy ||
 143        setup->quad.x0 + 1 < minx ||
 144        setup->quad.y0 + 1 < miny) {
 145       /* totally clipped */
 146       setup->quad.mask = 0x0;
 147       return;
 148    }
 149    if (setup->quad.x0 < minx)
 150       setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
 151    if (setup->quad.y0 < miny)
 152       setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
 153    if (setup->quad.x0 == maxx - 1)
 154       setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
 155    if (setup->quad.y0 == maxy - 1)
 156       setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
 157 }
 158
 159
 160 /**
 161  * Emit a quad (pass to next stage) with clipping.
 162  */
 163 static INLINE void
 164 clip_emit_quad(struct setup_context *setup)
 165 {
 166    quad_clip(setup);
 167    if (setup->quad.mask) {
 168       struct softpipe_context *sp = setup->softpipe;
 169       sp->quad.first->run(sp->quad.first, &setup->quad);
 170    }
 171 }
 172
 173
 174 /**
 175  * Emit a quad (pass to next stage).  No clipping is done.
 176  */
 177 static INLINE void
 178 emit_quad( struct setup_context *setup, int x, int y, unsigned mask )
 179 {
 180    struct softpipe_context *sp = setup->softpipe;
 181    setup->quad.x0 = x;
 182    setup->quad.y0 = y;
 183    setup->quad.mask = mask;
 184 #if DEBUG_FRAGS
 185    if (mask & 1) setup->numFragsEmitted++;
 186    if (mask & 2) setup->numFragsEmitted++;
 187    if (mask & 4) setup->numFragsEmitted++;
 188    if (mask & 8) setup->numFragsEmitted++;
 189 #endif
 190    sp->quad.first->run(sp->quad.first, &setup->quad);
 191 #if DEBUG_FRAGS
 192    mask = setup->quad.mask;
 193    if (mask & 1) setup->numFragsWritten++;
 194    if (mask & 2) setup->numFragsWritten++;
 195    if (mask & 4) setup->numFragsWritten++;
 196    if (mask & 8) setup->numFragsWritten++;
 197 #endif
 198 }
 199
 200
 201 /**
 202  * Given an X or Y coordinate, return the block/quad coordinate that it
 203  * belongs to.
 204  */
 205 static INLINE int block( int x )
 206 {
 207    return x & ~1;
 208 }
 209
 210
 211 /**
 212  * Compute mask which indicates which pixels in the 2x2 quad are actually inside
 213  * the triangle's bounds.
 214  *
 215  * this is pretty nasty...  may need to rework flush_spans again to
 216  * fix it, if possible.
 217  */
 218 static unsigned calculate_mask( struct setup_context *setup, int x )
 219 {
 220    unsigned mask = 0x0;
 221
 222    if (x >= setup->span.left[0] && x < setup->span.right[0])
 223       mask |= MASK_TOP_LEFT;
 224
 225    if (x >= setup->span.left[1] && x < setup->span.right[1])
 226       mask |= MASK_BOTTOM_LEFT;
 227
 228    if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0])
 229       mask |= MASK_TOP_RIGHT;
 230
 231    if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1])
 232       mask |= MASK_BOTTOM_RIGHT;
 233
 234    return mask;
 235 }
 236
 237
 238 /**
 239  * Render a horizontal span of quads
 240  */
 241 static void flush_spans( struct setup_context *setup )
 242 {
 243    int minleft, maxright;
 244    int x;
 245
 246    switch (setup->span.y_flags) {
 247    case 0x3:
 248       /* both odd and even lines written (both quad rows) */
 249       minleft = MIN2(setup->span.left[0], setup->span.left[1]);
 250       maxright = MAX2(setup->span.right[0], setup->span.right[1]);
 251       break;
 252
 253    case 0x1:
 254       /* only even line written (quad top row) */
 255       minleft = setup->span.left[0];
 256       maxright = setup->span.right[0];
 257       break;
 258
 259    case 0x2:
 260       /* only odd line written (quad bottom row) */
 261       minleft = setup->span.left[1];
 262       maxright = setup->span.right[1];
 263       break;
 264
 265    default:
 266       return;
 267    }
 268
 269    /* XXX this loop could be moved into the above switch cases and
 270     * calculate_mask() could be simplified a bit...
 271     */
 272    for (x = block(minleft); x <= block(maxright); x += 2) {
 273       emit_quad( setup, x, setup->span.y,
 274                  calculate_mask( setup, x ) );
 275    }
 276
 277    setup->span.y = 0;
 278    setup->span.y_flags = 0;
 279    setup->span.right[0] = 0;
 280    setup->span.right[1] = 0;
 281 }
 282
 283 #if DEBUG_VERTS
 284 static void print_vertex(const struct setup_context *setup,
 285                          const float (*v)[4])
 286 {
 287    int i;
 288    debug_printf("Vertex: (%p)\n", v);
 289    for (i = 0; i < setup->quad.nr_attrs; i++) {
 290       debug_printf("  %d: %f %f %f %f\n",  i,
 291               v[i][0], v[i][1], v[i][2], v[i][3]);
 292    }
 293 }
 294 #endif
 295
 296 static boolean setup_sort_vertices( struct setup_context *setup,
 297                                     float det,
 298                                     const float (*v0)[4],
 299                                     const float (*v1)[4],
 300                                     const float (*v2)[4] )
 301 {
 302 #if DEBUG_VERTS
 303    debug_printf("Triangle:\n");
 304    print_vertex(setup, v0);
 305    print_vertex(setup, v1);
 306    print_vertex(setup, v2);
 307 #endif
 308
 309    setup->vprovoke = v2;
 310
 311    /* determine bottom to top order of vertices */
 312    {
 313       float y0 = v0[0][1];
 314       float y1 = v1[0][1];
 315       float y2 = v2[0][1];
 316       if (y0 <= y1) {
 317          if (y1 <= y2) {
 318             /* y0<=y1<=y2 */
 319             setup->vmin = v0;
 320             setup->vmid = v1;
 321             setup->vmax = v2;
 322          }
 323          else if (y2 <= y0) {
 324             /* y2<=y0<=y1 */
 325             setup->vmin = v2;
 326             setup->vmid = v0;
 327             setup->vmax = v1;
 328          }
 329          else {
 330             /* y0<=y2<=y1 */
 331             setup->vmin = v0;
 332             setup->vmid = v2;
 333             setup->vmax = v1;
 334          }
 335       }
 336       else {
 337          if (y0 <= y2) {
 338             /* y1<=y0<=y2 */
 339             setup->vmin = v1;
 340             setup->vmid = v0;
 341             setup->vmax = v2;
 342          }
 343          else if (y2 <= y1) {
 344             /* y2<=y1<=y0 */
 345             setup->vmin = v2;
 346             setup->vmid = v1;
 347             setup->vmax = v0;
 348          }
 349          else {
 350             /* y1<=y2<=y0 */
 351             setup->vmin = v1;
 352             setup->vmid = v2;
 353             setup->vmax = v0;
 354          }
 355       }
 356    }
 357
 358    setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0];
 359    setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1];
 360    setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
 361    setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
 362    setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0];
 363    setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1];
 364
 365    /*
 366     * Compute triangle's area.  Use 1/area to compute partial
 367     * derivatives of attributes later.
 368     *
 369     * The area will be the same as prim->det, but the sign may be
 370     * different depending on how the vertices get sorted above.
 371     *
 372     * To determine whether the primitive is front or back facing we
 373     * use the prim->det value because its sign is correct.
 374     */
 375    {
 376       const float area = (setup->emaj.dx * setup->ebot.dy -
 377                             setup->ebot.dx * setup->emaj.dy);
 378
 379       setup->oneoverarea = 1.0f / area;
 380       /*
 381       debug_printf("%s one-over-area %f  area %f  det %f\n",
 382                    __FUNCTION__, setup->oneoverarea, area, det );
 383       */
 384    }
 385
 386    /* We need to know if this is a front or back-facing triangle for:
 387     *  - the GLSL gl_FrontFacing fragment attribute (bool)
 388     *  - two-sided stencil test
 389     */
 390    setup->quad.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
 391
 392    return TRUE;
 393 }
 394
 395
 396 /**
 397  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
 398  * The value value comes from vertex[slot][i].
 399  * The result will be put into setup->coef[slot].a0[i].
 400  * \param slot  which attribute slot
 401  * \param i  which component of the slot (0..3)
 402  */
 403 static void const_coeff( struct setup_context *setup,
 404                          struct tgsi_interp_coef *coef,
 405                          uint vertSlot, uint i)
 406 {
 407    assert(i <= 3);
 408
 409    coef->dadx[i] = 0;
 410    coef->dady[i] = 0;
 411
 412    /* need provoking vertex info!
 413     */
 414    coef->a0[i] = setup->vprovoke[vertSlot][i];
 415 }
 416
 417
 418 /**
 419  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 420  * for a triangle.
 421  */
 422 static void tri_linear_coeff( struct setup_context *setup,
 423                               struct tgsi_interp_coef *coef,
 424                               uint vertSlot, uint i)
 425 {
 426    float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
 427    float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
 428    float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
 429    float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
 430    float dadx = a * setup->oneoverarea;
 431    float dady = b * setup->oneoverarea;
 432
 433    assert(i <= 3);
 434
 435    coef->dadx[i] = dadx;
 436    coef->dady[i] = dady;
 437
 438    /* calculate a0 as the value which would be sampled for the
 439     * fragment at (0,0), taking into account that we want to sample at
 440     * pixel centers, in other words (0.5, 0.5).
 441     *
 442     * this is neat but unfortunately not a good way to do things for
 443     * triangles with very large values of dadx or dady as it will
 444     * result in the subtraction and re-addition from a0 of a very
 445     * large number, which means we'll end up loosing a lot of the
 446     * fractional bits and precision from a0.  the way to fix this is
 447     * to define a0 as the sample at a pixel center somewhere near vmin
 448     * instead - i'll switch to this later.
 449     */
 450    coef->a0[i] = (setup->vmin[vertSlot][i] -
 451                   (dadx * (setup->vmin[0][0] - 0.5f) +
 452                    dady * (setup->vmin[0][1] - 0.5f)));
 453
 454    /*
 455    debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
 456                 slot, "xyzw"[i],
 457                 setup->coef[slot].a0[i],
 458                 setup->coef[slot].dadx[i],
 459                 setup->coef[slot].dady[i]);
 460    */
 461 }
 462
 463
 464 /**
 465  * Compute a0, dadx and dady for a perspective-corrected interpolant,
 466  * for a triangle.
 467  * We basically multiply the vertex value by 1/w before computing
 468  * the plane coefficients (a0, dadx, dady).
 469  * Later, when we compute the value at a particular fragment position we'll
 470  * divide the interpolated value by the interpolated W at that fragment.
 471  */
 472 static void tri_persp_coeff( struct setup_context *setup,
 473                              struct tgsi_interp_coef *coef,
 474                              uint vertSlot, uint i)
 475 {
 476    /* premultiply by 1/w  (v[0][3] is always W):
 477     */
 478    float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3];
 479    float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3];
 480    float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3];
 481    float botda = mida - mina;
 482    float majda = maxa - mina;
 483    float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
 484    float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
 485    float dadx = a * setup->oneoverarea;
 486    float dady = b * setup->oneoverarea;
 487
 488    /*
 489    debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
 490                 setup->vmin[vertSlot][i],
 491                 setup->vmid[vertSlot][i],
 492                 setup->vmax[vertSlot][i]
 493           );
 494    */
 495    assert(i <= 3);
 496
 497    coef->dadx[i] = dadx;
 498    coef->dady[i] = dady;
 499    coef->a0[i] = (mina -
 500                   (dadx * (setup->vmin[0][0] - 0.5f) +
 501                    dady * (setup->vmin[0][1] - 0.5f)));
 502 }
 503
 504
 505 /**
 506  * Special coefficient setup for gl_FragCoord.
 507  * X and Y are trivial, though Y has to be inverted for OpenGL.
 508  * Z and W are copied from posCoef which should have already been computed.
 509  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
 510  */
 511 static void
 512 setup_fragcoord_coeff(struct setup_context *setup, uint slot)
 513 {
 514    /*X*/
 515    setup->coef[slot].a0[0] = 0;
 516    setup->coef[slot].dadx[0] = 1.0;
 517    setup->coef[slot].dady[0] = 0.0;
 518    /*Y*/
 519    if (setup->softpipe->rasterizer->origin_lower_left) {
 520       /* y=0=bottom */
 521       const int winHeight = setup->softpipe->framebuffer.height;
 522       setup->coef[slot].a0[1] = (float) (winHeight - 1);
 523       setup->coef[slot].dady[1] = -1.0;
 524    }
 525    else {
 526       /* y=0=top */
 527       setup->coef[slot].a0[1] = 0.0;
 528       setup->coef[slot].dady[1] = 1.0;
 529    }
 530    setup->coef[slot].dadx[1] = 0.0;
 531    /*Z*/
 532    setup->coef[slot].a0[2] = setup->posCoef.a0[2];
 533    setup->coef[slot].dadx[2] = setup->posCoef.dadx[2];
 534    setup->coef[slot].dady[2] = setup->posCoef.dady[2];
 535    /*W*/
 536    setup->coef[slot].a0[3] = setup->posCoef.a0[3];
 537    setup->coef[slot].dadx[3] = setup->posCoef.dadx[3];
 538    setup->coef[slot].dady[3] = setup->posCoef.dady[3];
 539 }
 540
 541
 542
 543 /**
 544  * Compute the setup->coef[] array dadx, dady, a0 values.
 545  * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
 546  */
 547 static void setup_tri_coefficients( struct setup_context *setup )
 548 {
 549    struct softpipe_context *softpipe = setup->softpipe;
 550    const struct sp_fragment_shader *spfs = softpipe->fs;
 551    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
 552    uint fragSlot;
 553
 554    /* z and w are done by linear interpolation:
 555     */
 556    tri_linear_coeff(setup, &setup->posCoef, 0, 2);
 557    tri_linear_coeff(setup, &setup->posCoef, 0, 3);
 558
 559    /* setup interpolation for all the remaining attributes:
 560     */
 561    for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
 562       const uint vertSlot = vinfo->src_index[fragSlot];
 563       uint j;
 564
 565       switch (vinfo->interp_mode[fragSlot]) {
 566       case INTERP_CONSTANT:
 567          for (j = 0; j < NUM_CHANNELS; j++)
 568             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 569          break;
 570       case INTERP_LINEAR:
 571          for (j = 0; j < NUM_CHANNELS; j++)
 572             tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 573          break;
 574       case INTERP_PERSPECTIVE:
 575          for (j = 0; j < NUM_CHANNELS; j++)
 576             tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 577          break;
 578       case INTERP_POS:
 579          setup_fragcoord_coeff(setup, fragSlot);
 580          break;
 581       default:
 582          assert(0);
 583       }
 584
 585       if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
 586          /* FOG.y = front/back facing  XXX fix this */
 587          setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing;
 588          setup->coef[fragSlot].dadx[1] = 0.0;
 589          setup->coef[fragSlot].dady[1] = 0.0;
 590       }
 591    }
 592 }
 593
 594
 595
 596 static void setup_tri_edges( struct setup_context *setup )
 597 {
 598    float vmin_x = setup->vmin[0][0] + 0.5f;
 599    float vmid_x = setup->vmid[0][0] + 0.5f;
 600
 601    float vmin_y = setup->vmin[0][1] - 0.5f;
 602    float vmid_y = setup->vmid[0][1] - 0.5f;
 603    float vmax_y = setup->vmax[0][1] - 0.5f;
 604
 605    setup->emaj.sy = CEILF(vmin_y);
 606    setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy);
 607    setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
 608    setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
 609
 610    setup->etop.sy = CEILF(vmid_y);
 611    setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy);
 612    setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
 613    setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
 614
 615    setup->ebot.sy = CEILF(vmin_y);
 616    setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy);
 617    setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
 618    setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
 619 }
 620
 621
 622 /**
 623  * Render the upper or lower half of a triangle.
 624  * Scissoring/cliprect is applied here too.
 625  */
 626 static void subtriangle( struct setup_context *setup,
 627                          struct edge *eleft,
 628                          struct edge *eright,
 629                          unsigned lines )
 630 {
 631    const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
 632    const int minx = (int) cliprect->minx;
 633    const int maxx = (int) cliprect->maxx;
 634    const int miny = (int) cliprect->miny;
 635    const int maxy = (int) cliprect->maxy;
 636    int y, start_y, finish_y;
 637    int sy = (int)eleft->sy;
 638
 639    assert((int)eleft->sy == (int) eright->sy);
 640
 641    /* clip top/bottom */
 642    start_y = sy;
 643    finish_y = sy + lines;
 644
 645    if (start_y < miny)
 646       start_y = miny;
 647
 648    if (finish_y > maxy)
 649       finish_y = maxy;
 650
 651    start_y -= sy;
 652    finish_y -= sy;
 653
 654    /*
 655    debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
 656    */
 657
 658    for (y = start_y; y < finish_y; y++) {
 659
 660       /* avoid accumulating adds as floats don't have the precision to
 661        * accurately iterate large triangle edges that way.  luckily we
 662        * can just multiply these days.
 663        *
 664        * this is all drowned out by the attribute interpolation anyway.
 665        */
 666       int left = (int)(eleft->sx + y * eleft->dxdy);
 667       int right = (int)(eright->sx + y * eright->dxdy);
 668
 669       /* clip left/right */
 670       if (left < minx)
 671          left = minx;
 672       if (right > maxx)
 673          right = maxx;
 674
 675       if (left < right) {
 676          int _y = sy + y;
 677          if (block(_y) != setup->span.y) {
 678             flush_spans(setup);
 679             setup->span.y = block(_y);
 680          }
 681
 682          setup->span.left[_y&1] = left;
 683          setup->span.right[_y&1] = right;
 684          setup->span.y_flags |= 1<<(_y&1);
 685       }
 686    }
 687
 688
 689    /* save the values so that emaj can be restarted:
 690     */
 691    eleft->sx += lines * eleft->dxdy;
 692    eright->sx += lines * eright->dxdy;
 693    eleft->sy += lines;
 694    eright->sy += lines;
 695 }
 696
 697
 698 /**
 699  * Recalculate prim's determinant.  This is needed as we don't have
 700  * get this information through the vbuf_render interface & we must
 701  * calculate it here.
 702  */
 703 static float
 704 calc_det( const float (*v0)[4],
 705           const float (*v1)[4],
 706           const float (*v2)[4] )
 707 {
 708    /* edge vectors e = v0 - v2, f = v1 - v2 */
 709    const float ex = v0[0][0] - v2[0][0];
 710    const float ey = v0[0][1] - v2[0][1];
 711    const float fx = v1[0][0] - v2[0][0];
 712    const float fy = v1[0][1] - v2[0][1];
 713
 714    /* det = cross(e,f).z */
 715    return ex * fy - ey * fx;
 716 }
 717
 718
 719 /**
 720  * Do setup for triangle rasterization, then render the triangle.
 721  */
 722 void setup_tri( struct setup_context *setup,
 723                 const float (*v0)[4],
 724                 const float (*v1)[4],
 725                 const float (*v2)[4] )
 726 {
 727    float det = calc_det(v0, v1, v2);
 728
 729    /*
 730    debug_printf("%s\n", __FUNCTION__ );
 731    */
 732
 733 #if DEBUG_FRAGS
 734    setup->numFragsEmitted = 0;
 735    setup->numFragsWritten = 0;
 736 #endif
 737
 738
 739
 740    if (cull_tri( setup, det ))
 741       return;
 742
 743    setup_sort_vertices( setup, det, v0, v1, v2 );
 744    setup_tri_coefficients( setup );
 745    setup_tri_edges( setup );
 746
 747    setup->quad.prim = PRIM_TRI;
 748
 749    setup->span.y = 0;
 750    setup->span.y_flags = 0;
 751    setup->span.right[0] = 0;
 752    setup->span.right[1] = 0;
 753    /*   setup->span.z_mode = tri_z_mode( setup->ctx ); */
 754
 755    /*   init_constant_attribs( setup ); */
 756
 757    if (setup->oneoverarea < 0.0) {
 758       /* emaj on left:
 759        */
 760       subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
 761       subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
 762    }
 763    else {
 764       /* emaj on right:
 765        */
 766       subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
 767       subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
 768    }
 769
 770    flush_spans( setup );
 771
 772 #if DEBUG_FRAGS
 773    printf("Tri: %u frags emitted, %u written\n",
 774           setup->numFragsEmitted,
 775           setup->numFragsWritten);
 776 #endif
 777 }
 778
 779
 780
 781 /**
 782  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 783  * for a line.
 784  */
 785 static void
 786 line_linear_coeff(struct setup_context *setup,
 787                   struct tgsi_interp_coef *coef,
 788                   uint vertSlot, uint i)
 789 {
 790    const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
 791    const float dadx = da * setup->emaj.dx * setup->oneoverarea;
 792    const float dady = da * setup->emaj.dy * setup->oneoverarea;
 793    coef->dadx[i] = dadx;
 794    coef->dady[i] = dady;
 795    coef->a0[i] = (setup->vmin[vertSlot][i] -
 796                   (dadx * (setup->vmin[0][0] - 0.5f) +
 797                    dady * (setup->vmin[0][1] - 0.5f)));
 798 }
 799
 800
 801 /**
 802  * Compute a0, dadx and dady for a perspective-corrected interpolant,
 803  * for a line.
 804  */
 805 static void
 806 line_persp_coeff(struct setup_context *setup,
 807                   struct tgsi_interp_coef *coef,
 808                   uint vertSlot, uint i)
 809 {
 810    /* XXX double-check/verify this arithmetic */
 811    const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3];
 812    const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3];
 813    const float da = a1 - a0;
 814    const float dadx = da * setup->emaj.dx * setup->oneoverarea;
 815    const float dady = da * setup->emaj.dy * setup->oneoverarea;
 816    coef->dadx[i] = dadx;
 817    coef->dady[i] = dady;
 818    coef->a0[i] = (setup->vmin[vertSlot][i] -
 819                   (dadx * (setup->vmin[0][0] - 0.5f) +
 820                    dady * (setup->vmin[0][1] - 0.5f)));
 821 }
 822
 823
 824 /**
 825  * Compute the setup->coef[] array dadx, dady, a0 values.
 826  * Must be called after setup->vmin,vmax are initialized.
 827  */
 828 static INLINE void
 829 setup_line_coefficients(struct setup_context *setup,
 830                         const float (*v0)[4],
 831                         const float (*v1)[4])
 832 {
 833    struct softpipe_context *softpipe = setup->softpipe;
 834    const struct sp_fragment_shader *spfs = softpipe->fs;
 835    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
 836    uint fragSlot;
 837
 838    /* use setup->vmin, vmax to point to vertices */
 839    setup->vprovoke = v1;
 840    setup->vmin = v0;
 841    setup->vmax = v1;
 842
 843    setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
 844    setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
 845    /* NOTE: this is not really 1/area */
 846    setup->oneoverarea = 1.0f / (setup->emaj.dx * setup->emaj.dx +
 847                                 setup->emaj.dy * setup->emaj.dy);
 848
 849    /* z and w are done by linear interpolation:
 850     */
 851    line_linear_coeff(setup, &setup->posCoef, 0, 2);
 852    line_linear_coeff(setup, &setup->posCoef, 0, 3);
 853
 854    /* setup interpolation for all the remaining attributes:
 855     */
 856    for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
 857       const uint vertSlot = vinfo->src_index[fragSlot];
 858       uint j;
 859
 860       switch (vinfo->interp_mode[fragSlot]) {
 861       case INTERP_CONSTANT:
 862          for (j = 0; j < NUM_CHANNELS; j++)
 863             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 864          break;
 865       case INTERP_LINEAR:
 866          for (j = 0; j < NUM_CHANNELS; j++)
 867             line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 868          break;
 869       case INTERP_PERSPECTIVE:
 870          for (j = 0; j < NUM_CHANNELS; j++)
 871             line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 872          break;
 873       case INTERP_POS:
 874          setup_fragcoord_coeff(setup, fragSlot);
 875          break;
 876       default:
 877          assert(0);
 878       }
 879
 880       if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
 881          /* FOG.y = front/back facing  XXX fix this */
 882          setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing;
 883          setup->coef[fragSlot].dadx[1] = 0.0;
 884          setup->coef[fragSlot].dady[1] = 0.0;
 885       }
 886    }
 887 }
 888
 889
 890 /**
 891  * Plot a pixel in a line segment.
 892  */
 893 static INLINE void
 894 plot(struct setup_context *setup, int x, int y)
 895 {
 896    const int iy = y & 1;
 897    const int ix = x & 1;
 898    const int quadX = x - ix;
 899    const int quadY = y - iy;
 900    const int mask = (1 << ix) << (2 * iy);
 901
 902    if (quadX != setup->quad.x0 ||
 903        quadY != setup->quad.y0)
 904    {
 905       /* flush prev quad, start new quad */
 906
 907       if (setup->quad.x0 != -1)
 908          clip_emit_quad(setup);
 909
 910       setup->quad.x0 = quadX;
 911       setup->quad.y0 = quadY;
 912       setup->quad.mask = 0x0;
 913    }
 914
 915    setup->quad.mask |= mask;
 916 }
 917
 918
 919 /**
 920  * Do setup for line rasterization, then render the line.
 921  * Single-pixel width, no stipple, etc.  We rely on the 'draw' module
 922  * to handle stippling and wide lines.
 923  */
 924 void
 925 setup_line(struct setup_context *setup,
 926            const float (*v0)[4],
 927            const float (*v1)[4])
 928 {
 929    int x0 = (int) v0[0][0];
 930    int x1 = (int) v1[0][0];
 931    int y0 = (int) v0[0][1];
 932    int y1 = (int) v1[0][1];
 933    int dx = x1 - x0;
 934    int dy = y1 - y0;
 935    int xstep, ystep;
 936
 937    if (dx == 0 && dy == 0)
 938       return;
 939
 940    setup_line_coefficients(setup, v0, v1);
 941
 942    if (dx < 0) {
 943       dx = -dx;   /* make positive */
 944       xstep = -1;
 945    }
 946    else {
 947       xstep = 1;
 948    }
 949
 950    if (dy < 0) {
 951       dy = -dy;   /* make positive */
 952       ystep = -1;
 953    }
 954    else {
 955       ystep = 1;
 956    }
 957
 958    assert(dx >= 0);
 959    assert(dy >= 0);
 960
 961    setup->quad.x0 = setup->quad.y0 = -1;
 962    setup->quad.mask = 0x0;
 963    setup->quad.prim = PRIM_LINE;
 964    /* XXX temporary: set coverage to 1.0 so the line appears
 965     * if AA mode happens to be enabled.
 966     */
 967    setup->quad.coverage[0] =
 968    setup->quad.coverage[1] =
 969    setup->quad.coverage[2] =
 970    setup->quad.coverage[3] = 1.0;
 971
 972    if (dx > dy) {
 973       /*** X-major line ***/
 974       int i;
 975       const int errorInc = dy + dy;
 976       int error = errorInc - dx;
 977       const int errorDec = error - dx;
 978
 979       for (i = 0; i < dx; i++) {
 980          plot(setup, x0, y0);
 981
 982          x0 += xstep;
 983          if (error < 0) {
 984             error += errorInc;
 985          }
 986          else {
 987             error += errorDec;
 988             y0 += ystep;
 989          }
 990       }
 991    }
 992    else {
 993       /*** Y-major line ***/
 994       int i;
 995       const int errorInc = dx + dx;
 996       int error = errorInc - dy;
 997       const int errorDec = error - dy;
 998
 999       for (i = 0; i < dy; i++) {
1000          plot(setup, x0, y0);
1001
1002          y0 += ystep;
1003          if (error < 0) {
1004             error += errorInc;
1005          }
1006          else {
1007             error += errorDec;
1008             x0 += xstep;
1009          }
1010       }
1011    }
1012
1013    /* draw final quad */
1014    if (setup->quad.mask) {
1015       clip_emit_quad(setup);
1016    }
1017 }
1018
1019
1020 static void
1021 point_persp_coeff(struct setup_context *setup,
1022                   const float (*vert)[4],
1023                   struct tgsi_interp_coef *coef,
1024                   uint vertSlot, uint i)
1025 {
1026    assert(i <= 3);
1027    coef->dadx[i] = 0.0F;
1028    coef->dady[i] = 0.0F;
1029    coef->a0[i] = vert[vertSlot][i] * vert[0][3];
1030 }
1031
1032
1033 /**
1034  * Do setup for point rasterization, then render the point.
1035  * Round or square points...
1036  * XXX could optimize a lot for 1-pixel points.
1037  */
1038 void
1039 setup_point( struct setup_context *setup,
1040              const float (*v0)[4] )
1041 {
1042    struct softpipe_context *softpipe = setup->softpipe;
1043    const struct sp_fragment_shader *spfs = softpipe->fs;
1044    const int sizeAttr = setup->softpipe->psize_slot;
1045    const float size
1046       = sizeAttr > 0 ? v0[sizeAttr][0]
1047       : setup->softpipe->rasterizer->point_size;
1048    const float halfSize = 0.5F * size;
1049    const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth;
1050    const float x = v0[0][0];  /* Note: data[0] is always position */
1051    const float y = v0[0][1];
1052    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
1053    uint fragSlot;
1054
1055    /* For points, all interpolants are constant-valued.
1056     * However, for point sprites, we'll need to setup texcoords appropriately.
1057     * XXX: which coefficients are the texcoords???
1058     * We may do point sprites as textured quads...
1059     *
1060     * KW: We don't know which coefficients are texcoords - ultimately
1061     * the choice of what interpolation mode to use for each attribute
1062     * should be determined by the fragment program, using
1063     * per-attribute declaration statements that include interpolation
1064     * mode as a parameter.  So either the fragment program will have
1065     * to be adjusted for pointsprite vs normal point behaviour, or
1066     * otherwise a special interpolation mode will have to be defined
1067     * which matches the required behaviour for point sprites.  But -
1068     * the latter is not a feature of normal hardware, and as such
1069     * probably should be ruled out on that basis.
1070     */
1071    setup->vprovoke = v0;
1072
1073    /* setup Z, W */
1074    const_coeff(setup, &setup->posCoef, 0, 2);
1075    const_coeff(setup, &setup->posCoef, 0, 3);
1076
1077    for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
1078       const uint vertSlot = vinfo->src_index[fragSlot];
1079       uint j;
1080
1081       switch (vinfo->interp_mode[fragSlot]) {
1082       case INTERP_CONSTANT:
1083          /* fall-through */
1084       case INTERP_LINEAR:
1085          for (j = 0; j < NUM_CHANNELS; j++)
1086             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1087          break;
1088       case INTERP_PERSPECTIVE:
1089          for (j = 0; j < NUM_CHANNELS; j++)
1090             point_persp_coeff(setup, setup->vprovoke,
1091                               &setup->coef[fragSlot], vertSlot, j);
1092          break;
1093       case INTERP_POS:
1094          setup_fragcoord_coeff(setup, fragSlot);
1095          break;
1096       default:
1097          assert(0);
1098       }
1099
1100       if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
1101          /* FOG.y = front/back facing  XXX fix this */
1102          setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing;
1103          setup->coef[fragSlot].dadx[1] = 0.0;
1104          setup->coef[fragSlot].dady[1] = 0.0;
1105       }
1106    }
1107
1108    setup->quad.prim = PRIM_POINT;
1109
1110    if (halfSize <= 0.5 && !round) {
1111       /* special case for 1-pixel points */
1112       const int ix = ((int) x) & 1;
1113       const int iy = ((int) y) & 1;
1114       setup->quad.x0 = (int) x - ix;
1115       setup->quad.y0 = (int) y - iy;
1116       setup->quad.mask = (1 << ix) << (2 * iy);
1117       clip_emit_quad(setup);
1118    }
1119    else {
1120       if (round) {
1121          /* rounded points */
1122          const int ixmin = block((int) (x - halfSize));
1123          const int ixmax = block((int) (x + halfSize));
1124          const int iymin = block((int) (y - halfSize));
1125          const int iymax = block((int) (y + halfSize));
1126          const float rmin = halfSize - 0.7071F;  /* 0.7071 = sqrt(2)/2 */
1127          const float rmax = halfSize + 0.7071F;
1128          const float rmin2 = MAX2(0.0F, rmin * rmin);
1129          const float rmax2 = rmax * rmax;
1130          const float cscale = 1.0F / (rmax2 - rmin2);
1131          int ix, iy;
1132
1133          for (iy = iymin; iy <= iymax; iy += 2) {
1134             for (ix = ixmin; ix <= ixmax; ix += 2) {
1135                float dx, dy, dist2, cover;
1136
1137                setup->quad.mask = 0x0;
1138
1139                dx = (ix + 0.5f) - x;
1140                dy = (iy + 0.5f) - y;
1141                dist2 = dx * dx + dy * dy;
1142                if (dist2 <= rmax2) {
1143                   cover = 1.0F - (dist2 - rmin2) * cscale;
1144                   setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
1145                   setup->quad.mask |= MASK_TOP_LEFT;
1146                }
1147
1148                dx = (ix + 1.5f) - x;
1149                dy = (iy + 0.5f) - y;
1150                dist2 = dx * dx + dy * dy;
1151                if (dist2 <= rmax2) {
1152                   cover = 1.0F - (dist2 - rmin2) * cscale;
1153                   setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
1154                   setup->quad.mask |= MASK_TOP_RIGHT;
1155                }
1156
1157                dx = (ix + 0.5f) - x;
1158                dy = (iy + 1.5f) - y;
1159                dist2 = dx * dx + dy * dy;
1160                if (dist2 <= rmax2) {
1161                   cover = 1.0F - (dist2 - rmin2) * cscale;
1162                   setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
1163                   setup->quad.mask |= MASK_BOTTOM_LEFT;
1164                }
1165
1166                dx = (ix + 1.5f) - x;
1167                dy = (iy + 1.5f) - y;
1168                dist2 = dx * dx + dy * dy;
1169                if (dist2 <= rmax2) {
1170                   cover = 1.0F - (dist2 - rmin2) * cscale;
1171                   setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
1172                   setup->quad.mask |= MASK_BOTTOM_RIGHT;
1173                }
1174
1175                if (setup->quad.mask) {
1176                   setup->quad.x0 = ix;
1177                   setup->quad.y0 = iy;
1178                   clip_emit_quad(setup);
1179                }
1180             }
1181          }
1182       }
1183       else {
1184          /* square points */
1185          const int xmin = (int) (x + 0.75 - halfSize);
1186          const int ymin = (int) (y + 0.25 - halfSize);
1187          const int xmax = xmin + (int) size;
1188          const int ymax = ymin + (int) size;
1189          /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
1190          const int ixmin = block(xmin);
1191          const int ixmax = block(xmax - 1);
1192          const int iymin = block(ymin);
1193          const int iymax = block(ymax - 1);
1194          int ix, iy;
1195
1196          /*
1197          debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
1198          */
1199          for (iy = iymin; iy <= iymax; iy += 2) {
1200             uint rowMask = 0xf;
1201             if (iy < ymin) {
1202                /* above the top edge */
1203                rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
1204             }
1205             if (iy + 1 >= ymax) {
1206                /* below the bottom edge */
1207                rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
1208             }
1209
1210             for (ix = ixmin; ix <= ixmax; ix += 2) {
1211                uint mask = rowMask;
1212
1213                if (ix < xmin) {
1214                   /* fragment is past left edge of point, turn off left bits */
1215                   mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
1216                }
1217                if (ix + 1 >= xmax) {
1218                   /* past the right edge */
1219                   mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
1220                }
1221
1222                setup->quad.mask = mask;
1223                setup->quad.x0 = ix;
1224                setup->quad.y0 = iy;
1225                clip_emit_quad(setup);
1226             }
1227          }
1228       }
1229    }
1230 }
1231
1232 void setup_prepare( struct setup_context *setup )
1233 {
1234    struct softpipe_context *sp = setup->softpipe;
1235    unsigned i;
1236
1237    if (sp->dirty) {
1238       softpipe_update_derived(sp);
1239    }
1240
1241    /* Mark surfaces as defined now */
1242    for (i = 0; i < sp->framebuffer.num_cbufs; i++){
1243       if (sp->framebuffer.cbufs[i]) {
1244          sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED;
1245       }
1246    }
1247    if (sp->framebuffer.zsbuf) {
1248       sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED;
1249    }
1250
1251    {
1252       const struct sp_fragment_shader *fs = setup->softpipe->fs;
1253       setup->quad.nr_attrs = fs->info.num_inputs;
1254       sp->quad.first->begin(sp->quad.first);
1255    }
1256
1257    setup->winding = sp->rasterizer->cull_mode;
1258 }
1259
1260
1261
/**
 * Release a setup context obtained from setup_create_context().
 *
 * \param setup  context to free
 */
void
setup_destroy_context( struct setup_context *setup )
{
   FREE(setup);
}
1266
1267
1268 /**
1269  * Create a new primitive setup/render stage.
1270  */
1271 struct setup_context *setup_create_context( struct softpipe_context *softpipe )
1272 {
1273    struct setup_context *setup = CALLOC_STRUCT(setup_context);
1274
1275    setup->softpipe = softpipe;
1276
1277    setup->quad.coef = setup->coef;
1278    setup->quad.posCoef = &setup->posCoef;
1279
1280    return setup;
1281 }