src/gallium/drivers/softpipe/sp_prim_setup.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * \brief  Primitive rasterization/rendering (points, lines, triangles)
  30  *
  31  * \author  Keith Whitwell <keith@tungstengraphics.com>
  32  * \author  Brian Paul
  33  */
  34
  35
  36 #include "sp_context.h"
  37 #include "sp_headers.h"
  38 #include "sp_quad.h"
  39 #include "sp_state.h"
  40 #include "sp_prim_setup.h"
  41 #include "draw/draw_private.h"
  42 #include "draw/draw_vertex.h"
  43 #include "pipe/p_util.h"
  44 #include "pipe/p_shader_tokens.h"
  45
  46 #define DEBUG_VERTS 0
  47
  48 /**
  49  * Triangle edge info
  50  */
  51 struct edge {
  52    float dx;            /**< X(v1) - X(v0), used only during setup */
  53    float dy;            /**< Y(v1) - Y(v0), used only during setup */
  54    float dxdy;          /**< dx/dy */
  55    float sx, sy;        /**< first sample point coord */
  56    int lines;           /**< number of lines on this edge */
  57 };
  58
  59
  60 /**
  61  * Triangle setup info (derived from draw_stage).
  62  * Also used for line drawing (taking some liberties).
  63  */
  64 struct setup_stage {
  65    struct draw_stage stage; /**< This must be first (base class) */
  66
  67    struct softpipe_context *softpipe;
  68
  69    /* Vertices are just an array of floats making up each attribute in
  70     * turn.  Currently fixed at 4 floats, but should change in time.
  71     * Codegen will help cope with this.
  72     */
  73    const struct vertex_header *vmax;
  74    const struct vertex_header *vmid;
  75    const struct vertex_header *vmin;
  76    const struct vertex_header *vprovoke;
  77
  78    struct edge ebot;
  79    struct edge etop;
  80    struct edge emaj;
  81
  82    float oneoverarea;
  83
  84    struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
  85    struct tgsi_interp_coef posCoef;  /* For Z, W */
  86    struct quad_header quad;
  87
  88    struct {
  89       int left[2];   /**< [0] = row0, [1] = row1 */
  90       int right[2];
  91       int y;
  92       unsigned y_flags;
  93       unsigned mask;     /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
  94    } span;
  95 };
  96
  97
  98
  99 /**
 100  * Basically a cast wrapper.
 101  */
 102 static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
 103 {
 104    return (struct setup_stage *)stage;
 105 }
 106
 107
 108 /**
 109  * Clip setup->quad against the scissor/surface bounds.
 110  */
 111 static INLINE void
 112 quad_clip(struct setup_stage *setup)
 113 {
 114    const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
 115    const int minx = (int) cliprect->minx;
 116    const int maxx = (int) cliprect->maxx;
 117    const int miny = (int) cliprect->miny;
 118    const int maxy = (int) cliprect->maxy;
 119
 120    if (setup->quad.x0 >= maxx ||
 121        setup->quad.y0 >= maxy ||
 122        setup->quad.x0 + 1 < minx ||
 123        setup->quad.y0 + 1 < miny) {
 124       /* totally clipped */
 125       setup->quad.mask = 0x0;
 126       return;
 127    }
 128    if (setup->quad.x0 < minx)
 129       setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
 130    if (setup->quad.y0 < miny)
 131       setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
 132    if (setup->quad.x0 == maxx - 1)
 133       setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
 134    if (setup->quad.y0 == maxy - 1)
 135       setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
 136 }
 137
 138
 139 /**
 140  * Emit a quad (pass to next stage) with clipping.
 141  */
 142 static INLINE void
 143 clip_emit_quad(struct setup_stage *setup)
 144 {
 145    quad_clip(setup);
 146    if (setup->quad.mask) {
 147       struct softpipe_context *sp = setup->softpipe;
 148       sp->quad.first->run(sp->quad.first, &setup->quad);
 149    }
 150 }
 151
 152
 153 /**
 154  * Emit a quad (pass to next stage).  No clipping is done.
 155  */
 156 static INLINE void
 157 emit_quad( struct setup_stage *setup, int x, int y, unsigned mask )
 158 {
 159    struct softpipe_context *sp = setup->softpipe;
 160    setup->quad.x0 = x;
 161    setup->quad.y0 = y;
 162    setup->quad.mask = mask;
 163    sp->quad.first->run(sp->quad.first, &setup->quad);
 164 }
 165
 166
 167 /**
 168  * Given an X or Y coordinate, return the block/quad coordinate that it
 169  * belongs to.
 170  */
 171 static INLINE int block( int x )
 172 {
 173    return x & ~1;
 174 }
 175
 176
 177 /**
 178  * Compute mask which indicates which pixels in the 2x2 quad are actually inside
 179  * the triangle's bounds.
 180  *
 181  * this is pretty nasty...  may need to rework flush_spans again to
 182  * fix it, if possible.
 183  */
 184 static unsigned calculate_mask( struct setup_stage *setup, int x )
 185 {
 186    unsigned mask = 0x0;
 187
 188    if (x >= setup->span.left[0] && x < setup->span.right[0])
 189       mask |= MASK_TOP_LEFT;
 190
 191    if (x >= setup->span.left[1] && x < setup->span.right[1])
 192       mask |= MASK_BOTTOM_LEFT;
 193
 194    if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0])
 195       mask |= MASK_TOP_RIGHT;
 196
 197    if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1])
 198       mask |= MASK_BOTTOM_RIGHT;
 199
 200    return mask;
 201 }
 202
 203
 204 /**
 205  * Render a horizontal span of quads
 206  */
 207 static void flush_spans( struct setup_stage *setup )
 208 {
 209    int minleft, maxright;
 210    int x;
 211
 212    switch (setup->span.y_flags) {
 213    case 0x3:
 214       /* both odd and even lines written (both quad rows) */
 215       minleft = MIN2(setup->span.left[0], setup->span.left[1]);
 216       maxright = MAX2(setup->span.right[0], setup->span.right[1]);
 217       break;
 218
 219    case 0x1:
 220       /* only even line written (quad top row) */
 221       minleft = setup->span.left[0];
 222       maxright = setup->span.right[0];
 223       break;
 224
 225    case 0x2:
 226       /* only odd line written (quad bottom row) */
 227       minleft = setup->span.left[1];
 228       maxright = setup->span.right[1];
 229       break;
 230
 231    default:
 232       return;
 233    }
 234
 235    /* XXX this loop could be moved into the above switch cases and
 236     * calculate_mask() could be simplified a bit...
 237     */
 238    for (x = block(minleft); x <= block(maxright); x += 2) {
 239       emit_quad( setup, x, setup->span.y,
 240                  calculate_mask( setup, x ) );
 241    }
 242
 243    setup->span.y = 0;
 244    setup->span.y_flags = 0;
 245    setup->span.right[0] = 0;
 246    setup->span.right[1] = 0;
 247 }
 248
 249 #if DEBUG_VERTS
 250 static void print_vertex(const struct setup_stage *setup,
 251                          const struct vertex_header *v)
 252 {
 253    int i;
 254    debug_printf("Vertex: (%p)\n", v);
 255    for (i = 0; i < setup->quad.nr_attrs; i++) {
 256       debug_printf("  %d: %f %f %f %f\n",  i,
 257               v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]);
 258    }
 259 }
 260 #endif
 261
 262 static boolean setup_sort_vertices( struct setup_stage *setup,
 263                                       const struct prim_header *prim )
 264 {
 265    const struct vertex_header *v0 = prim->v[0];
 266    const struct vertex_header *v1 = prim->v[1];
 267    const struct vertex_header *v2 = prim->v[2];
 268
 269 #if DEBUG_VERTS
 270    debug_printf("Triangle:\n");
 271    print_vertex(setup, v0);
 272    print_vertex(setup, v1);
 273    print_vertex(setup, v2);
 274 #endif
 275
 276    setup->vprovoke = v2;
 277
 278    /* determine bottom to top order of vertices */
 279    {
 280       float y0 = v0->data[0][1];
 281       float y1 = v1->data[0][1];
 282       float y2 = v2->data[0][1];
 283       if (y0 <= y1) {
 284          if (y1 <= y2) {
 285             /* y0<=y1<=y2 */
 286             setup->vmin = v0;
 287             setup->vmid = v1;
 288             setup->vmax = v2;
 289          }
 290          else if (y2 <= y0) {
 291             /* y2<=y0<=y1 */
 292             setup->vmin = v2;
 293             setup->vmid = v0;
 294             setup->vmax = v1;
 295          }
 296          else {
 297             /* y0<=y2<=y1 */
 298             setup->vmin = v0;
 299             setup->vmid = v2;
 300             setup->vmax = v1;
 301          }
 302       }
 303       else {
 304          if (y0 <= y2) {
 305             /* y1<=y0<=y2 */
 306             setup->vmin = v1;
 307             setup->vmid = v0;
 308             setup->vmax = v2;
 309          }
 310          else if (y2 <= y1) {
 311             /* y2<=y1<=y0 */
 312             setup->vmin = v2;
 313             setup->vmid = v1;
 314             setup->vmax = v0;
 315          }
 316          else {
 317             /* y1<=y2<=y0 */
 318             setup->vmin = v1;
 319             setup->vmid = v2;
 320             setup->vmax = v0;
 321          }
 322       }
 323    }
 324
 325    setup->ebot.dx = setup->vmid->data[0][0] - setup->vmin->data[0][0];
 326    setup->ebot.dy = setup->vmid->data[0][1] - setup->vmin->data[0][1];
 327    setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0];
 328    setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1];
 329    setup->etop.dx = setup->vmax->data[0][0] - setup->vmid->data[0][0];
 330    setup->etop.dy = setup->vmax->data[0][1] - setup->vmid->data[0][1];
 331
 332    /*
 333     * Compute triangle's area.  Use 1/area to compute partial
 334     * derivatives of attributes later.
 335     *
 336     * The area will be the same as prim->det, but the sign may be
 337     * different depending on how the vertices get sorted above.
 338     *
 339     * To determine whether the primitive is front or back facing we
 340     * use the prim->det value because its sign is correct.
 341     */
 342    {
 343       const float area = (setup->emaj.dx * setup->ebot.dy -
 344                             setup->ebot.dx * setup->emaj.dy);
 345
 346       setup->oneoverarea = 1.0f / area;
 347       /*
 348       debug_printf("%s one-over-area %f  area %f  det %f\n",
 349                    __FUNCTION__, setup->oneoverarea, area, prim->det );
 350       */
 351    }
 352
 353    /* We need to know if this is a front or back-facing triangle for:
 354     *  - the GLSL gl_FrontFacing fragment attribute (bool)
 355     *  - two-sided stencil test
 356     */
 357    setup->quad.facing = (prim->det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
 358
 359    return TRUE;
 360 }
 361
 362
 363 /**
 364  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
 365  * The value value comes from vertex->data[slot][i].
 366  * The result will be put into setup->coef[slot].a0[i].
 367  * \param slot  which attribute slot
 368  * \param i  which component of the slot (0..3)
 369  */
 370 static void const_coeff( struct setup_stage *setup,
 371                          struct tgsi_interp_coef *coef,
 372                          uint vertSlot, uint i)
 373 {
 374    assert(i <= 3);
 375
 376    coef->dadx[i] = 0;
 377    coef->dady[i] = 0;
 378
 379    /* need provoking vertex info!
 380     */
 381    coef->a0[i] = setup->vprovoke->data[vertSlot][i];
 382 }
 383
 384
 385 /**
 386  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 387  * for a triangle.
 388  */
 389 static void tri_linear_coeff( struct setup_stage *setup,
 390                               struct tgsi_interp_coef *coef,
 391                               uint vertSlot, uint i)
 392 {
 393    float botda = setup->vmid->data[vertSlot][i] - setup->vmin->data[vertSlot][i];
 394    float majda = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i];
 395    float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
 396    float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
 397    float dadx = a * setup->oneoverarea;
 398    float dady = b * setup->oneoverarea;
 399
 400    assert(i <= 3);
 401
 402    coef->dadx[i] = dadx;
 403    coef->dady[i] = dady;
 404
 405    /* calculate a0 as the value which would be sampled for the
 406     * fragment at (0,0), taking into account that we want to sample at
 407     * pixel centers, in other words (0.5, 0.5).
 408     *
 409     * this is neat but unfortunately not a good way to do things for
 410     * triangles with very large values of dadx or dady as it will
 411     * result in the subtraction and re-addition from a0 of a very
 412     * large number, which means we'll end up loosing a lot of the
 413     * fractional bits and precision from a0.  the way to fix this is
 414     * to define a0 as the sample at a pixel center somewhere near vmin
 415     * instead - i'll switch to this later.
 416     */
 417    coef->a0[i] = (setup->vmin->data[vertSlot][i] -
 418                   (dadx * (setup->vmin->data[0][0] - 0.5f) +
 419                    dady * (setup->vmin->data[0][1] - 0.5f)));
 420
 421    /*
 422    debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
 423                 slot, "xyzw"[i],
 424                 setup->coef[slot].a0[i],
 425                 setup->coef[slot].dadx[i],
 426                 setup->coef[slot].dady[i]);
 427    */
 428 }
 429
 430
 431 /**
 432  * Compute a0, dadx and dady for a perspective-corrected interpolant,
 433  * for a triangle.
 434  * We basically multiply the vertex value by 1/w before computing
 435  * the plane coefficients (a0, dadx, dady).
 436  * Later, when we compute the value at a particular fragment position we'll
 437  * divide the interpolated value by the interpolated W at that fragment.
 438  */
 439 static void tri_persp_coeff( struct setup_stage *setup,
 440                              struct tgsi_interp_coef *coef,
 441                              uint vertSlot, uint i)
 442 {
 443    /* premultiply by 1/w  (v->data[0][3] is always W):
 444     */
 445    float mina = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3];
 446    float mida = setup->vmid->data[vertSlot][i] * setup->vmid->data[0][3];
 447    float maxa = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3];
 448    float botda = mida - mina;
 449    float majda = maxa - mina;
 450    float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
 451    float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
 452    float dadx = a * setup->oneoverarea;
 453    float dady = b * setup->oneoverarea;
 454
 455    /*
 456    debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
 457                 setup->vmin->data[vertSlot][i],
 458                 setup->vmid->data[vertSlot][i],
 459                 setup->vmax->data[vertSlot][i]
 460           );
 461    */
 462    assert(i <= 3);
 463
 464    coef->dadx[i] = dadx;
 465    coef->dady[i] = dady;
 466    coef->a0[i] = (mina -
 467                   (dadx * (setup->vmin->data[0][0] - 0.5f) +
 468                    dady * (setup->vmin->data[0][1] - 0.5f)));
 469 }
 470
 471
 472 /**
 473  * Special coefficient setup for gl_FragCoord.
 474  * X and Y are trivial, though Y has to be inverted for OpenGL.
 475  * Z and W are copied from posCoef which should have already been computed.
 476  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
 477  */
 478 static void
 479 setup_fragcoord_coeff(struct setup_stage *setup, uint slot)
 480 {
 481    /*X*/
 482    setup->coef[slot].a0[0] = 0;
 483    setup->coef[slot].dadx[0] = 1.0;
 484    setup->coef[slot].dady[0] = 0.0;
 485    /*Y*/
 486    if (setup->softpipe->rasterizer->origin_lower_left) {
 487       /* y=0=bottom */
 488       const int winHeight = setup->softpipe->framebuffer.cbufs[0]->height;
 489       setup->coef[slot].a0[1] = (float) (winHeight - 1);
 490       setup->coef[slot].dady[1] = -1.0;
 491    }
 492    else {
 493       /* y=0=top */
 494       setup->coef[slot].a0[1] = 0.0;
 495       setup->coef[slot].dady[1] = 1.0;
 496    }
 497    setup->coef[slot].dadx[1] = 0.0;
 498    /*Z*/
 499    setup->coef[slot].a0[2] = setup->posCoef.a0[2];
 500    setup->coef[slot].dadx[2] = setup->posCoef.dadx[2];
 501    setup->coef[slot].dady[2] = setup->posCoef.dady[2];
 502    /*W*/
 503    setup->coef[slot].a0[3] = setup->posCoef.a0[3];
 504    setup->coef[slot].dadx[3] = setup->posCoef.dadx[3];
 505    setup->coef[slot].dady[3] = setup->posCoef.dady[3];
 506 }
 507
 508
 509
 510 /**
 511  * Compute the setup->coef[] array dadx, dady, a0 values.
 512  * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
 513  */
 514 static void setup_tri_coefficients( struct setup_stage *setup )
 515 {
 516    struct softpipe_context *softpipe = setup->softpipe;
 517    const struct pipe_shader_state *fs = &softpipe->fs->shader;
 518    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
 519    uint fragSlot;
 520
 521    /* z and w are done by linear interpolation:
 522     */
 523    tri_linear_coeff(setup, &setup->posCoef, 0, 2);
 524    tri_linear_coeff(setup, &setup->posCoef, 0, 3);
 525
 526    /* setup interpolation for all the remaining attributes:
 527     */
 528    for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) {
 529       const uint vertSlot = vinfo->src_index[fragSlot];
 530       uint j;
 531
 532       switch (vinfo->interp_mode[fragSlot]) {
 533       case INTERP_CONSTANT:
 534          for (j = 0; j < NUM_CHANNELS; j++)
 535             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 536          break;
 537       case INTERP_LINEAR:
 538          for (j = 0; j < NUM_CHANNELS; j++)
 539             tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 540          break;
 541       case INTERP_PERSPECTIVE:
 542          for (j = 0; j < NUM_CHANNELS; j++)
 543             tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 544          break;
 545       case INTERP_POS:
 546          setup_fragcoord_coeff(setup, fragSlot);
 547          break;
 548       default:
 549          assert(0);
 550       }
 551
 552       if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
 553          /* FOG.y = front/back facing  XXX fix this */
 554          setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing;
 555          setup->coef[fragSlot].dadx[1] = 0.0;
 556          setup->coef[fragSlot].dady[1] = 0.0;
 557       }
 558    }
 559 }
 560
 561
 562
 563 static void setup_tri_edges( struct setup_stage *setup )
 564 {
 565    float vmin_x = setup->vmin->data[0][0] + 0.5f;
 566    float vmid_x = setup->vmid->data[0][0] + 0.5f;
 567
 568    float vmin_y = setup->vmin->data[0][1] - 0.5f;
 569    float vmid_y = setup->vmid->data[0][1] - 0.5f;
 570    float vmax_y = setup->vmax->data[0][1] - 0.5f;
 571
 572    setup->emaj.sy = CEILF(vmin_y);
 573    setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy);
 574    setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
 575    setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
 576
 577    setup->etop.sy = CEILF(vmid_y);
 578    setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy);
 579    setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
 580    setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
 581
 582    setup->ebot.sy = CEILF(vmin_y);
 583    setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy);
 584    setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
 585    setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
 586 }
 587
 588
 589 /**
 590  * Render the upper or lower half of a triangle.
 591  * Scissoring/cliprect is applied here too.
 592  */
 593 static void subtriangle( struct setup_stage *setup,
 594                          struct edge *eleft,
 595                          struct edge *eright,
 596                          unsigned lines )
 597 {
 598    const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
 599    const int minx = (int) cliprect->minx;
 600    const int maxx = (int) cliprect->maxx;
 601    const int miny = (int) cliprect->miny;
 602    const int maxy = (int) cliprect->maxy;
 603    int y, start_y, finish_y;
 604    int sy = (int)eleft->sy;
 605
 606    assert((int)eleft->sy == (int) eright->sy);
 607
 608    /* clip top/bottom */
 609    start_y = sy;
 610    finish_y = sy + lines;
 611
 612    if (start_y < miny)
 613       start_y = miny;
 614
 615    if (finish_y > maxy)
 616       finish_y = maxy;
 617
 618    start_y -= sy;
 619    finish_y -= sy;
 620
 621    /*
 622    debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
 623    */
 624
 625    for (y = start_y; y < finish_y; y++) {
 626
 627       /* avoid accumulating adds as floats don't have the precision to
 628        * accurately iterate large triangle edges that way.  luckily we
 629        * can just multiply these days.
 630        *
 631        * this is all drowned out by the attribute interpolation anyway.
 632        */
 633       int left = (int)(eleft->sx + y * eleft->dxdy);
 634       int right = (int)(eright->sx + y * eright->dxdy);
 635
 636       /* clip left/right */
 637       if (left < minx)
 638          left = minx;
 639       if (right > maxx)
 640          right = maxx;
 641
 642       if (left < right) {
 643          int _y = sy + y;
 644          if (block(_y) != setup->span.y) {
 645             flush_spans(setup);
 646             setup->span.y = block(_y);
 647          }
 648
 649          setup->span.left[_y&1] = left;
 650          setup->span.right[_y&1] = right;
 651          setup->span.y_flags |= 1<<(_y&1);
 652       }
 653    }
 654
 655
 656    /* save the values so that emaj can be restarted:
 657     */
 658    eleft->sx += lines * eleft->dxdy;
 659    eright->sx += lines * eright->dxdy;
 660    eleft->sy += lines;
 661    eright->sy += lines;
 662 }
 663
 664
 665 /**
 666  * Do setup for triangle rasterization, then render the triangle.
 667  */
 668 static void setup_tri( struct draw_stage *stage,
 669                        struct prim_header *prim )
 670 {
 671    struct setup_stage *setup = setup_stage( stage );
 672
 673    /*
 674    debug_printf("%s\n", __FUNCTION__ );
 675    */
 676
 677    setup_sort_vertices( setup, prim );
 678    setup_tri_coefficients( setup );
 679    setup_tri_edges( setup );
 680
 681    setup->quad.prim = PRIM_TRI;
 682
 683    setup->span.y = 0;
 684    setup->span.y_flags = 0;
 685    setup->span.right[0] = 0;
 686    setup->span.right[1] = 0;
 687    /*   setup->span.z_mode = tri_z_mode( setup->ctx ); */
 688
 689    /*   init_constant_attribs( setup ); */
 690
 691    if (setup->oneoverarea < 0.0) {
 692       /* emaj on left:
 693        */
 694       subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
 695       subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
 696    }
 697    else {
 698       /* emaj on right:
 699        */
 700       subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
 701       subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
 702    }
 703
 704    flush_spans( setup );
 705 }
 706
 707
 708
 709 /**
 710  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 711  * for a line.
 712  */
 713 static void
 714 line_linear_coeff(struct setup_stage *setup,
 715                   struct tgsi_interp_coef *coef,
 716                   uint vertSlot, uint i)
 717 {
 718    const float da = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i];
 719    const float dadx = da * setup->emaj.dx * setup->oneoverarea;
 720    const float dady = da * setup->emaj.dy * setup->oneoverarea;
 721    coef->dadx[i] = dadx;
 722    coef->dady[i] = dady;
 723    coef->a0[i] = (setup->vmin->data[vertSlot][i] -
 724                   (dadx * (setup->vmin->data[0][0] - 0.5f) +
 725                    dady * (setup->vmin->data[0][1] - 0.5f)));
 726 }
 727
 728
 729 /**
 730  * Compute a0, dadx and dady for a perspective-corrected interpolant,
 731  * for a line.
 732  */
 733 static void
 734 line_persp_coeff(struct setup_stage *setup,
 735                   struct tgsi_interp_coef *coef,
 736                   uint vertSlot, uint i)
 737 {
 738    /* XXX double-check/verify this arithmetic */
 739    const float a0 = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3];
 740    const float a1 = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3];
 741    const float da = a1 - a0;
 742    const float dadx = da * setup->emaj.dx * setup->oneoverarea;
 743    const float dady = da * setup->emaj.dy * setup->oneoverarea;
 744    coef->dadx[i] = dadx;
 745    coef->dady[i] = dady;
 746    coef->a0[i] = (setup->vmin->data[vertSlot][i] -
 747                   (dadx * (setup->vmin->data[0][0] - 0.5f) +
 748                    dady * (setup->vmin->data[0][1] - 0.5f)));
 749 }
 750
 751
 752 /**
 753  * Compute the setup->coef[] array dadx, dady, a0 values.
 754  * Must be called after setup->vmin,vmax are initialized.
 755  */
 756 static INLINE void
 757 setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim)
 758 {
 759    struct softpipe_context *softpipe = setup->softpipe;
 760    const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
 761    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
 762    uint fragSlot;
 763
 764    /* use setup->vmin, vmax to point to vertices */
 765    setup->vprovoke = prim->v[1];
 766    setup->vmin = prim->v[0];
 767    setup->vmax = prim->v[1];
 768
 769    setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0];
 770    setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1];
 771    /* NOTE: this is not really 1/area */
 772    setup->oneoverarea = 1.0f / (setup->emaj.dx * setup->emaj.dx +
 773                                 setup->emaj.dy * setup->emaj.dy);
 774
 775    /* z and w are done by linear interpolation:
 776     */
 777    line_linear_coeff(setup, &setup->posCoef, 0, 2);
 778    line_linear_coeff(setup, &setup->posCoef, 0, 3);
 779
 780    /* setup interpolation for all the remaining attributes:
 781     */
 782    for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) {
 783       const uint vertSlot = vinfo->src_index[fragSlot];
 784       uint j;
 785
 786       switch (vinfo->interp_mode[fragSlot]) {
 787       case INTERP_CONSTANT:
 788          for (j = 0; j < NUM_CHANNELS; j++)
 789             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 790          break;
 791       case INTERP_LINEAR:
 792          for (j = 0; j < NUM_CHANNELS; j++)
 793             line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 794          break;
 795       case INTERP_PERSPECTIVE:
 796          for (j = 0; j < NUM_CHANNELS; j++)
 797             line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 798          break;
 799       case INTERP_POS:
 800          setup_fragcoord_coeff(setup, fragSlot);
 801          break;
 802       default:
 803          assert(0);
 804       }
 805
 806       if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
 807          /* FOG.y = front/back facing  XXX fix this */
 808          setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing;
 809          setup->coef[fragSlot].dadx[1] = 0.0;
 810          setup->coef[fragSlot].dady[1] = 0.0;
 811       }
 812    }
 813 }
 814
 815
 816 /**
 817  * Plot a pixel in a line segment.
 818  */
 819 static INLINE void
 820 plot(struct setup_stage *setup, int x, int y)
 821 {
 822    const int iy = y & 1;
 823    const int ix = x & 1;
 824    const int quadX = x - ix;
 825    const int quadY = y - iy;
 826    const int mask = (1 << ix) << (2 * iy);
 827
 828    if (quadX != setup->quad.x0 ||
 829        quadY != setup->quad.y0)
 830    {
 831       /* flush prev quad, start new quad */
 832
 833       if (setup->quad.x0 != -1)
 834          clip_emit_quad(setup);
 835
 836       setup->quad.x0 = quadX;
 837       setup->quad.y0 = quadY;
 838       setup->quad.mask = 0x0;
 839    }
 840
 841    setup->quad.mask |= mask;
 842 }
 843
 844
 845 /**
 846  * Do setup for line rasterization, then render the line.
 847  * Single-pixel width, no stipple, etc.  We rely on the 'draw' module
 848  * to handle stippling and wide lines.
 849  */
 850 static void
 851 setup_line(struct draw_stage *stage, struct prim_header *prim)
 852 {
 853    const struct vertex_header *v0 = prim->v[0];
 854    const struct vertex_header *v1 = prim->v[1];
 855    struct setup_stage *setup = setup_stage( stage );
 856    int x0 = (int) v0->data[0][0];
 857    int x1 = (int) v1->data[0][0];
 858    int y0 = (int) v0->data[0][1];
 859    int y1 = (int) v1->data[0][1];
 860    int dx = x1 - x0;
 861    int dy = y1 - y0;
 862    int xstep, ystep;
 863
 864    if (dx == 0 && dy == 0)
 865       return;
 866
 867    setup_line_coefficients(setup, prim);
 868
 869    if (dx < 0) {
 870       dx = -dx;   /* make positive */
 871       xstep = -1;
 872    }
 873    else {
 874       xstep = 1;
 875    }
 876
 877    if (dy < 0) {
 878       dy = -dy;   /* make positive */
 879       ystep = -1;
 880    }
 881    else {
 882       ystep = 1;
 883    }
 884
 885    assert(dx >= 0);
 886    assert(dy >= 0);
 887
 888    setup->quad.x0 = setup->quad.y0 = -1;
 889    setup->quad.mask = 0x0;
 890    setup->quad.prim = PRIM_LINE;
 891    /* XXX temporary: set coverage to 1.0 so the line appears
 892     * if AA mode happens to be enabled.
 893     */
 894    setup->quad.coverage[0] =
 895    setup->quad.coverage[1] =
 896    setup->quad.coverage[2] =
 897    setup->quad.coverage[3] = 1.0;
 898
 899    if (dx > dy) {
 900       /*** X-major line ***/
 901       int i;
 902       const int errorInc = dy + dy;
 903       int error = errorInc - dx;
 904       const int errorDec = error - dx;
 905
 906       for (i = 0; i < dx; i++) {
 907          plot(setup, x0, y0);
 908
 909          x0 += xstep;
 910          if (error < 0) {
 911             error += errorInc;
 912          }
 913          else {
 914             error += errorDec;
 915             y0 += ystep;
 916          }
 917       }
 918    }
 919    else {
 920       /*** Y-major line ***/
 921       int i;
 922       const int errorInc = dx + dx;
 923       int error = errorInc - dy;
 924       const int errorDec = error - dy;
 925
 926       for (i = 0; i < dy; i++) {
 927          plot(setup, x0, y0);
 928
 929          y0 += ystep;
 930          if (error < 0) {
 931             error += errorInc;
 932          }
 933          else {
 934             error += errorDec;
 935             x0 += xstep;
 936          }
 937       }
 938    }
 939
 940    /* draw final quad */
 941    if (setup->quad.mask) {
 942       clip_emit_quad(setup);
 943    }
 944 }
 945
 946
 947 static void
 948 point_persp_coeff(struct setup_stage *setup,
 949                   const struct vertex_header *vert,
 950                   struct tgsi_interp_coef *coef,
 951                   uint vertSlot, uint i)
 952 {
 953    assert(i <= 3);
 954    coef->dadx[i] = 0.0F;
 955    coef->dady[i] = 0.0F;
 956    coef->a0[i] = vert->data[vertSlot][i] * vert->data[0][3];
 957 }
 958
 959
 960 /**
 961  * Do setup for point rasterization, then render the point.
 962  * Round or square points...
 963  * XXX could optimize a lot for 1-pixel points.
 964  */
 965 static void
 966 setup_point(struct draw_stage *stage, struct prim_header *prim)
 967 {
 968    struct setup_stage *setup = setup_stage( stage );
 969    struct softpipe_context *softpipe = setup->softpipe;
 970    const struct pipe_shader_state *fs = &softpipe->fs->shader;
 971    const struct vertex_header *v0 = prim->v[0];
 972    const int sizeAttr = setup->softpipe->psize_slot;
 973    const float size
 974       = sizeAttr > 0 ? v0->data[sizeAttr][0]
 975       : setup->softpipe->rasterizer->point_size;
 976    const float halfSize = 0.5F * size;
 977    const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth;
 978    const float x = v0->data[0][0];  /* Note: data[0] is always position */
 979    const float y = v0->data[0][1];
 980    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
 981    uint fragSlot;
 982
 983    /* For points, all interpolants are constant-valued.
 984     * However, for point sprites, we'll need to setup texcoords appropriately.
 985     * XXX: which coefficients are the texcoords???
 986     * We may do point sprites as textured quads...
 987     *
 988     * KW: We don't know which coefficients are texcoords - ultimately
 989     * the choice of what interpolation mode to use for each attribute
 990     * should be determined by the fragment program, using
 991     * per-attribute declaration statements that include interpolation
 992     * mode as a parameter.  So either the fragment program will have
 993     * to be adjusted for pointsprite vs normal point behaviour, or
 994     * otherwise a special interpolation mode will have to be defined
 995     * which matches the required behaviour for point sprites.  But -
 996     * the latter is not a feature of normal hardware, and as such
 997     * probably should be ruled out on that basis.
 998     */
 999    setup->vprovoke = prim->v[0];
1000
1001    /* setup Z, W */
1002    const_coeff(setup, &setup->posCoef, 0, 2);
1003    const_coeff(setup, &setup->posCoef, 0, 3);
1004
1005    for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) {
1006       const uint vertSlot = vinfo->src_index[fragSlot];
1007       uint j;
1008
1009       switch (vinfo->interp_mode[fragSlot]) {
1010       case INTERP_CONSTANT:
1011          /* fall-through */
1012       case INTERP_LINEAR:
1013          for (j = 0; j < NUM_CHANNELS; j++)
1014             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1015          break;
1016       case INTERP_PERSPECTIVE:
1017          for (j = 0; j < NUM_CHANNELS; j++)
1018             point_persp_coeff(setup, setup->vprovoke,
1019                               &setup->coef[fragSlot], vertSlot, j);
1020          break;
1021       case INTERP_POS:
1022          setup_fragcoord_coeff(setup, fragSlot);
1023          break;
1024       default:
1025          assert(0);
1026       }
1027
1028       if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
1029          /* FOG.y = front/back facing  XXX fix this */
1030          setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing;
1031          setup->coef[fragSlot].dadx[1] = 0.0;
1032          setup->coef[fragSlot].dady[1] = 0.0;
1033       }
1034    }
1035
1036    setup->quad.prim = PRIM_POINT;
1037
1038    if (halfSize <= 0.5 && !round) {
1039       /* special case for 1-pixel points */
1040       const int ix = ((int) x) & 1;
1041       const int iy = ((int) y) & 1;
1042       setup->quad.x0 = (int) x - ix;
1043       setup->quad.y0 = (int) y - iy;
1044       setup->quad.mask = (1 << ix) << (2 * iy);
1045       clip_emit_quad(setup);
1046    }
1047    else {
1048       if (round) {
1049          /* rounded points */
1050          const int ixmin = block((int) (x - halfSize));
1051          const int ixmax = block((int) (x + halfSize));
1052          const int iymin = block((int) (y - halfSize));
1053          const int iymax = block((int) (y + halfSize));
1054          const float rmin = halfSize - 0.7071F;  /* 0.7071 = sqrt(2)/2 */
1055          const float rmax = halfSize + 0.7071F;
1056          const float rmin2 = MAX2(0.0F, rmin * rmin);
1057          const float rmax2 = rmax * rmax;
1058          const float cscale = 1.0F / (rmax2 - rmin2);
1059          int ix, iy;
1060
1061          for (iy = iymin; iy <= iymax; iy += 2) {
1062             for (ix = ixmin; ix <= ixmax; ix += 2) {
1063                float dx, dy, dist2, cover;
1064
1065                setup->quad.mask = 0x0;
1066
1067                dx = (ix + 0.5f) - x;
1068                dy = (iy + 0.5f) - y;
1069                dist2 = dx * dx + dy * dy;
1070                if (dist2 <= rmax2) {
1071                   cover = 1.0F - (dist2 - rmin2) * cscale;
1072                   setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
1073                   setup->quad.mask |= MASK_TOP_LEFT;
1074                }
1075
1076                dx = (ix + 1.5f) - x;
1077                dy = (iy + 0.5f) - y;
1078                dist2 = dx * dx + dy * dy;
1079                if (dist2 <= rmax2) {
1080                   cover = 1.0F - (dist2 - rmin2) * cscale;
1081                   setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
1082                   setup->quad.mask |= MASK_TOP_RIGHT;
1083                }
1084
1085                dx = (ix + 0.5f) - x;
1086                dy = (iy + 1.5f) - y;
1087                dist2 = dx * dx + dy * dy;
1088                if (dist2 <= rmax2) {
1089                   cover = 1.0F - (dist2 - rmin2) * cscale;
1090                   setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
1091                   setup->quad.mask |= MASK_BOTTOM_LEFT;
1092                }
1093
1094                dx = (ix + 1.5f) - x;
1095                dy = (iy + 1.5f) - y;
1096                dist2 = dx * dx + dy * dy;
1097                if (dist2 <= rmax2) {
1098                   cover = 1.0F - (dist2 - rmin2) * cscale;
1099                   setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
1100                   setup->quad.mask |= MASK_BOTTOM_RIGHT;
1101                }
1102
1103                if (setup->quad.mask) {
1104                   setup->quad.x0 = ix;
1105                   setup->quad.y0 = iy;
1106                   clip_emit_quad(setup);
1107                }
1108             }
1109          }
1110       }
1111       else {
1112          /* square points */
1113          const int xmin = (int) (x + 0.75 - halfSize);
1114          const int ymin = (int) (y + 0.25 - halfSize);
1115          const int xmax = xmin + (int) size;
1116          const int ymax = ymin + (int) size;
1117          /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
1118          const int ixmin = block(xmin);
1119          const int ixmax = block(xmax - 1);
1120          const int iymin = block(ymin);
1121          const int iymax = block(ymax - 1);
1122          int ix, iy;
1123
1124          /*
1125          debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
1126          */
1127          for (iy = iymin; iy <= iymax; iy += 2) {
1128             uint rowMask = 0xf;
1129             if (iy < ymin) {
1130                /* above the top edge */
1131                rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
1132             }
1133             if (iy + 1 >= ymax) {
1134                /* below the bottom edge */
1135                rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
1136             }
1137
1138             for (ix = ixmin; ix <= ixmax; ix += 2) {
1139                uint mask = rowMask;
1140
1141                if (ix < xmin) {
1142                   /* fragment is past left edge of point, turn off left bits */
1143                   mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
1144                }
1145                if (ix + 1 >= xmax) {
1146                   /* past the right edge */
1147                   mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
1148                }
1149
1150                setup->quad.mask = mask;
1151                setup->quad.x0 = ix;
1152                setup->quad.y0 = iy;
1153                clip_emit_quad(setup);
1154             }
1155          }
1156       }
1157    }
1158 }
1159
1160
1161
1162 static void setup_begin( struct draw_stage *stage )
1163 {
1164    struct setup_stage *setup = setup_stage(stage);
1165    struct softpipe_context *sp = setup->softpipe;
1166    const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
1167
1168    if (sp->dirty) {
1169       softpipe_update_derived(sp);
1170    }
1171
1172    setup->quad.nr_attrs = fs->num_inputs;
1173
1174    sp->quad.first->begin(sp->quad.first);
1175
1176    stage->point = setup_point;
1177    stage->line = setup_line;
1178    stage->tri = setup_tri;
1179 }
1180
1181
1182 static void setup_first_point( struct draw_stage *stage,
1183                                struct prim_header *header )
1184 {
1185    setup_begin(stage);
1186    stage->point( stage, header );
1187 }
1188
1189 static void setup_first_line( struct draw_stage *stage,
1190                                struct prim_header *header )
1191 {
1192    setup_begin(stage);
1193    stage->line( stage, header );
1194 }
1195
1196
1197 static void setup_first_tri( struct draw_stage *stage,
1198                                struct prim_header *header )
1199 {
1200    setup_begin(stage);
1201    stage->tri( stage, header );
1202 }
1203
1204
1205
1206 static void setup_flush( struct draw_stage *stage,
1207                          unsigned flags )
1208 {
1209    stage->point = setup_first_point;
1210    stage->line = setup_first_line;
1211    stage->tri = setup_first_tri;
1212 }
1213
1214
/**
 * Stipple-counter reset hook.  Intentionally does nothing.
 */
static void
reset_stipple_counter(struct draw_stage *stage)
{
   (void) stage;
}
1218
1219
/**
 * Destroy hook: release the memory of the stage.
 *
 * The stage pointer itself is handed to FREE().
 */
static void
render_destroy(struct draw_stage *stage)
{
   FREE(stage);
}
1224
1225
1226 /**
1227  * Create a new primitive setup/render stage.
1228  */
1229 struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe )
1230 {
1231    struct setup_stage *setup = CALLOC_STRUCT(setup_stage);
1232
1233    setup->softpipe = softpipe;
1234    setup->stage.draw = softpipe->draw;
1235    setup->stage.point = setup_first_point;
1236    setup->stage.line = setup_first_line;
1237    setup->stage.tri = setup_first_tri;
1238    setup->stage.flush = setup_flush;
1239    setup->stage.reset_stipple_counter = reset_stipple_counter;
1240    setup->stage.destroy = render_destroy;
1241
1242    setup->quad.coef = setup->coef;
1243    setup->quad.posCoef = &setup->posCoef;
1244
1245    return &setup->stage;
1246 }