src/gallium/drivers/softpipe/sp_prim_setup.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * \brief  Primitive rasterization/rendering (points, lines, triangles)
  30  *
  31  * \author  Keith Whitwell <keith@tungstengraphics.com>
  32  * \author  Brian Paul
  33  */
  34
  35
  36 #include "sp_context.h"
  37 #include "sp_headers.h"
  38 #include "sp_quad.h"
  39 #include "sp_state.h"
  40 #include "sp_prim_setup.h"
  41 #include "draw/draw_private.h"
  42 #include "draw/draw_vertex.h"
  43 #include "pipe/p_util.h"
  44 #include "pipe/p_shader_tokens.h"
  45
  46 #define DEBUG_VERTS 0
  47
/**
 * Triangle edge info.
 *
 * One of these is kept for each of the three triangle edges (ebot, etop
 * and emaj in struct setup_stage).  dx/dy are written by
 * setup_sort_vertices(); dxdy, sx, sy and lines are written by
 * setup_tri_edges(); sx and sy are then advanced by subtriangle() after
 * each half of the triangle has been scanned.
 */
struct edge {
   float dx;            /**< X(v1) - X(v0), used only during setup */
   float dy;            /**< Y(v1) - Y(v0), used only during setup */
   float dxdy;          /**< dx/dy, i.e. the change in X per scanline */
   float sx, sy;        /**< first sample point coord */
   int lines;           /**< number of lines on this edge */
};
  58
  59
/**
 * Triangle setup info (derived from draw_stage).
 * Also used for line drawing (taking some liberties): the line path
 * reuses vmin/vmax, emaj and oneoverarea with a different meaning.
 */
struct setup_stage {
   struct draw_stage stage; /**< This must be first (base class) */

   struct softpipe_context *softpipe; /**< owning context: cliprect, rasterizer state, quad pipeline */

   /* Vertices are just an array of floats making up each attribute in
    * turn.  Currently fixed at 4 floats, but should change in time.
    * Codegen will help cope with this.
    */
   const struct vertex_header *vmax;      /**< triangle: vertex with the largest Y; line: second vertex */
   const struct vertex_header *vmid;      /**< triangle: vertex with the middle Y */
   const struct vertex_header *vmin;      /**< triangle: vertex with the smallest Y; line: first vertex */
   const struct vertex_header *vprovoke;  /**< vertex whose values are used for constant (flat) attributes */

   struct edge ebot;   /**< edge vmin -> vmid */
   struct edge etop;   /**< edge vmid -> vmax */
   struct edge emaj;   /**< edge vmin -> vmax */

   float oneoverarea;  /**< triangle: 1/area of the sorted vertices; line: 1/(dx*dx + dy*dy) */

   struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; /**< a0/dadx/dady per fragment shader input */
   struct tgsi_interp_coef posCoef;  /* For Z, W */
   struct quad_header quad;          /**< the quad handed to the first quad stage */

   struct {
      int left[2];   /**< [0] = row0, [1] = row1 */
      int right[2];  /**< right bounds per row; tested as exclusive by calculate_mask() */
      int y;         /**< even (block-aligned) Y of the quad row being accumulated */
      unsigned y_flags;  /**< bit 0 set: row0 written, bit 1 set: row1 written */
      unsigned mask;     /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
   } span;
};
  96
  97
  98
  99 /**
 100  * Basically a cast wrapper.
 101  */
 102 static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
 103 {
 104    return (struct setup_stage *)stage;
 105 }
 106
 107
 108 /**
 109  * Clip setup->quad against the scissor/surface bounds.
 110  */
 111 static INLINE void
 112 quad_clip(struct setup_stage *setup)
 113 {
 114    const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
 115    const int minx = (int) cliprect->minx;
 116    const int maxx = (int) cliprect->maxx;
 117    const int miny = (int) cliprect->miny;
 118    const int maxy = (int) cliprect->maxy;
 119
 120    if (setup->quad.x0 >= maxx ||
 121        setup->quad.y0 >= maxy ||
 122        setup->quad.x0 + 1 < minx ||
 123        setup->quad.y0 + 1 < miny) {
 124       /* totally clipped */
 125       setup->quad.mask = 0x0;
 126       return;
 127    }
 128    if (setup->quad.x0 < minx)
 129       setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
 130    if (setup->quad.y0 < miny)
 131       setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
 132    if (setup->quad.x0 == maxx - 1)
 133       setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
 134    if (setup->quad.y0 == maxy - 1)
 135       setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
 136 }
 137
 138
 139 /**
 140  * Emit a quad (pass to next stage) with clipping.
 141  */
 142 static INLINE void
 143 clip_emit_quad(struct setup_stage *setup)
 144 {
 145    quad_clip(setup);
 146    if (setup->quad.mask) {
 147       struct softpipe_context *sp = setup->softpipe;
 148       sp->quad.first->run(sp->quad.first, &setup->quad);
 149    }
 150 }
 151
 152
 153 /**
 154  * Emit a quad (pass to next stage).  No clipping is done.
 155  */
 156 static INLINE void
 157 emit_quad( struct setup_stage *setup, int x, int y, unsigned mask )
 158 {
 159    struct softpipe_context *sp = setup->softpipe;
 160    setup->quad.x0 = x;
 161    setup->quad.y0 = y;
 162    setup->quad.mask = mask;
 163    sp->quad.first->run(sp->quad.first, &setup->quad);
 164 }
 165
 166
 167 /**
 168  * Given an X or Y coordinate, return the block/quad coordinate that it
 169  * belongs to.
 170  */
 171 static INLINE int block( int x )
 172 {
 173    return x & ~1;
 174 }
 175
 176
 177 /**
 178  * Compute mask which indicates which pixels in the 2x2 quad are actually inside
 179  * the triangle's bounds.
 180  *
 181  * this is pretty nasty...  may need to rework flush_spans again to
 182  * fix it, if possible.
 183  */
 184 static unsigned calculate_mask( struct setup_stage *setup, int x )
 185 {
 186    unsigned mask = 0x0;
 187
 188    if (x >= setup->span.left[0] && x < setup->span.right[0])
 189       mask |= MASK_TOP_LEFT;
 190
 191    if (x >= setup->span.left[1] && x < setup->span.right[1])
 192       mask |= MASK_BOTTOM_LEFT;
 193
 194    if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0])
 195       mask |= MASK_TOP_RIGHT;
 196
 197    if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1])
 198       mask |= MASK_BOTTOM_RIGHT;
 199
 200    return mask;
 201 }
 202
 203
 204 /**
 205  * Render a horizontal span of quads
 206  */
 207 static void flush_spans( struct setup_stage *setup )
 208 {
 209    int minleft, maxright;
 210    int x;
 211
 212    switch (setup->span.y_flags) {
 213    case 0x3:
 214       /* both odd and even lines written (both quad rows) */
 215       minleft = MIN2(setup->span.left[0], setup->span.left[1]);
 216       maxright = MAX2(setup->span.right[0], setup->span.right[1]);
 217       break;
 218
 219    case 0x1:
 220       /* only even line written (quad top row) */
 221       minleft = setup->span.left[0];
 222       maxright = setup->span.right[0];
 223       break;
 224
 225    case 0x2:
 226       /* only odd line written (quad bottom row) */
 227       minleft = setup->span.left[1];
 228       maxright = setup->span.right[1];
 229       break;
 230
 231    default:
 232       return;
 233    }
 234
 235    /* XXX this loop could be moved into the above switch cases and
 236     * calculate_mask() could be simplified a bit...
 237     */
 238    for (x = block(minleft); x <= block(maxright); x += 2) {
 239       emit_quad( setup, x, setup->span.y,
 240                  calculate_mask( setup, x ) );
 241    }
 242
 243    setup->span.y = 0;
 244    setup->span.y_flags = 0;
 245    setup->span.right[0] = 0;
 246    setup->span.right[1] = 0;
 247 }
 248
 249 #if DEBUG_VERTS
 250 static void print_vertex(const struct setup_stage *setup,
 251                          const struct vertex_header *v)
 252 {
 253    int i;
 254    debug_printf("Vertex: (%p)\n", v);
 255    for (i = 0; i < setup->quad.nr_attrs; i++) {
 256       debug_printf("  %d: %f %f %f %f\n",  i,
 257               v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]);
 258    }
 259 }
 260 #endif
 261
 262 static boolean setup_sort_vertices( struct setup_stage *setup,
 263                                       const struct prim_header *prim )
 264 {
 265    const struct vertex_header *v0 = prim->v[0];
 266    const struct vertex_header *v1 = prim->v[1];
 267    const struct vertex_header *v2 = prim->v[2];
 268
 269 #if DEBUG_VERTS
 270    debug_printf("Triangle:\n");
 271    print_vertex(setup, v0);
 272    print_vertex(setup, v1);
 273    print_vertex(setup, v2);
 274 #endif
 275
 276    setup->vprovoke = v2;
 277
 278    /* determine bottom to top order of vertices */
 279    {
 280       float y0 = v0->data[0][1];
 281       float y1 = v1->data[0][1];
 282       float y2 = v2->data[0][1];
 283       if (y0 <= y1) {
 284          if (y1 <= y2) {
 285             /* y0<=y1<=y2 */
 286             setup->vmin = v0;
 287             setup->vmid = v1;
 288             setup->vmax = v2;
 289          }
 290          else if (y2 <= y0) {
 291             /* y2<=y0<=y1 */
 292             setup->vmin = v2;
 293             setup->vmid = v0;
 294             setup->vmax = v1;
 295          }
 296          else {
 297             /* y0<=y2<=y1 */
 298             setup->vmin = v0;
 299             setup->vmid = v2;
 300             setup->vmax = v1;
 301          }
 302       }
 303       else {
 304          if (y0 <= y2) {
 305             /* y1<=y0<=y2 */
 306             setup->vmin = v1;
 307             setup->vmid = v0;
 308             setup->vmax = v2;
 309          }
 310          else if (y2 <= y1) {
 311             /* y2<=y1<=y0 */
 312             setup->vmin = v2;
 313             setup->vmid = v1;
 314             setup->vmax = v0;
 315          }
 316          else {
 317             /* y1<=y2<=y0 */
 318             setup->vmin = v1;
 319             setup->vmid = v2;
 320             setup->vmax = v0;
 321          }
 322       }
 323    }
 324
 325    setup->ebot.dx = setup->vmid->data[0][0] - setup->vmin->data[0][0];
 326    setup->ebot.dy = setup->vmid->data[0][1] - setup->vmin->data[0][1];
 327    setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0];
 328    setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1];
 329    setup->etop.dx = setup->vmax->data[0][0] - setup->vmid->data[0][0];
 330    setup->etop.dy = setup->vmax->data[0][1] - setup->vmid->data[0][1];
 331
 332    /*
 333     * Compute triangle's area.  Use 1/area to compute partial
 334     * derivatives of attributes later.
 335     *
 336     * The area will be the same as prim->det, but the sign may be
 337     * different depending on how the vertices get sorted above.
 338     *
 339     * To determine whether the primitive is front or back facing we
 340     * use the prim->det value because its sign is correct.
 341     */
 342    {
 343       const float area = (setup->emaj.dx * setup->ebot.dy -
 344                             setup->ebot.dx * setup->emaj.dy);
 345
 346       setup->oneoverarea = 1.0f / area;
 347       /*
 348       debug_printf("%s one-over-area %f  area %f  det %f\n",
 349                    __FUNCTION__, setup->oneoverarea, area, prim->det );
 350       */
 351    }
 352
 353    /* We need to know if this is a front or back-facing triangle for:
 354     *  - the GLSL gl_FrontFacing fragment attribute (bool)
 355     *  - two-sided stencil test
 356     */
 357    setup->quad.facing = (prim->det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
 358
 359    return TRUE;
 360 }
 361
 362
 363 /**
 364  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
 365  * The value value comes from vertex->data[slot][i].
 366  * The result will be put into setup->coef[slot].a0[i].
 367  * \param slot  which attribute slot
 368  * \param i  which component of the slot (0..3)
 369  */
 370 static void const_coeff( struct setup_stage *setup,
 371                          struct tgsi_interp_coef *coef,
 372                          uint vertSlot, uint i)
 373 {
 374    assert(i <= 3);
 375
 376    coef->dadx[i] = 0;
 377    coef->dady[i] = 0;
 378
 379    /* need provoking vertex info!
 380     */
 381    coef->a0[i] = setup->vprovoke->data[vertSlot][i];
 382 }
 383
 384
 385 /**
 386  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 387  * for a triangle.
 388  */
 389 static void tri_linear_coeff( struct setup_stage *setup,
 390                               struct tgsi_interp_coef *coef,
 391                               uint vertSlot, uint i)
 392 {
 393    float botda = setup->vmid->data[vertSlot][i] - setup->vmin->data[vertSlot][i];
 394    float majda = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i];
 395    float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
 396    float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
 397    float dadx = a * setup->oneoverarea;
 398    float dady = b * setup->oneoverarea;
 399
 400    assert(i <= 3);
 401
 402    coef->dadx[i] = dadx;
 403    coef->dady[i] = dady;
 404
 405    /* calculate a0 as the value which would be sampled for the
 406     * fragment at (0,0), taking into account that we want to sample at
 407     * pixel centers, in other words (0.5, 0.5).
 408     *
 409     * this is neat but unfortunately not a good way to do things for
 410     * triangles with very large values of dadx or dady as it will
 411     * result in the subtraction and re-addition from a0 of a very
 412     * large number, which means we'll end up loosing a lot of the
 413     * fractional bits and precision from a0.  the way to fix this is
 414     * to define a0 as the sample at a pixel center somewhere near vmin
 415     * instead - i'll switch to this later.
 416     */
 417    coef->a0[i] = (setup->vmin->data[vertSlot][i] -
 418                   (dadx * (setup->vmin->data[0][0] - 0.5f) +
 419                    dady * (setup->vmin->data[0][1] - 0.5f)));
 420
 421    /*
 422    debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
 423                 slot, "xyzw"[i],
 424                 setup->coef[slot].a0[i],
 425                 setup->coef[slot].dadx[i],
 426                 setup->coef[slot].dady[i]);
 427    */
 428 }
 429
 430
 431 /**
 432  * Compute a0, dadx and dady for a perspective-corrected interpolant,
 433  * for a triangle.
 434  * We basically multiply the vertex value by 1/w before computing
 435  * the plane coefficients (a0, dadx, dady).
 436  * Later, when we compute the value at a particular fragment position we'll
 437  * divide the interpolated value by the interpolated W at that fragment.
 438  */
 439 static void tri_persp_coeff( struct setup_stage *setup,
 440                              struct tgsi_interp_coef *coef,
 441                              uint vertSlot, uint i)
 442 {
 443    /* premultiply by 1/w  (v->data[0][3] is always W):
 444     */
 445    float mina = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3];
 446    float mida = setup->vmid->data[vertSlot][i] * setup->vmid->data[0][3];
 447    float maxa = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3];
 448    float botda = mida - mina;
 449    float majda = maxa - mina;
 450    float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
 451    float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
 452    float dadx = a * setup->oneoverarea;
 453    float dady = b * setup->oneoverarea;
 454
 455    /*
 456    debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
 457                 setup->vmin->data[vertSlot][i],
 458                 setup->vmid->data[vertSlot][i],
 459                 setup->vmax->data[vertSlot][i]
 460           );
 461    */
 462    assert(i <= 3);
 463
 464    coef->dadx[i] = dadx;
 465    coef->dady[i] = dady;
 466    coef->a0[i] = (mina -
 467                   (dadx * (setup->vmin->data[0][0] - 0.5f) +
 468                    dady * (setup->vmin->data[0][1] - 0.5f)));
 469 }
 470
 471
 472 /**
 473  * Special coefficient setup for gl_FragCoord.
 474  * X and Y are trivial, though Y has to be inverted for OpenGL.
 475  * Z and W are copied from posCoef which should have already been computed.
 476  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
 477  */
 478 static void
 479 setup_fragcoord_coeff(struct setup_stage *setup)
 480 {
 481    /*X*/
 482    setup->coef[0].a0[0] = 0;
 483    setup->coef[0].dadx[0] = 1.0;
 484    setup->coef[0].dady[0] = 0.0;
 485    /*Y*/
 486    if (setup->softpipe->rasterizer->origin_lower_left) {
 487       /* y=0=bottom */
 488       const int winHeight = setup->softpipe->framebuffer.cbufs[0]->height;
 489       setup->coef[0].a0[1] = (float) (winHeight - 1);
 490       setup->coef[0].dady[1] = -1.0;
 491    }
 492    else {
 493       /* y=0=top */
 494       setup->coef[0].a0[1] = 0.0;
 495       setup->coef[0].dady[1] = 1.0;
 496    }
 497    setup->coef[0].dadx[1] = 0.0;
 498    /*Z*/
 499    setup->coef[0].a0[2] = setup->posCoef.a0[2];
 500    setup->coef[0].dadx[2] = setup->posCoef.dadx[2];
 501    setup->coef[0].dady[2] = setup->posCoef.dady[2];
 502    /*W*/
 503    setup->coef[0].a0[3] = setup->posCoef.a0[3];
 504    setup->coef[0].dadx[3] = setup->posCoef.dadx[3];
 505    setup->coef[0].dady[3] = setup->posCoef.dady[3];
 506 }
 507
 508
 509
 510 /**
 511  * Compute the setup->coef[] array dadx, dady, a0 values.
 512  * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
 513  */
 514 static void setup_tri_coefficients( struct setup_stage *setup )
 515 {
 516    struct softpipe_context *softpipe = setup->softpipe;
 517    const struct pipe_shader_state *fs = &softpipe->fs->shader;
 518    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
 519    uint fragSlot;
 520
 521    /* z and w are done by linear interpolation:
 522     */
 523    tri_linear_coeff(setup, &setup->posCoef, 0, 2);
 524    tri_linear_coeff(setup, &setup->posCoef, 0, 3);
 525
 526    /* setup interpolation for all the remaining attributes:
 527     */
 528    for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) {
 529       const uint vertSlot = vinfo->src_index[fragSlot];
 530       uint j;
 531
 532       switch (vinfo->interp_mode[fragSlot]) {
 533       case INTERP_CONSTANT:
 534          for (j = 0; j < NUM_CHANNELS; j++)
 535             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 536          break;
 537       case INTERP_LINEAR:
 538          for (j = 0; j < NUM_CHANNELS; j++)
 539             tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 540          break;
 541       case INTERP_PERSPECTIVE:
 542          for (j = 0; j < NUM_CHANNELS; j++)
 543             tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 544          break;
 545       case INTERP_POS:
 546          assert(fragSlot == 0);
 547          setup_fragcoord_coeff(setup);
 548          break;
 549       default:
 550          assert(0);
 551       }
 552
 553       if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
 554          /* FOG.y = front/back facing  XXX fix this */
 555          setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing;
 556          setup->coef[fragSlot].dadx[1] = 0.0;
 557          setup->coef[fragSlot].dady[1] = 0.0;
 558       }
 559    }
 560 }
 561
 562
 563
 564 static void setup_tri_edges( struct setup_stage *setup )
 565 {
 566    float vmin_x = setup->vmin->data[0][0] + 0.5f;
 567    float vmid_x = setup->vmid->data[0][0] + 0.5f;
 568
 569    float vmin_y = setup->vmin->data[0][1] - 0.5f;
 570    float vmid_y = setup->vmid->data[0][1] - 0.5f;
 571    float vmax_y = setup->vmax->data[0][1] - 0.5f;
 572
 573    setup->emaj.sy = CEILF(vmin_y);
 574    setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy);
 575    setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
 576    setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
 577
 578    setup->etop.sy = CEILF(vmid_y);
 579    setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy);
 580    setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
 581    setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
 582
 583    setup->ebot.sy = CEILF(vmin_y);
 584    setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy);
 585    setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
 586    setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
 587 }
 588
 589
 590 /**
 591  * Render the upper or lower half of a triangle.
 592  * Scissoring/cliprect is applied here too.
 593  */
 594 static void subtriangle( struct setup_stage *setup,
 595                          struct edge *eleft,
 596                          struct edge *eright,
 597                          unsigned lines )
 598 {
 599    const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
 600    const int minx = (int) cliprect->minx;
 601    const int maxx = (int) cliprect->maxx;
 602    const int miny = (int) cliprect->miny;
 603    const int maxy = (int) cliprect->maxy;
 604    int y, start_y, finish_y;
 605    int sy = (int)eleft->sy;
 606
 607    assert((int)eleft->sy == (int) eright->sy);
 608
 609    /* clip top/bottom */
 610    start_y = sy;
 611    finish_y = sy + lines;
 612
 613    if (start_y < miny)
 614       start_y = miny;
 615
 616    if (finish_y > maxy)
 617       finish_y = maxy;
 618
 619    start_y -= sy;
 620    finish_y -= sy;
 621
 622    /*
 623    debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
 624    */
 625
 626    for (y = start_y; y < finish_y; y++) {
 627
 628       /* avoid accumulating adds as floats don't have the precision to
 629        * accurately iterate large triangle edges that way.  luckily we
 630        * can just multiply these days.
 631        *
 632        * this is all drowned out by the attribute interpolation anyway.
 633        */
 634       int left = (int)(eleft->sx + y * eleft->dxdy);
 635       int right = (int)(eright->sx + y * eright->dxdy);
 636
 637       /* clip left/right */
 638       if (left < minx)
 639          left = minx;
 640       if (right > maxx)
 641          right = maxx;
 642
 643       if (left < right) {
 644          int _y = sy + y;
 645          if (block(_y) != setup->span.y) {
 646             flush_spans(setup);
 647             setup->span.y = block(_y);
 648          }
 649
 650          setup->span.left[_y&1] = left;
 651          setup->span.right[_y&1] = right;
 652          setup->span.y_flags |= 1<<(_y&1);
 653       }
 654    }
 655
 656
 657    /* save the values so that emaj can be restarted:
 658     */
 659    eleft->sx += lines * eleft->dxdy;
 660    eright->sx += lines * eright->dxdy;
 661    eleft->sy += lines;
 662    eright->sy += lines;
 663 }
 664
 665
 666 /**
 667  * Do setup for triangle rasterization, then render the triangle.
 668  */
 669 static void setup_tri( struct draw_stage *stage,
 670                        struct prim_header *prim )
 671 {
 672    struct setup_stage *setup = setup_stage( stage );
 673
 674    /*
 675    debug_printf("%s\n", __FUNCTION__ );
 676    */
 677
 678    setup_sort_vertices( setup, prim );
 679    setup_tri_coefficients( setup );
 680    setup_tri_edges( setup );
 681
 682    setup->quad.prim = PRIM_TRI;
 683
 684    setup->span.y = 0;
 685    setup->span.y_flags = 0;
 686    setup->span.right[0] = 0;
 687    setup->span.right[1] = 0;
 688    /*   setup->span.z_mode = tri_z_mode( setup->ctx ); */
 689
 690    /*   init_constant_attribs( setup ); */
 691
 692    if (setup->oneoverarea < 0.0) {
 693       /* emaj on left:
 694        */
 695       subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
 696       subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
 697    }
 698    else {
 699       /* emaj on right:
 700        */
 701       subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
 702       subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
 703    }
 704
 705    flush_spans( setup );
 706 }
 707
 708
 709
 710 /**
 711  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 712  * for a line.
 713  */
 714 static void
 715 line_linear_coeff(struct setup_stage *setup,
 716                   struct tgsi_interp_coef *coef,
 717                   uint vertSlot, uint i)
 718 {
 719    const float da = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i];
 720    const float dadx = da * setup->emaj.dx * setup->oneoverarea;
 721    const float dady = da * setup->emaj.dy * setup->oneoverarea;
 722    coef->dadx[i] = dadx;
 723    coef->dady[i] = dady;
 724    coef->a0[i] = (setup->vmin->data[vertSlot][i] -
 725                   (dadx * (setup->vmin->data[0][0] - 0.5f) +
 726                    dady * (setup->vmin->data[0][1] - 0.5f)));
 727 }
 728
 729
 730 /**
 731  * Compute a0, dadx and dady for a perspective-corrected interpolant,
 732  * for a line.
 733  */
 734 static void
 735 line_persp_coeff(struct setup_stage *setup,
 736                   struct tgsi_interp_coef *coef,
 737                   uint vertSlot, uint i)
 738 {
 739    /* XXX double-check/verify this arithmetic */
 740    const float a0 = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3];
 741    const float a1 = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3];
 742    const float da = a1 - a0;
 743    const float dadx = da * setup->emaj.dx * setup->oneoverarea;
 744    const float dady = da * setup->emaj.dy * setup->oneoverarea;
 745    coef->dadx[i] = dadx;
 746    coef->dady[i] = dady;
 747    coef->a0[i] = (setup->vmin->data[vertSlot][i] -
 748                   (dadx * (setup->vmin->data[0][0] - 0.5f) +
 749                    dady * (setup->vmin->data[0][1] - 0.5f)));
 750 }
 751
 752
 753 /**
 754  * Compute the setup->coef[] array dadx, dady, a0 values.
 755  * Must be called after setup->vmin,vmax are initialized.
 756  */
 757 static INLINE void
 758 setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim)
 759 {
 760    struct softpipe_context *softpipe = setup->softpipe;
 761    const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
 762    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
 763    uint fragSlot;
 764
 765    /* use setup->vmin, vmax to point to vertices */
 766    setup->vprovoke = prim->v[1];
 767    setup->vmin = prim->v[0];
 768    setup->vmax = prim->v[1];
 769
 770    setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0];
 771    setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1];
 772    /* NOTE: this is not really 1/area */
 773    setup->oneoverarea = 1.0f / (setup->emaj.dx * setup->emaj.dx +
 774                                 setup->emaj.dy * setup->emaj.dy);
 775
 776    /* z and w are done by linear interpolation:
 777     */
 778    line_linear_coeff(setup, &setup->posCoef, 0, 2);
 779    line_linear_coeff(setup, &setup->posCoef, 0, 3);
 780
 781    /* setup interpolation for all the remaining attributes:
 782     */
 783    for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) {
 784       const uint vertSlot = vinfo->src_index[fragSlot];
 785       uint j;
 786
 787       switch (vinfo->interp_mode[fragSlot]) {
 788       case INTERP_CONSTANT:
 789          for (j = 0; j < NUM_CHANNELS; j++)
 790             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 791          break;
 792       case INTERP_LINEAR:
 793          for (j = 0; j < NUM_CHANNELS; j++)
 794             line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 795          break;
 796       case INTERP_PERSPECTIVE:
 797          for (j = 0; j < NUM_CHANNELS; j++)
 798             line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
 799          break;
 800       case INTERP_POS:
 801          assert(fragSlot == 0);
 802          assert(0); /* XXX fix this: */
 803          setup_fragcoord_coeff(setup);
 804          break;
 805       default:
 806          assert(0);
 807       }
 808
 809       if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
 810          /* FOG.y = front/back facing  XXX fix this */
 811          setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing;
 812          setup->coef[fragSlot].dadx[1] = 0.0;
 813          setup->coef[fragSlot].dady[1] = 0.0;
 814       }
 815    }
 816 }
 817
 818
 819 /**
 820  * Plot a pixel in a line segment.
 821  */
 822 static INLINE void
 823 plot(struct setup_stage *setup, int x, int y)
 824 {
 825    const int iy = y & 1;
 826    const int ix = x & 1;
 827    const int quadX = x - ix;
 828    const int quadY = y - iy;
 829    const int mask = (1 << ix) << (2 * iy);
 830
 831    if (quadX != setup->quad.x0 ||
 832        quadY != setup->quad.y0)
 833    {
 834       /* flush prev quad, start new quad */
 835
 836       if (setup->quad.x0 != -1)
 837          clip_emit_quad(setup);
 838
 839       setup->quad.x0 = quadX;
 840       setup->quad.y0 = quadY;
 841       setup->quad.mask = 0x0;
 842    }
 843
 844    setup->quad.mask |= mask;
 845 }
 846
 847
 848 /**
 849  * Do setup for line rasterization, then render the line.
 850  * Single-pixel width, no stipple, etc.  We rely on the 'draw' module
 851  * to handle stippling and wide lines.
 852  */
 853 static void
 854 setup_line(struct draw_stage *stage, struct prim_header *prim)
 855 {
 856    const struct vertex_header *v0 = prim->v[0];
 857    const struct vertex_header *v1 = prim->v[1];
 858    struct setup_stage *setup = setup_stage( stage );
 859    int x0 = (int) v0->data[0][0];
 860    int x1 = (int) v1->data[0][0];
 861    int y0 = (int) v0->data[0][1];
 862    int y1 = (int) v1->data[0][1];
 863    int dx = x1 - x0;
 864    int dy = y1 - y0;
 865    int xstep, ystep;
 866
 867    if (dx == 0 && dy == 0)
 868       return;
 869
 870    setup_line_coefficients(setup, prim);
 871
 872    if (dx < 0) {
 873       dx = -dx;   /* make positive */
 874       xstep = -1;
 875    }
 876    else {
 877       xstep = 1;
 878    }
 879
 880    if (dy < 0) {
 881       dy = -dy;   /* make positive */
 882       ystep = -1;
 883    }
 884    else {
 885       ystep = 1;
 886    }
 887
 888    assert(dx >= 0);
 889    assert(dy >= 0);
 890
 891    setup->quad.x0 = setup->quad.y0 = -1;
 892    setup->quad.mask = 0x0;
 893    setup->quad.prim = PRIM_LINE;
 894    /* XXX temporary: set coverage to 1.0 so the line appears
 895     * if AA mode happens to be enabled.
 896     */
 897    setup->quad.coverage[0] =
 898    setup->quad.coverage[1] =
 899    setup->quad.coverage[2] =
 900    setup->quad.coverage[3] = 1.0;
 901
 902    if (dx > dy) {
 903       /*** X-major line ***/
 904       int i;
 905       const int errorInc = dy + dy;
 906       int error = errorInc - dx;
 907       const int errorDec = error - dx;
 908
 909       for (i = 0; i < dx; i++) {
 910          plot(setup, x0, y0);
 911
 912          x0 += xstep;
 913          if (error < 0) {
 914             error += errorInc;
 915          }
 916          else {
 917             error += errorDec;
 918             y0 += ystep;
 919          }
 920       }
 921    }
 922    else {
 923       /*** Y-major line ***/
 924       int i;
 925       const int errorInc = dx + dx;
 926       int error = errorInc - dy;
 927       const int errorDec = error - dy;
 928
 929       for (i = 0; i < dy; i++) {
 930          plot(setup, x0, y0);
 931
 932          y0 += ystep;
 933          if (error < 0) {
 934             error += errorInc;
 935          }
 936          else {
 937             error += errorDec;
 938             x0 += xstep;
 939          }
 940       }
 941    }
 942
 943    /* draw final quad */
 944    if (setup->quad.mask) {
 945       clip_emit_quad(setup);
 946    }
 947 }
 948
 949
 950 static void
 951 point_persp_coeff(struct setup_stage *setup,
 952                   const struct vertex_header *vert,
 953                   struct tgsi_interp_coef *coef,
 954                   uint vertSlot, uint i)
 955 {
 956    assert(i <= 3);
 957    coef->dadx[i] = 0.0F;
 958    coef->dady[i] = 0.0F;
 959    coef->a0[i] = vert->data[vertSlot][i] * vert->data[0][3];
 960 }
 961
 962
 963 /**
 964  * Do setup for point rasterization, then render the point.
 965  * Round or square points...
 966  * XXX could optimize a lot for 1-pixel points.
 967  */
 968 static void
 969 setup_point(struct draw_stage *stage, struct prim_header *prim)
 970 {
 971    struct setup_stage *setup = setup_stage( stage );
 972    struct softpipe_context *softpipe = setup->softpipe;
 973    const struct pipe_shader_state *fs = &softpipe->fs->shader;
 974    const struct vertex_header *v0 = prim->v[0];
 975    const int sizeAttr = setup->softpipe->psize_slot;
 976    const float size
 977       = sizeAttr > 0 ? v0->data[sizeAttr][0]
 978       : setup->softpipe->rasterizer->point_size;
 979    const float halfSize = 0.5F * size;
 980    const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth;
 981    const float x = v0->data[0][0];  /* Note: data[0] is always position */
 982    const float y = v0->data[0][1];
 983    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
 984    uint fragSlot;
 985
 986    /* For points, all interpolants are constant-valued.
 987     * However, for point sprites, we'll need to setup texcoords appropriately.
 988     * XXX: which coefficients are the texcoords???
 989     * We may do point sprites as textured quads...
 990     *
 991     * KW: We don't know which coefficients are texcoords - ultimately
 992     * the choice of what interpolation mode to use for each attribute
 993     * should be determined by the fragment program, using
 994     * per-attribute declaration statements that include interpolation
 995     * mode as a parameter.  So either the fragment program will have
 996     * to be adjusted for pointsprite vs normal point behaviour, or
 997     * otherwise a special interpolation mode will have to be defined
 998     * which matches the required behaviour for point sprites.  But -
 999     * the latter is not a feature of normal hardware, and as such
1000     * probably should be ruled out on that basis.
1001     */
1002    setup->vprovoke = prim->v[0];
1003
1004    /* setup Z, W */
1005    const_coeff(setup, &setup->posCoef, 0, 2);
1006    const_coeff(setup, &setup->posCoef, 0, 3);
1007
1008    for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) {
1009       const uint vertSlot = vinfo->src_index[fragSlot];
1010       uint j;
1011
1012       switch (vinfo->interp_mode[fragSlot]) {
1013       case INTERP_CONSTANT:
1014          /* fall-through */
1015       case INTERP_LINEAR:
1016          for (j = 0; j < NUM_CHANNELS; j++)
1017             const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1018          break;
1019       case INTERP_PERSPECTIVE:
1020          for (j = 0; j < NUM_CHANNELS; j++)
1021             point_persp_coeff(setup, setup->vprovoke,
1022                               &setup->coef[fragSlot], vertSlot, j);
1023          break;
1024       case INTERP_POS:
1025          assert(fragSlot == 0);
1026          assert(0); /* XXX fix this: */
1027          setup_fragcoord_coeff(setup);
1028          break;
1029       default:
1030          assert(0);
1031       }
1032
1033       if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
1034          /* FOG.y = front/back facing  XXX fix this */
1035          setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing;
1036          setup->coef[fragSlot].dadx[1] = 0.0;
1037          setup->coef[fragSlot].dady[1] = 0.0;
1038       }
1039    }
1040
1041    setup->quad.prim = PRIM_POINT;
1042
1043    if (halfSize <= 0.5 && !round) {
1044       /* special case for 1-pixel points */
1045       const int ix = ((int) x) & 1;
1046       const int iy = ((int) y) & 1;
1047       setup->quad.x0 = (int) x - ix;
1048       setup->quad.y0 = (int) y - iy;
1049       setup->quad.mask = (1 << ix) << (2 * iy);
1050       clip_emit_quad(setup);
1051    }
1052    else {
1053       if (round) {
1054          /* rounded points */
1055          const int ixmin = block((int) (x - halfSize));
1056          const int ixmax = block((int) (x + halfSize));
1057          const int iymin = block((int) (y - halfSize));
1058          const int iymax = block((int) (y + halfSize));
1059          const float rmin = halfSize - 0.7071F;  /* 0.7071 = sqrt(2)/2 */
1060          const float rmax = halfSize + 0.7071F;
1061          const float rmin2 = MAX2(0.0F, rmin * rmin);
1062          const float rmax2 = rmax * rmax;
1063          const float cscale = 1.0F / (rmax2 - rmin2);
1064          int ix, iy;
1065
1066          for (iy = iymin; iy <= iymax; iy += 2) {
1067             for (ix = ixmin; ix <= ixmax; ix += 2) {
1068                float dx, dy, dist2, cover;
1069
1070                setup->quad.mask = 0x0;
1071
1072                dx = (ix + 0.5f) - x;
1073                dy = (iy + 0.5f) - y;
1074                dist2 = dx * dx + dy * dy;
1075                if (dist2 <= rmax2) {
1076                   cover = 1.0F - (dist2 - rmin2) * cscale;
1077                   setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
1078                   setup->quad.mask |= MASK_TOP_LEFT;
1079                }
1080
1081                dx = (ix + 1.5f) - x;
1082                dy = (iy + 0.5f) - y;
1083                dist2 = dx * dx + dy * dy;
1084                if (dist2 <= rmax2) {
1085                   cover = 1.0F - (dist2 - rmin2) * cscale;
1086                   setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
1087                   setup->quad.mask |= MASK_TOP_RIGHT;
1088                }
1089
1090                dx = (ix + 0.5f) - x;
1091                dy = (iy + 1.5f) - y;
1092                dist2 = dx * dx + dy * dy;
1093                if (dist2 <= rmax2) {
1094                   cover = 1.0F - (dist2 - rmin2) * cscale;
1095                   setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
1096                   setup->quad.mask |= MASK_BOTTOM_LEFT;
1097                }
1098
1099                dx = (ix + 1.5f) - x;
1100                dy = (iy + 1.5f) - y;
1101                dist2 = dx * dx + dy * dy;
1102                if (dist2 <= rmax2) {
1103                   cover = 1.0F - (dist2 - rmin2) * cscale;
1104                   setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
1105                   setup->quad.mask |= MASK_BOTTOM_RIGHT;
1106                }
1107
1108                if (setup->quad.mask) {
1109                   setup->quad.x0 = ix;
1110                   setup->quad.y0 = iy;
1111                   clip_emit_quad(setup);
1112                }
1113             }
1114          }
1115       }
1116       else {
1117          /* square points */
1118          const int xmin = (int) (x + 0.75 - halfSize);
1119          const int ymin = (int) (y + 0.25 - halfSize);
1120          const int xmax = xmin + (int) size;
1121          const int ymax = ymin + (int) size;
1122          /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
1123          const int ixmin = block(xmin);
1124          const int ixmax = block(xmax - 1);
1125          const int iymin = block(ymin);
1126          const int iymax = block(ymax - 1);
1127          int ix, iy;
1128
1129          /*
1130          debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
1131          */
1132          for (iy = iymin; iy <= iymax; iy += 2) {
1133             uint rowMask = 0xf;
1134             if (iy < ymin) {
1135                /* above the top edge */
1136                rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
1137             }
1138             if (iy + 1 >= ymax) {
1139                /* below the bottom edge */
1140                rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
1141             }
1142
1143             for (ix = ixmin; ix <= ixmax; ix += 2) {
1144                uint mask = rowMask;
1145
1146                if (ix < xmin) {
1147                   /* fragment is past left edge of point, turn off left bits */
1148                   mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
1149                }
1150                if (ix + 1 >= xmax) {
1151                   /* past the right edge */
1152                   mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
1153                }
1154
1155                setup->quad.mask = mask;
1156                setup->quad.x0 = ix;
1157                setup->quad.y0 = iy;
1158                clip_emit_quad(setup);
1159             }
1160          }
1161       }
1162    }
1163 }
1164
1165
1166
1167 static void setup_begin( struct draw_stage *stage )
1168 {
1169    struct setup_stage *setup = setup_stage(stage);
1170    struct softpipe_context *sp = setup->softpipe;
1171    const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
1172
1173    setup->quad.nr_attrs = fs->num_inputs;
1174
1175    sp->quad.first->begin(sp->quad.first);
1176
1177    stage->point = setup_point;
1178    stage->line = setup_line;
1179    stage->tri = setup_tri;
1180 }
1181
1182
1183 static void setup_first_point( struct draw_stage *stage,
1184                                struct prim_header *header )
1185 {
1186    setup_begin(stage);
1187    stage->point( stage, header );
1188 }
1189
1190 static void setup_first_line( struct draw_stage *stage,
1191                                struct prim_header *header )
1192 {
1193    setup_begin(stage);
1194    stage->line( stage, header );
1195 }
1196
1197
1198 static void setup_first_tri( struct draw_stage *stage,
1199                                struct prim_header *header )
1200 {
1201    setup_begin(stage);
1202    stage->tri( stage, header );
1203 }
1204
1205
1206
1207 static void setup_flush( struct draw_stage *stage,
1208                          unsigned flags )
1209 {
1210    stage->point = setup_first_point;
1211    stage->line = setup_first_line;
1212    stage->tri = setup_first_tri;
1213 }
1214
1215
/**
 * reset_stipple_counter callback of the setup stage.
 * Intentionally a no-op.
 */
static void
reset_stipple_counter(struct draw_stage *stage)
{
   (void) stage;
}
1219
1220
/**
 * destroy callback of the setup stage: releases the stage's memory.
 *
 * NOTE(review): the pointer passed in is the address of the 'stage'
 * member returned by sp_draw_render_stage(); freeing it directly assumes
 * that member sits at the start of struct setup_stage - confirm.
 */
static void
render_destroy(struct draw_stage *stage)
{
   FREE(stage);
}
1225
1226
1227 /**
1228  * Create a new primitive setup/render stage.
1229  */
1230 struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe )
1231 {
1232    struct setup_stage *setup = CALLOC_STRUCT(setup_stage);
1233
1234    setup->softpipe = softpipe;
1235    setup->stage.draw = softpipe->draw;
1236    setup->stage.point = setup_first_point;
1237    setup->stage.line = setup_first_line;
1238    setup->stage.tri = setup_first_tri;
1239    setup->stage.flush = setup_flush;
1240    setup->stage.reset_stipple_counter = reset_stipple_counter;
1241    setup->stage.destroy = render_destroy;
1242
1243    setup->quad.coef = setup->coef;
1244    setup->quad.posCoef = &setup->posCoef;
1245
1246    return &setup->stage;
1247 }