src/gallium/drivers/llvmpipe/lp_setup.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * \brief  Primitive rasterization/rendering (points, lines, triangles)
  30  *
  31  * \author  Keith Whitwell <keith@tungstengraphics.com>
  32  * \author  Brian Paul
  33  */
  34
  35 #include "lp_context.h"
  36 #include "lp_prim_setup.h"
  37 #include "lp_quad.h"
  38 #include "lp_setup.h"
  39 #include "lp_state.h"
  40 #include "draw/draw_context.h"
  41 #include "draw/draw_private.h"
  42 #include "draw/draw_vertex.h"
  43 #include "pipe/p_shader_tokens.h"
  44 #include "pipe/p_thread.h"
  45 #include "util/u_math.h"
  46 #include "util/u_memory.h"
  47 #include "lp_bld_debug.h"
  48 #include "lp_tile_cache.h"
  49 #include "lp_tile_soa.h"
  50
  51
  52 #define DEBUG_VERTS 0
  53 #define DEBUG_FRAGS 0
  54
  55 /**
  56  * Triangle edge info
  57  */
  58 struct edge {
  59    float dx;            /**< X(v1) - X(v0), used only during setup */
  60    float dy;            /**< Y(v1) - Y(v0), used only during setup */
  61    float dxdy;          /**< dx/dy */
  62    float sx, sy;        /**< first sample point coord */
  63    int lines;           /**< number of lines on this edge */
  64 };
  65
  66
  67 #define MAX_QUADS 16
  68
  69
  70 /**
  71  * Triangle setup info (derived from draw_stage).
  72  * Also used for line drawing (taking some liberties).
  73  */
  74 struct setup_context {
  75    struct llvmpipe_context *llvmpipe;
  76
  77    /* Vertices are just an array of floats making up each attribute in
  78     * turn.  Currently fixed at 4 floats, but should change in time.
  79     * Codegen will help cope with this.
  80     */
  81    const float (*vmax)[4];
  82    const float (*vmid)[4];
  83    const float (*vmin)[4];
  84    const float (*vprovoke)[4];
  85
  86    struct edge ebot;
  87    struct edge etop;
  88    struct edge emaj;
  89
  90    float oneoverarea;
  91    int facing;
  92
  93    struct quad_header quad[MAX_QUADS];
  94    struct quad_header *quad_ptrs[MAX_QUADS];
  95    unsigned count;
  96
  97    struct quad_interp_coef coef;
  98
  99    struct {
 100       int left[2];   /**< [0] = row0, [1] = row1 */
 101       int right[2];
 102       int y;
 103    } span;
 104
 105 #if DEBUG_FRAGS
 106    uint numFragsEmitted;  /**< per primitive */
 107    uint numFragsWritten;  /**< per primitive */
 108 #endif
 109
 110    unsigned winding;            /* which winding to cull */
 111 };
 112
 113
 114
 115 /**
 116  * Execute fragment shader for the four fragments in the quad.
 117  */
 118 static void
 119 shade_quads(struct llvmpipe_context *llvmpipe,
 120             struct quad_header *quads[],
 121             unsigned nr)
 122 {
 123    struct lp_fragment_shader *fs = llvmpipe->fs;
 124    struct quad_header *quad = quads[0];
 125    const unsigned x = quad->input.x0;
 126    const unsigned y = quad->input.y0;
 127    uint8_t *tile;
 128    uint8_t *color;
 129    void *depth;
 130    uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS];
 131    unsigned chan_index;
 132    unsigned q;
 133
 134    assert(fs->current);
 135    if(!fs->current)
 136       return;
 137
 138    /* Sanity checks */
 139    assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH);
 140    assert(x % TILE_VECTOR_WIDTH == 0);
 141    assert(y % TILE_VECTOR_HEIGHT == 0);
 142    for (q = 0; q < nr; ++q) {
 143       assert(quads[q]->input.x0 == x + q*2);
 144       assert(quads[q]->input.y0 == y);
 145    }
 146
 147    /* mask */
 148    for (q = 0; q < 4; ++q)
 149       for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
 150          mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0;
 151
 152    /* color buffer */
 153    if(llvmpipe->framebuffer.nr_cbufs >= 1 &&
 154       llvmpipe->framebuffer.cbufs[0]) {
 155       tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y);
 156       color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0);
 157    }
 158    else
 159       color = NULL;
 160
 161    /* depth buffer */
 162    if(llvmpipe->zsbuf_map) {
 163       assert((x % 2) == 0);
 164       assert((y % 2) == 0);
 165       depth = llvmpipe->zsbuf_map +
 166               y*llvmpipe->zsbuf_transfer->stride +
 167               2*x*llvmpipe->zsbuf_transfer->block.size;
 168    }
 169    else
 170       depth = NULL;
 171
 172    /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */
 173    assert(lp_check_alignment(mask, 16));
 174
 175    assert(lp_check_alignment(depth, 16));
 176    assert(lp_check_alignment(color, 16));
 177    assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16));
 178
 179    /* run shader */
 180    fs->current->jit_function( &llvmpipe->jit_context,
 181                               x, y,
 182                               quad->coef->a0,
 183                               quad->coef->dadx,
 184                               quad->coef->dady,
 185                               &mask[0][0],
 186                               color,
 187                               depth);
 188 }
 189
 190
 191
 192
 193 /**
 194  * Do triangle cull test using tri determinant (sign indicates orientation)
 195  * \return true if triangle is to be culled.
 196  */
 197 static INLINE boolean
 198 cull_tri(const struct setup_context *setup, float det)
 199 {
 200    if (det != 0) {
 201       /* if (det < 0 then Z points toward camera and triangle is
 202        * counter-clockwise winding.
 203        */
 204       unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
 205
 206       if ((winding & setup->winding) == 0)
 207          return FALSE;
 208    }
 209
 210    /* Culled:
 211     */
 212    return TRUE;
 213 }
 214
 215
 216
 217 /**
 218  * Clip setup->quad against the scissor/surface bounds.
 219  */
 220 static INLINE void
 221 quad_clip( struct setup_context *setup, struct quad_header *quad )
 222 {
 223    const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect;
 224    const int minx = (int) cliprect->minx;
 225    const int maxx = (int) cliprect->maxx;
 226    const int miny = (int) cliprect->miny;
 227    const int maxy = (int) cliprect->maxy;
 228
 229    if (quad->input.x0 >= maxx ||
 230        quad->input.y0 >= maxy ||
 231        quad->input.x0 + 1 < minx ||
 232        quad->input.y0 + 1 < miny) {
 233       /* totally clipped */
 234       quad->inout.mask = 0x0;
 235       return;
 236    }
 237    if (quad->input.x0 < minx)
 238       quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
 239    if (quad->input.y0 < miny)
 240       quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
 241    if (quad->input.x0 == maxx - 1)
 242       quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
 243    if (quad->input.y0 == maxy - 1)
 244       quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
 245 }
 246
 247
 248
 249 /**
 250  * Given an X or Y coordinate, return the block/quad coordinate that it
 251  * belongs to.
 252  */
 253 static INLINE int block( int x )
 254 {
 255    return x & ~(2-1);
 256 }
 257
 258 static INLINE int block_x( int x )
 259 {
 260    return x & ~(TILE_VECTOR_WIDTH - 1);
 261 }
 262
 263
 264 /**
 265  * Emit a quad (pass to next stage) with clipping.
 266  */
 267 static INLINE void
 268 clip_emit_quad( struct setup_context *setup, struct quad_header *quad )
 269 {
 270    quad_clip( setup, quad );
 271
 272    if (quad->inout.mask) {
 273       struct llvmpipe_context *lp = setup->llvmpipe;
 274
 275 #if 1
 276       /* XXX: The blender expects 4 quads. This is far from efficient, but
 277        * until we codegenerate single-quad variants of the fragment pipeline
 278        * we need this hack. */
 279       const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
 280       struct quad_header quads[nr_quads];
 281       struct quad_header *quad_ptrs[nr_quads];
 282       int x0 = block_x(quad->input.x0);
 283       unsigned i;
 284
 285       for(i = 0; i < nr_quads; ++i) {
 286          int x = x0 + 2*i;
 287          if(x == quad->input.x0)
 288             memcpy(&quads[i], quad, sizeof quads[i]);
 289          else {
 290             memset(&quads[i], 0, sizeof quads[i]);
 291             quads[i].input.x0 = x;
 292             quads[i].input.y0 = quad->input.y0;
 293             quads[i].coef = quad->coef;
 294          }
 295          quad_ptrs[i] = &quads[i];
 296       }
 297
 298       shade_quads( lp, quad_ptrs, nr_quads );
 299 #else
 300       shade_quads( lp, &quad, 1 );
 301 #endif
 302    }
 303 }
 304
 305
 306 /**
 307  * Render a horizontal span of quads
 308  */
 309 static void flush_spans( struct setup_context *setup )
 310 {
 311    const int step = TILE_VECTOR_WIDTH;
 312    const int xleft0 = setup->span.left[0];
 313    const int xleft1 = setup->span.left[1];
 314    const int xright0 = setup->span.right[0];
 315    const int xright1 = setup->span.right[1];
 316
 317
 318    int minleft = block_x(MIN2(xleft0, xleft1));
 319    int maxright = MAX2(xright0, xright1);
 320    int x;
 321
 322    for (x = minleft; x < maxright; x += step) {
 323       unsigned skip_left0 = CLAMP(xleft0 - x, 0, step);
 324       unsigned skip_left1 = CLAMP(xleft1 - x, 0, step);
 325       unsigned skip_right0 = CLAMP(x + step - xright0, 0, step);
 326       unsigned skip_right1 = CLAMP(x + step - xright1, 0, step);
 327       unsigned lx = x;
 328       const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
 329       unsigned q = 0;
 330
 331       unsigned skipmask_left0 = (1U << skip_left0) - 1U;
 332       unsigned skipmask_left1 = (1U << skip_left1) - 1U;
 333
 334       /* These calculations fail when step == 32 and skip_right == 0.
 335        */
 336       unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0);
 337       unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1);
 338
 339       unsigned mask0 = ~skipmask_left0 & ~skipmask_right0;
 340       unsigned mask1 = ~skipmask_left1 & ~skipmask_right1;
 341
 342       if (mask0 | mask1) {
 343          for(q = 0; q < nr_quads; ++q) {
 344             unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2);
 345             setup->quad[q].input.x0 = lx;
 346             setup->quad[q].input.y0 = setup->span.y;
 347             setup->quad[q].inout.mask = quadmask;
 348             setup->quad_ptrs[q] = &setup->quad[q];
 349             mask0 >>= 2;
 350             mask1 >>= 2;
 351             lx += 2;
 352          }
 353          assert(!(mask0 | mask1));
 354
 355          shade_quads(setup->llvmpipe, setup->quad_ptrs, nr_quads );
 356       }
 357    }
 358
 359
 360    setup->span.y = 0;
 361    setup->span.right[0] = 0;
 362    setup->span.right[1] = 0;
 363    setup->span.left[0] = 1000000;     /* greater than right[0] */
 364    setup->span.left[1] = 1000000;     /* greater than right[1] */
 365 }
 366
 367
 368 #if DEBUG_VERTS
 369 static void print_vertex(const struct setup_context *setup,
 370                          const float (*v)[4])
 371 {
 372    int i;
 373    debug_printf("   Vertex: (%p)\n", v);
 374    for (i = 0; i < setup->quad[0].nr_attrs; i++) {
 375       debug_printf("     %d: %f %f %f %f\n",  i,
 376               v[i][0], v[i][1], v[i][2], v[i][3]);
 377       if (util_is_inf_or_nan(v[i][0])) {
 378          debug_printf("   NaN!\n");
 379       }
 380    }
 381 }
 382 #endif
 383
 384 /**
 385  * Sort the vertices from top to bottom order, setting up the triangle
 386  * edge fields (ebot, emaj, etop).
 387  * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise
 388  */
 389 static boolean setup_sort_vertices( struct setup_context *setup,
 390                                     float det,
 391                                     const float (*v0)[4],
 392                                     const float (*v1)[4],
 393                                     const float (*v2)[4] )
 394 {
 395    setup->vprovoke = v2;
 396
 397    /* determine bottom to top order of vertices */
 398    {
 399       float y0 = v0[0][1];
 400       float y1 = v1[0][1];
 401       float y2 = v2[0][1];
 402       if (y0 <= y1) {
 403          if (y1 <= y2) {
 404             /* y0<=y1<=y2 */
 405             setup->vmin = v0;
 406             setup->vmid = v1;
 407             setup->vmax = v2;
 408          }
 409          else if (y2 <= y0) {
 410             /* y2<=y0<=y1 */
 411             setup->vmin = v2;
 412             setup->vmid = v0;
 413             setup->vmax = v1;
 414          }
 415          else {
 416             /* y0<=y2<=y1 */
 417             setup->vmin = v0;
 418             setup->vmid = v2;
 419             setup->vmax = v1;
 420          }
 421       }
 422       else {
 423          if (y0 <= y2) {
 424             /* y1<=y0<=y2 */
 425             setup->vmin = v1;
 426             setup->vmid = v0;
 427             setup->vmax = v2;
 428          }
 429          else if (y2 <= y1) {
 430             /* y2<=y1<=y0 */
 431             setup->vmin = v2;
 432             setup->vmid = v1;
 433             setup->vmax = v0;
 434          }
 435          else {
 436             /* y1<=y2<=y0 */
 437             setup->vmin = v1;
 438             setup->vmid = v2;
 439             setup->vmax = v0;
 440          }
 441       }
 442    }
 443
 444    setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0];
 445    setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1];
 446    setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
 447    setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
 448    setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0];
 449    setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1];
 450
 451    /*
 452     * Compute triangle's area.  Use 1/area to compute partial
 453     * derivatives of attributes later.
 454     *
 455     * The area will be the same as prim->det, but the sign may be
 456     * different depending on how the vertices get sorted above.
 457     *
 458     * To determine whether the primitive is front or back facing we
 459     * use the prim->det value because its sign is correct.
 460     */
 461    {
 462       const float area = (setup->emaj.dx * setup->ebot.dy -
 463                             setup->ebot.dx * setup->emaj.dy);
 464
 465       setup->oneoverarea = 1.0f / area;
 466
 467       /*
 468       debug_printf("%s one-over-area %f  area %f  det %f\n",
 469                    __FUNCTION__, setup->oneoverarea, area, det );
 470       */
 471       if (util_is_inf_or_nan(setup->oneoverarea))
 472          return FALSE;
 473    }
 474
 475    /* We need to know if this is a front or back-facing triangle for:
 476     *  - the GLSL gl_FrontFacing fragment attribute (bool)
 477     *  - two-sided stencil test
 478     */
 479    setup->facing =
 480       ((det > 0.0) ^
 481        (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW));
 482
 483    return TRUE;
 484 }
 485
 486
 487 /**
 488  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 489  * for a triangle.
 490  */
 491 static void tri_pos_coeff( struct setup_context *setup,
 492                            uint vertSlot, unsigned i)
 493 {
 494    float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
 495    float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
 496    float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
 497    float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
 498    float dadx = a * setup->oneoverarea;
 499    float dady = b * setup->oneoverarea;
 500
 501    assert(i <= 3);
 502
 503    setup->coef.dadx[0][i] = dadx;
 504    setup->coef.dady[0][i] = dady;
 505
 506    /* calculate a0 as the value which would be sampled for the
 507     * fragment at (0,0), taking into account that we want to sample at
 508     * pixel centers, in other words (0.5, 0.5).
 509     *
 510     * this is neat but unfortunately not a good way to do things for
 511     * triangles with very large values of dadx or dady as it will
 512     * result in the subtraction and re-addition from a0 of a very
 513     * large number, which means we'll end up loosing a lot of the
 514     * fractional bits and precision from a0.  the way to fix this is
 515     * to define a0 as the sample at a pixel center somewhere near vmin
 516     * instead - i'll switch to this later.
 517     */
 518    setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
 519                            (dadx * (setup->vmin[0][0] - 0.5f) +
 520                             dady * (setup->vmin[0][1] - 0.5f)));
 521
 522    /*
 523    debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
 524                 slot, "xyzw"[i],
 525                 setup->coef[slot].a0[i],
 526                 setup->coef[slot].dadx[i],
 527                 setup->coef[slot].dady[i]);
 528    */
 529 }
 530
 531
 532 /**
 533  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
 534  * The value value comes from vertex[slot][i].
 535  * The result will be put into setup->coef[slot].a0[i].
 536  * \param slot  which attribute slot
 537  * \param i  which component of the slot (0..3)
 538  */
 539 static void const_pos_coeff( struct setup_context *setup,
 540                              uint vertSlot, unsigned i)
 541 {
 542    setup->coef.dadx[0][i] = 0;
 543    setup->coef.dady[0][i] = 0;
 544
 545    /* need provoking vertex info!
 546     */
 547    setup->coef.a0[0][i] = setup->vprovoke[vertSlot][i];
 548 }
 549
 550
 551 /**
 552  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
 553  * The value value comes from vertex[slot][i].
 554  * The result will be put into setup->coef[slot].a0[i].
 555  * \param slot  which attribute slot
 556  * \param i  which component of the slot (0..3)
 557  */
 558 static void const_coeff( struct setup_context *setup,
 559                          unsigned attrib,
 560                          uint vertSlot)
 561 {
 562    unsigned i;
 563    for (i = 0; i < NUM_CHANNELS; ++i) {
 564       setup->coef.dadx[1 + attrib][i] = 0;
 565       setup->coef.dady[1 + attrib][i] = 0;
 566
 567       /* need provoking vertex info!
 568        */
 569       setup->coef.a0[1 + attrib][i] = setup->vprovoke[vertSlot][i];
 570    }
 571 }
 572
 573
 574 /**
 575  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 576  * for a triangle.
 577  */
 578 static void tri_linear_coeff( struct setup_context *setup,
 579                               unsigned attrib,
 580                               uint vertSlot)
 581 {
 582    unsigned i;
 583    for (i = 0; i < NUM_CHANNELS; ++i) {
 584       float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
 585       float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
 586       float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
 587       float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
 588       float dadx = a * setup->oneoverarea;
 589       float dady = b * setup->oneoverarea;
 590
 591       assert(i <= 3);
 592
 593       setup->coef.dadx[1 + attrib][i] = dadx;
 594       setup->coef.dady[1 + attrib][i] = dady;
 595
 596       /* calculate a0 as the value which would be sampled for the
 597        * fragment at (0,0), taking into account that we want to sample at
 598        * pixel centers, in other words (0.5, 0.5).
 599        *
 600        * this is neat but unfortunately not a good way to do things for
 601        * triangles with very large values of dadx or dady as it will
 602        * result in the subtraction and re-addition from a0 of a very
 603        * large number, which means we'll end up loosing a lot of the
 604        * fractional bits and precision from a0.  the way to fix this is
 605        * to define a0 as the sample at a pixel center somewhere near vmin
 606        * instead - i'll switch to this later.
 607        */
 608       setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
 609                      (dadx * (setup->vmin[0][0] - 0.5f) +
 610                       dady * (setup->vmin[0][1] - 0.5f)));
 611
 612       /*
 613       debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
 614                    slot, "xyzw"[i],
 615                    setup->coef[slot].a0[i],
 616                    setup->coef[slot].dadx[i],
 617                    setup->coef[slot].dady[i]);
 618       */
 619    }
 620 }
 621
 622
 623 /**
 624  * Compute a0, dadx and dady for a perspective-corrected interpolant,
 625  * for a triangle.
 626  * We basically multiply the vertex value by 1/w before computing
 627  * the plane coefficients (a0, dadx, dady).
 628  * Later, when we compute the value at a particular fragment position we'll
 629  * divide the interpolated value by the interpolated W at that fragment.
 630  */
 631 static void tri_persp_coeff( struct setup_context *setup,
 632                              unsigned attrib,
 633                              uint vertSlot)
 634 {
 635    unsigned i;
 636    for (i = 0; i < NUM_CHANNELS; ++i) {
 637       /* premultiply by 1/w  (v[0][3] is always W):
 638        */
 639       float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3];
 640       float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3];
 641       float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3];
 642       float botda = mida - mina;
 643       float majda = maxa - mina;
 644       float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
 645       float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
 646       float dadx = a * setup->oneoverarea;
 647       float dady = b * setup->oneoverarea;
 648
 649       /*
 650       debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
 651                    setup->vmin[vertSlot][i],
 652                    setup->vmid[vertSlot][i],
 653                    setup->vmax[vertSlot][i]
 654              );
 655       */
 656       assert(i <= 3);
 657
 658       setup->coef.dadx[1 + attrib][i] = dadx;
 659       setup->coef.dady[1 + attrib][i] = dady;
 660       setup->coef.a0[1 + attrib][i] = (mina -
 661                      (dadx * (setup->vmin[0][0] - 0.5f) +
 662                       dady * (setup->vmin[0][1] - 0.5f)));
 663    }
 664 }
 665
 666
 667 /**
 668  * Special coefficient setup for gl_FragCoord.
 669  * X and Y are trivial, though Y has to be inverted for OpenGL.
 670  * Z and W are copied from posCoef which should have already been computed.
 671  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
 672  */
 673 static void
 674 setup_fragcoord_coeff(struct setup_context *setup, uint slot)
 675 {
 676    /*X*/
 677    setup->coef.a0[1 + slot][0] = 0;
 678    setup->coef.dadx[1 + slot][0] = 1.0;
 679    setup->coef.dady[1 + slot][0] = 0.0;
 680    /*Y*/
 681    setup->coef.a0[1 + slot][1] = 0.0;
 682    setup->coef.dadx[1 + slot][1] = 0.0;
 683    setup->coef.dady[1 + slot][1] = 1.0;
 684    /*Z*/
 685    setup->coef.a0[1 + slot][2] = setup->coef.a0[0][2];
 686    setup->coef.dadx[1 + slot][2] = setup->coef.dadx[0][2];
 687    setup->coef.dady[1 + slot][2] = setup->coef.dady[0][2];
 688    /*W*/
 689    setup->coef.a0[1 + slot][3] = setup->coef.a0[0][3];
 690    setup->coef.dadx[1 + slot][3] = setup->coef.dadx[0][3];
 691    setup->coef.dady[1 + slot][3] = setup->coef.dady[0][3];
 692 }
 693
 694
 695
 696 /**
 697  * Compute the setup->coef[] array dadx, dady, a0 values.
 698  * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
 699  */
 700 static void setup_tri_coefficients( struct setup_context *setup )
 701 {
 702    struct llvmpipe_context *llvmpipe = setup->llvmpipe;
 703    const struct lp_fragment_shader *lpfs = llvmpipe->fs;
 704    const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
 705    uint fragSlot;
 706
 707    /* z and w are done by linear interpolation:
 708     */
 709    tri_pos_coeff(setup, 0, 2);
 710    tri_pos_coeff(setup, 0, 3);
 711
 712    /* setup interpolation for all the remaining attributes:
 713     */
 714    for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
 715       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
 716
 717       switch (vinfo->attrib[fragSlot].interp_mode) {
 718       case INTERP_CONSTANT:
 719          const_coeff(setup, fragSlot, vertSlot);
 720          break;
 721       case INTERP_LINEAR:
 722          tri_linear_coeff(setup, fragSlot, vertSlot);
 723          break;
 724       case INTERP_PERSPECTIVE:
 725          tri_persp_coeff(setup, fragSlot, vertSlot);
 726          break;
 727       case INTERP_POS:
 728          setup_fragcoord_coeff(setup, fragSlot);
 729          break;
 730       default:
 731          assert(0);
 732       }
 733
 734       if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
 735          setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
 736          setup->coef.dadx[1 + fragSlot][0] = 0.0;
 737          setup->coef.dady[1 + fragSlot][0] = 0.0;
 738       }
 739    }
 740 }
 741
 742
 743
 744 static void setup_tri_edges( struct setup_context *setup )
 745 {
 746    float vmin_x = setup->vmin[0][0] + 0.5f;
 747    float vmid_x = setup->vmid[0][0] + 0.5f;
 748
 749    float vmin_y = setup->vmin[0][1] - 0.5f;
 750    float vmid_y = setup->vmid[0][1] - 0.5f;
 751    float vmax_y = setup->vmax[0][1] - 0.5f;
 752
 753    setup->emaj.sy = ceilf(vmin_y);
 754    setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
 755    setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
 756    setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
 757
 758    setup->etop.sy = ceilf(vmid_y);
 759    setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy);
 760    setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
 761    setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
 762
 763    setup->ebot.sy = ceilf(vmin_y);
 764    setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy);
 765    setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
 766    setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
 767 }
 768
 769
 770 /**
 771  * Render the upper or lower half of a triangle.
 772  * Scissoring/cliprect is applied here too.
 773  */
 774 static void subtriangle( struct setup_context *setup,
 775                          struct edge *eleft,
 776                          struct edge *eright,
 777                          unsigned lines )
 778 {
 779    const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect;
 780    const int minx = (int) cliprect->minx;
 781    const int maxx = (int) cliprect->maxx;
 782    const int miny = (int) cliprect->miny;
 783    const int maxy = (int) cliprect->maxy;
 784    int y, start_y, finish_y;
 785    int sy = (int)eleft->sy;
 786
 787    assert((int)eleft->sy == (int) eright->sy);
 788
 789    /* clip top/bottom */
 790    start_y = sy;
 791    if (start_y < miny)
 792       start_y = miny;
 793
 794    finish_y = sy + lines;
 795    if (finish_y > maxy)
 796       finish_y = maxy;
 797
 798    start_y -= sy;
 799    finish_y -= sy;
 800
 801    /*
 802    debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
 803    */
 804
 805    for (y = start_y; y < finish_y; y++) {
 806
 807       /* avoid accumulating adds as floats don't have the precision to
 808        * accurately iterate large triangle edges that way.  luckily we
 809        * can just multiply these days.
 810        *
 811        * this is all drowned out by the attribute interpolation anyway.
 812        */
 813       int left = (int)(eleft->sx + y * eleft->dxdy);
 814       int right = (int)(eright->sx + y * eright->dxdy);
 815
 816       /* clip left/right */
 817       if (left < minx)
 818          left = minx;
 819       if (right > maxx)
 820          right = maxx;
 821
 822       if (left < right) {
 823          int _y = sy + y;
 824          if (block(_y) != setup->span.y) {
 825             flush_spans(setup);
 826             setup->span.y = block(_y);
 827          }
 828
 829          setup->span.left[_y&1] = left;
 830          setup->span.right[_y&1] = right;
 831       }
 832    }
 833
 834
 835    /* save the values so that emaj can be restarted:
 836     */
 837    eleft->sx += lines * eleft->dxdy;
 838    eright->sx += lines * eright->dxdy;
 839    eleft->sy += lines;
 840    eright->sy += lines;
 841 }
 842
 843
 844 /**
 845  * Recalculate prim's determinant.  This is needed as we don't have
 846  * get this information through the vbuf_render interface & we must
 847  * calculate it here.
 848  */
 849 static float
 850 calc_det( const float (*v0)[4],
 851           const float (*v1)[4],
 852           const float (*v2)[4] )
 853 {
 854    /* edge vectors e = v0 - v2, f = v1 - v2 */
 855    const float ex = v0[0][0] - v2[0][0];
 856    const float ey = v0[0][1] - v2[0][1];
 857    const float fx = v1[0][0] - v2[0][0];
 858    const float fy = v1[0][1] - v2[0][1];
 859
 860    /* det = cross(e,f).z */
 861    return ex * fy - ey * fx;
 862 }
 863
 864
 865 /**
 866  * Do setup for triangle rasterization, then render the triangle.
 867  */
 868 void llvmpipe_setup_tri( struct setup_context *setup,
 869                 const float (*v0)[4],
 870                 const float (*v1)[4],
 871                 const float (*v2)[4] )
 872 {
 873    float det;
 874
 875 #if DEBUG_VERTS
 876    debug_printf("Setup triangle:\n");
 877    print_vertex(setup, v0);
 878    print_vertex(setup, v1);
 879    print_vertex(setup, v2);
 880 #endif
 881
 882    if (setup->llvmpipe->no_rast)
 883       return;
 884
 885    det = calc_det(v0, v1, v2);
 886    /*
 887    debug_printf("%s\n", __FUNCTION__ );
 888    */
 889
 890 #if DEBUG_FRAGS
 891    setup->numFragsEmitted = 0;
 892    setup->numFragsWritten = 0;
 893 #endif
 894
 895    if (cull_tri( setup, det ))
 896       return;
 897
 898    if (!setup_sort_vertices( setup, det, v0, v1, v2 ))
 899       return;
 900    setup_tri_coefficients( setup );
 901    setup_tri_edges( setup );
 902
 903    assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_TRIANGLES);
 904
 905    setup->span.y = 0;
 906    setup->span.right[0] = 0;
 907    setup->span.right[1] = 0;
 908    /*   setup->span.z_mode = tri_z_mode( setup->ctx ); */
 909
 910    /*   init_constant_attribs( setup ); */
 911
 912    if (setup->oneoverarea < 0.0) {
 913       /* emaj on left:
 914        */
 915       subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
 916       subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
 917    }
 918    else {
 919       /* emaj on right:
 920        */
 921       subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
 922       subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
 923    }
 924
 925    flush_spans( setup );
 926
 927 #if DEBUG_FRAGS
 928    printf("Tri: %u frags emitted, %u written\n",
 929           setup->numFragsEmitted,
 930           setup->numFragsWritten);
 931 #endif
 932 }
 933
 934
 935
 936 /**
 937  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 938  * for a line.
 939  */
 940 static void
 941 linear_pos_coeff(struct setup_context *setup,
 942                  uint vertSlot, uint i)
 943 {
 944    const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
 945    const float dadx = da * setup->emaj.dx * setup->oneoverarea;
 946    const float dady = da * setup->emaj.dy * setup->oneoverarea;
 947    setup->coef.dadx[0][i] = dadx;
 948    setup->coef.dady[0][i] = dady;
 949    setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
 950                            (dadx * (setup->vmin[0][0] - 0.5f) +
 951                             dady * (setup->vmin[0][1] - 0.5f)));
 952 }
 953
 954
 955 /**
 956  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 957  * for a line.
 958  */
 959 static void
 960 line_linear_coeff(struct setup_context *setup,
 961                   unsigned attrib,
 962                   uint vertSlot)
 963 {
 964    unsigned i;
 965    for (i = 0; i < NUM_CHANNELS; ++i) {
 966       const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
 967       const float dadx = da * setup->emaj.dx * setup->oneoverarea;
 968       const float dady = da * setup->emaj.dy * setup->oneoverarea;
 969       setup->coef.dadx[1 + attrib][i] = dadx;
 970       setup->coef.dady[1 + attrib][i] = dady;
 971       setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
 972                      (dadx * (setup->vmin[0][0] - 0.5f) +
 973                       dady * (setup->vmin[0][1] - 0.5f)));
 974    }
 975 }
 976
 977
 978 /**
 979  * Compute a0, dadx and dady for a perspective-corrected interpolant,
 980  * for a line.
 981  */
 982 static void
 983 line_persp_coeff(struct setup_context *setup,
 984                  unsigned attrib,
 985                  uint vertSlot)
 986 {
 987    unsigned i;
 988    for (i = 0; i < NUM_CHANNELS; ++i) {
 989       /* XXX double-check/verify this arithmetic */
 990       const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3];
 991       const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3];
 992       const float da = a1 - a0;
 993       const float dadx = da * setup->emaj.dx * setup->oneoverarea;
 994       const float dady = da * setup->emaj.dy * setup->oneoverarea;
 995       setup->coef.dadx[1 + attrib][i] = dadx;
 996       setup->coef.dady[1 + attrib][i] = dady;
 997       setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
 998                      (dadx * (setup->vmin[0][0] - 0.5f) +
 999                       dady * (setup->vmin[0][1] - 0.5f)));
1000    }
1001 }
1002
1003
1004 /**
1005  * Compute the setup->coef[] array dadx, dady, a0 values.
1006  * Must be called after setup->vmin,vmax are initialized.
1007  */
1008 static INLINE boolean
1009 setup_line_coefficients(struct setup_context *setup,
1010                         const float (*v0)[4],
1011                         const float (*v1)[4])
1012 {
1013    struct llvmpipe_context *llvmpipe = setup->llvmpipe;
1014    const struct lp_fragment_shader *lpfs = llvmpipe->fs;
1015    const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
1016    uint fragSlot;
1017    float area;
1018
1019    /* use setup->vmin, vmax to point to vertices */
1020    if (llvmpipe->rasterizer->flatshade_first)
1021       setup->vprovoke = v0;
1022    else
1023       setup->vprovoke = v1;
1024    setup->vmin = v0;
1025    setup->vmax = v1;
1026
1027    setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
1028    setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
1029
1030    /* NOTE: this is not really area but something proportional to it */
1031    area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy;
1032    if (area == 0.0f || util_is_inf_or_nan(area))
1033       return FALSE;
1034    setup->oneoverarea = 1.0f / area;
1035
1036    /* z and w are done by linear interpolation:
1037     */
1038    linear_pos_coeff(setup, 0, 2);
1039    linear_pos_coeff(setup, 0, 3);
1040
1041    /* setup interpolation for all the remaining attributes:
1042     */
1043    for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
1044       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
1045
1046       switch (vinfo->attrib[fragSlot].interp_mode) {
1047       case INTERP_CONSTANT:
1048          const_coeff(setup, fragSlot, vertSlot);
1049          break;
1050       case INTERP_LINEAR:
1051          line_linear_coeff(setup, fragSlot, vertSlot);
1052          break;
1053       case INTERP_PERSPECTIVE:
1054          line_persp_coeff(setup, fragSlot, vertSlot);
1055          break;
1056       case INTERP_POS:
1057          setup_fragcoord_coeff(setup, fragSlot);
1058          break;
1059       default:
1060          assert(0);
1061       }
1062
1063       if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
1064          setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
1065          setup->coef.dadx[1 + fragSlot][0] = 0.0;
1066          setup->coef.dady[1 + fragSlot][0] = 0.0;
1067       }
1068    }
1069    return TRUE;
1070 }
1071
1072
1073 /**
1074  * Plot a pixel in a line segment.
1075  */
1076 static INLINE void
1077 plot(struct setup_context *setup, int x, int y)
1078 {
1079    const int iy = y & 1;
1080    const int ix = x & 1;
1081    const int quadX = x - ix;
1082    const int quadY = y - iy;
1083    const int mask = (1 << ix) << (2 * iy);
1084
1085    if (quadX != setup->quad[0].input.x0 ||
1086        quadY != setup->quad[0].input.y0)
1087    {
1088       /* flush prev quad, start new quad */
1089
1090       if (setup->quad[0].input.x0 != -1)
1091          clip_emit_quad( setup, &setup->quad[0] );
1092
1093       setup->quad[0].input.x0 = quadX;
1094       setup->quad[0].input.y0 = quadY;
1095       setup->quad[0].inout.mask = 0x0;
1096    }
1097
1098    setup->quad[0].inout.mask |= mask;
1099 }
1100
1101
1102 /**
1103  * Do setup for line rasterization, then render the line.
1104  * Single-pixel width, no stipple, etc.  We rely on the 'draw' module
1105  * to handle stippling and wide lines.
1106  */
1107 void
1108 llvmpipe_setup_line(struct setup_context *setup,
1109            const float (*v0)[4],
1110            const float (*v1)[4])
1111 {
1112    int x0 = (int) v0[0][0];
1113    int x1 = (int) v1[0][0];
1114    int y0 = (int) v0[0][1];
1115    int y1 = (int) v1[0][1];
1116    int dx = x1 - x0;
1117    int dy = y1 - y0;
1118    int xstep, ystep;
1119
1120 #if DEBUG_VERTS
1121    debug_printf("Setup line:\n");
1122    print_vertex(setup, v0);
1123    print_vertex(setup, v1);
1124 #endif
1125
1126    if (setup->llvmpipe->no_rast)
1127       return;
1128
1129    if (dx == 0 && dy == 0)
1130       return;
1131
1132    if (!setup_line_coefficients(setup, v0, v1))
1133       return;
1134
1135    assert(v0[0][0] < 1.0e9);
1136    assert(v0[0][1] < 1.0e9);
1137    assert(v1[0][0] < 1.0e9);
1138    assert(v1[0][1] < 1.0e9);
1139
1140    if (dx < 0) {
1141       dx = -dx;   /* make positive */
1142       xstep = -1;
1143    }
1144    else {
1145       xstep = 1;
1146    }
1147
1148    if (dy < 0) {
1149       dy = -dy;   /* make positive */
1150       ystep = -1;
1151    }
1152    else {
1153       ystep = 1;
1154    }
1155
1156    assert(dx >= 0);
1157    assert(dy >= 0);
1158    assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_LINES);
1159
1160    setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1;
1161    setup->quad[0].inout.mask = 0x0;
1162
1163    /* XXX temporary: set coverage to 1.0 so the line appears
1164     * if AA mode happens to be enabled.
1165     */
1166    setup->quad[0].input.coverage[0] =
1167    setup->quad[0].input.coverage[1] =
1168    setup->quad[0].input.coverage[2] =
1169    setup->quad[0].input.coverage[3] = 1.0;
1170
1171    if (dx > dy) {
1172       /*** X-major line ***/
1173       int i;
1174       const int errorInc = dy + dy;
1175       int error = errorInc - dx;
1176       const int errorDec = error - dx;
1177
1178       for (i = 0; i < dx; i++) {
1179          plot(setup, x0, y0);
1180
1181          x0 += xstep;
1182          if (error < 0) {
1183             error += errorInc;
1184          }
1185          else {
1186             error += errorDec;
1187             y0 += ystep;
1188          }
1189       }
1190    }
1191    else {
1192       /*** Y-major line ***/
1193       int i;
1194       const int errorInc = dx + dx;
1195       int error = errorInc - dy;
1196       const int errorDec = error - dy;
1197
1198       for (i = 0; i < dy; i++) {
1199          plot(setup, x0, y0);
1200
1201          y0 += ystep;
1202          if (error < 0) {
1203             error += errorInc;
1204          }
1205          else {
1206             error += errorDec;
1207             x0 += xstep;
1208          }
1209       }
1210    }
1211
1212    /* draw final quad */
1213    if (setup->quad[0].inout.mask) {
1214       clip_emit_quad( setup, &setup->quad[0] );
1215    }
1216 }
1217
1218
1219 static void
1220 point_persp_coeff(struct setup_context *setup,
1221                   const float (*vert)[4],
1222                   unsigned attrib,
1223                   uint vertSlot)
1224 {
1225    unsigned i;
1226    for(i = 0; i < NUM_CHANNELS; ++i) {
1227       setup->coef.dadx[1 + attrib][i] = 0.0F;
1228       setup->coef.dady[1 + attrib][i] = 0.0F;
1229       setup->coef.a0[1 + attrib][i] = vert[vertSlot][i] * vert[0][3];
1230    }
1231 }
1232
1233
1234 /**
1235  * Do setup for point rasterization, then render the point.
1236  * Round or square points...
1237  * XXX could optimize a lot for 1-pixel points.
1238  */
1239 void
1240 llvmpipe_setup_point( struct setup_context *setup,
1241              const float (*v0)[4] )
1242 {
1243    struct llvmpipe_context *llvmpipe = setup->llvmpipe;
1244    const struct lp_fragment_shader *lpfs = llvmpipe->fs;
1245    const int sizeAttr = setup->llvmpipe->psize_slot;
1246    const float size
1247       = sizeAttr > 0 ? v0[sizeAttr][0]
1248       : setup->llvmpipe->rasterizer->point_size;
1249    const float halfSize = 0.5F * size;
1250    const boolean round = (boolean) setup->llvmpipe->rasterizer->point_smooth;
1251    const float x = v0[0][0];  /* Note: data[0] is always position */
1252    const float y = v0[0][1];
1253    const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
1254    uint fragSlot;
1255
1256 #if DEBUG_VERTS
1257    debug_printf("Setup point:\n");
1258    print_vertex(setup, v0);
1259 #endif
1260
1261    if (llvmpipe->no_rast)
1262       return;
1263
1264    assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_POINTS);
1265
1266    /* For points, all interpolants are constant-valued.
1267     * However, for point sprites, we'll need to setup texcoords appropriately.
1268     * XXX: which coefficients are the texcoords???
1269     * We may do point sprites as textured quads...
1270     *
1271     * KW: We don't know which coefficients are texcoords - ultimately
1272     * the choice of what interpolation mode to use for each attribute
1273     * should be determined by the fragment program, using
1274     * per-attribute declaration statements that include interpolation
1275     * mode as a parameter.  So either the fragment program will have
1276     * to be adjusted for pointsprite vs normal point behaviour, or
1277     * otherwise a special interpolation mode will have to be defined
1278     * which matches the required behaviour for point sprites.  But -
1279     * the latter is not a feature of normal hardware, and as such
1280     * probably should be ruled out on that basis.
1281     */
1282    setup->vprovoke = v0;
1283
1284    /* setup Z, W */
1285    const_pos_coeff(setup, 0, 2);
1286    const_pos_coeff(setup, 0, 3);
1287
1288    for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
1289       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
1290
1291       switch (vinfo->attrib[fragSlot].interp_mode) {
1292       case INTERP_CONSTANT:
1293          /* fall-through */
1294       case INTERP_LINEAR:
1295          const_coeff(setup, fragSlot, vertSlot);
1296          break;
1297       case INTERP_PERSPECTIVE:
1298          point_persp_coeff(setup, setup->vprovoke, fragSlot, vertSlot);
1299          break;
1300       case INTERP_POS:
1301          setup_fragcoord_coeff(setup, fragSlot);
1302          break;
1303       default:
1304          assert(0);
1305       }
1306
1307       if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
1308          setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
1309          setup->coef.dadx[1 + fragSlot][0] = 0.0;
1310          setup->coef.dady[1 + fragSlot][0] = 0.0;
1311       }
1312    }
1313
1314
1315    if (halfSize <= 0.5 && !round) {
1316       /* special case for 1-pixel points */
1317       const int ix = ((int) x) & 1;
1318       const int iy = ((int) y) & 1;
1319       setup->quad[0].input.x0 = (int) x - ix;
1320       setup->quad[0].input.y0 = (int) y - iy;
1321       setup->quad[0].inout.mask = (1 << ix) << (2 * iy);
1322       clip_emit_quad( setup, &setup->quad[0] );
1323    }
1324    else {
1325       if (round) {
1326          /* rounded points */
1327          const int ixmin = block((int) (x - halfSize));
1328          const int ixmax = block((int) (x + halfSize));
1329          const int iymin = block((int) (y - halfSize));
1330          const int iymax = block((int) (y + halfSize));
1331          const float rmin = halfSize - 0.7071F;  /* 0.7071 = sqrt(2)/2 */
1332          const float rmax = halfSize + 0.7071F;
1333          const float rmin2 = MAX2(0.0F, rmin * rmin);
1334          const float rmax2 = rmax * rmax;
1335          const float cscale = 1.0F / (rmax2 - rmin2);
1336          int ix, iy;
1337
1338          for (iy = iymin; iy <= iymax; iy += 2) {
1339             for (ix = ixmin; ix <= ixmax; ix += 2) {
1340                float dx, dy, dist2, cover;
1341
1342                setup->quad[0].inout.mask = 0x0;
1343
1344                dx = (ix + 0.5f) - x;
1345                dy = (iy + 0.5f) - y;
1346                dist2 = dx * dx + dy * dy;
1347                if (dist2 <= rmax2) {
1348                   cover = 1.0F - (dist2 - rmin2) * cscale;
1349                   setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
1350                   setup->quad[0].inout.mask |= MASK_TOP_LEFT;
1351                }
1352
1353                dx = (ix + 1.5f) - x;
1354                dy = (iy + 0.5f) - y;
1355                dist2 = dx * dx + dy * dy;
1356                if (dist2 <= rmax2) {
1357                   cover = 1.0F - (dist2 - rmin2) * cscale;
1358                   setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
1359                   setup->quad[0].inout.mask |= MASK_TOP_RIGHT;
1360                }
1361
1362                dx = (ix + 0.5f) - x;
1363                dy = (iy + 1.5f) - y;
1364                dist2 = dx * dx + dy * dy;
1365                if (dist2 <= rmax2) {
1366                   cover = 1.0F - (dist2 - rmin2) * cscale;
1367                   setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
1368                   setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT;
1369                }
1370
1371                dx = (ix + 1.5f) - x;
1372                dy = (iy + 1.5f) - y;
1373                dist2 = dx * dx + dy * dy;
1374                if (dist2 <= rmax2) {
1375                   cover = 1.0F - (dist2 - rmin2) * cscale;
1376                   setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
1377                   setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT;
1378                }
1379
1380                if (setup->quad[0].inout.mask) {
1381                   setup->quad[0].input.x0 = ix;
1382                   setup->quad[0].input.y0 = iy;
1383                   clip_emit_quad( setup, &setup->quad[0] );
1384                }
1385             }
1386          }
1387       }
1388       else {
1389          /* square points */
1390          const int xmin = (int) (x + 0.75 - halfSize);
1391          const int ymin = (int) (y + 0.25 - halfSize);
1392          const int xmax = xmin + (int) size;
1393          const int ymax = ymin + (int) size;
1394          /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
1395          const int ixmin = block(xmin);
1396          const int ixmax = block(xmax - 1);
1397          const int iymin = block(ymin);
1398          const int iymax = block(ymax - 1);
1399          int ix, iy;
1400
1401          /*
1402          debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
1403          */
1404          for (iy = iymin; iy <= iymax; iy += 2) {
1405             uint rowMask = 0xf;
1406             if (iy < ymin) {
1407                /* above the top edge */
1408                rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
1409             }
1410             if (iy + 1 >= ymax) {
1411                /* below the bottom edge */
1412                rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
1413             }
1414
1415             for (ix = ixmin; ix <= ixmax; ix += 2) {
1416                uint mask = rowMask;
1417
1418                if (ix < xmin) {
1419                   /* fragment is past left edge of point, turn off left bits */
1420                   mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
1421                }
1422                if (ix + 1 >= xmax) {
1423                   /* past the right edge */
1424                   mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
1425                }
1426
1427                setup->quad[0].inout.mask = mask;
1428                setup->quad[0].input.x0 = ix;
1429                setup->quad[0].input.y0 = iy;
1430                clip_emit_quad( setup, &setup->quad[0] );
1431             }
1432          }
1433       }
1434    }
1435 }
1436
1437 void llvmpipe_setup_prepare( struct setup_context *setup )
1438 {
1439    struct llvmpipe_context *lp = setup->llvmpipe;
1440
1441    if (lp->dirty) {
1442       llvmpipe_update_derived(lp);
1443    }
1444
1445    if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
1446        lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL &&
1447        lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) {
1448       /* we'll do culling */
1449       setup->winding = lp->rasterizer->cull_mode;
1450    }
1451    else {
1452       /* 'draw' will do culling */
1453       setup->winding = PIPE_WINDING_NONE;
1454    }
1455 }
1456
1457
1458
/**
 * Release a setup context; the pointer is handed to align_free().
 */
void
llvmpipe_setup_destroy_context( struct setup_context *setup )
{
   align_free( setup );
}
1463
1464
1465 /**
1466  * Create a new primitive setup/render stage.
1467  */
1468 struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe )
1469 {
1470    struct setup_context *setup;
1471    unsigned i;
1472
1473    setup = align_malloc(sizeof(struct setup_context), 16);
1474    if (!setup)
1475       return NULL;
1476
1477    memset(setup, 0, sizeof *setup);
1478    setup->llvmpipe = llvmpipe;
1479
1480    for (i = 0; i < MAX_QUADS; i++) {
1481       setup->quad[i].coef = &setup->coef;
1482    }
1483
1484    setup->span.left[0] = 1000000;     /* greater than right[0] */
1485    setup->span.left[1] = 1000000;     /* greater than right[1] */
1486
1487    return setup;
1488 }
1489