src/gallium/drivers/llvmpipe/lp_setup.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * \brief  Primitive rasterization/rendering (points, lines, triangles)
  30  *
  31  * \author  Keith Whitwell <keith@tungstengraphics.com>
  32  * \author  Brian Paul
  33  */
  34
  35 #include "lp_context.h"
  36 #include "lp_prim_setup.h"
  37 #include "lp_quad.h"
  38 #include "lp_setup.h"
  39 #include "lp_state.h"
  40 #include "draw/draw_context.h"
  41 #include "draw/draw_private.h"
  42 #include "draw/draw_vertex.h"
  43 #include "pipe/p_shader_tokens.h"
  44 #include "pipe/p_thread.h"
  45 #include "util/u_math.h"
  46 #include "util/u_memory.h"
  47 #include "lp_tile_cache.h"
  48 #include "lp_tile_soa.h"
  49
  50
  51 #define DEBUG_VERTS 0
  52 #define DEBUG_FRAGS 0
  53
  54 /**
  55  * Triangle edge info
  56  */
  57 struct edge {
  58    float dx;            /**< X(v1) - X(v0), used only during setup */
  59    float dy;            /**< Y(v1) - Y(v0), used only during setup */
  60    float dxdy;          /**< dx/dy */
  61    float sx, sy;        /**< first sample point coord */
  62    int lines;           /**< number of lines on this edge */
  63 };
  64
  65
  66 #define MAX_QUADS 16
  67
  68
  69 /**
  70  * Triangle setup info (derived from draw_stage).
  71  * Also used for line drawing (taking some liberties).
  72  */
  73 struct setup_context {
  74    struct llvmpipe_context *llvmpipe;
  75
  76    /* Vertices are just an array of floats making up each attribute in
  77     * turn.  Currently fixed at 4 floats, but should change in time.
  78     * Codegen will help cope with this.
  79     */
  80    const float (*vmax)[4];
  81    const float (*vmid)[4];
  82    const float (*vmin)[4];
  83    const float (*vprovoke)[4];
  84
  85    struct edge ebot;
  86    struct edge etop;
  87    struct edge emaj;
  88
  89    float oneoverarea;
  90    int facing;
  91
  92    struct quad_header quad[MAX_QUADS];
  93    struct quad_header *quad_ptrs[MAX_QUADS];
  94    unsigned count;
  95
  96    struct quad_interp_coef coef;
  97
  98    struct {
  99       int left[2];   /**< [0] = row0, [1] = row1 */
 100       int right[2];
 101       int y;
 102    } span;
 103
 104 #if DEBUG_FRAGS
 105    uint numFragsEmitted;  /**< per primitive */
 106    uint numFragsWritten;  /**< per primitive */
 107 #endif
 108
 109    unsigned winding;            /* which winding to cull */
 110 };
 111
 112
 113
 114 /**
 115  * Execute fragment shader for the four fragments in the quad.
 116  */
 117 static void
 118 shade_quads(struct llvmpipe_context *llvmpipe,
 119             struct quad_header *quads[],
 120             unsigned nr)
 121 {
 122    struct lp_fragment_shader *fs = llvmpipe->fs;
 123    struct quad_header *quad = quads[0];
 124    const unsigned x = quad->input.x0;
 125    const unsigned y = quad->input.y0;
 126    uint8_t *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y);
 127    uint8_t *color;
 128    void *depth;
 129    uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS];
 130    unsigned chan_index;
 131    unsigned q;
 132
 133    assert(fs->current);
 134    if(!fs->current)
 135       return;
 136
 137    /* Sanity checks */
 138    assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH);
 139    assert(x % TILE_VECTOR_WIDTH == 0);
 140    assert(y % TILE_VECTOR_HEIGHT == 0);
 141    for (q = 0; q < nr; ++q) {
 142       assert(quads[q]->input.x0 == x + q*2);
 143       assert(quads[q]->input.y0 == y);
 144    }
 145
 146    /* mask */
 147    for (q = 0; q < 4; ++q)
 148       for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
 149          mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0;
 150
 151    /* color buffer */
 152    color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0);
 153
 154    /* depth buffer */
 155    if(llvmpipe->zsbuf_map) {
 156       assert((x % 2) == 0);
 157       assert((y % 2) == 0);
 158       depth = llvmpipe->zsbuf_map +
 159               y*llvmpipe->zsbuf_transfer->stride +
 160               2*x*llvmpipe->zsbuf_transfer->block.size;
 161    }
 162    else
 163       depth = NULL;
 164
 165    /* TODO: blend color */
 166
 167    assert((((uintptr_t)mask) & 0xf) == 0);
 168    assert((((uintptr_t)depth) & 0xf) == 0);
 169    assert((((uintptr_t)color) & 0xf) == 0);
 170    assert((((uintptr_t)llvmpipe->jit_context.blend_color) & 0xf) == 0);
 171
 172    /* run shader */
 173    fs->current->jit_function( &llvmpipe->jit_context,
 174                               x, y,
 175                               quad->coef->a0,
 176                               quad->coef->dadx,
 177                               quad->coef->dady,
 178                               &mask[0][0],
 179                               color,
 180                               depth);
 181 }
 182
 183
 184
 185
 186 /**
 187  * Do triangle cull test using tri determinant (sign indicates orientation)
 188  * \return true if triangle is to be culled.
 189  */
 190 static INLINE boolean
 191 cull_tri(const struct setup_context *setup, float det)
 192 {
 193    if (det != 0) {
 194       /* if (det < 0 then Z points toward camera and triangle is
 195        * counter-clockwise winding.
 196        */
 197       unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
 198
 199       if ((winding & setup->winding) == 0)
 200          return FALSE;
 201    }
 202
 203    /* Culled:
 204     */
 205    return TRUE;
 206 }
 207
 208
 209
 210 /**
 211  * Clip setup->quad against the scissor/surface bounds.
 212  */
 213 static INLINE void
 214 quad_clip( struct setup_context *setup, struct quad_header *quad )
 215 {
 216    const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect;
 217    const int minx = (int) cliprect->minx;
 218    const int maxx = (int) cliprect->maxx;
 219    const int miny = (int) cliprect->miny;
 220    const int maxy = (int) cliprect->maxy;
 221
 222    if (quad->input.x0 >= maxx ||
 223        quad->input.y0 >= maxy ||
 224        quad->input.x0 + 1 < minx ||
 225        quad->input.y0 + 1 < miny) {
 226       /* totally clipped */
 227       quad->inout.mask = 0x0;
 228       return;
 229    }
 230    if (quad->input.x0 < minx)
 231       quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
 232    if (quad->input.y0 < miny)
 233       quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
 234    if (quad->input.x0 == maxx - 1)
 235       quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
 236    if (quad->input.y0 == maxy - 1)
 237       quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
 238 }
 239
 240
 241
 242 /**
 243  * Given an X or Y coordinate, return the block/quad coordinate that it
 244  * belongs to.
 245  */
 246 static INLINE int block( int x )
 247 {
 248    return x & ~(2-1);
 249 }
 250
 251 static INLINE int block_x( int x )
 252 {
 253    return x & ~(TILE_VECTOR_WIDTH - 1);
 254 }
 255
 256
 257 /**
 258  * Emit a quad (pass to next stage) with clipping.
 259  */
 260 static INLINE void
 261 clip_emit_quad( struct setup_context *setup, struct quad_header *quad )
 262 {
 263    quad_clip( setup, quad );
 264
 265    if (quad->inout.mask) {
 266       struct llvmpipe_context *lp = setup->llvmpipe;
 267
 268 #if 1
 269       /* XXX: The blender expects 4 quads. This is far from efficient, but
 270        * until we codegenerate single-quad variants of the fragment pipeline
 271        * we need this hack. */
 272       const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
 273       struct quad_header quads[nr_quads];
 274       struct quad_header *quad_ptrs[nr_quads];
 275       int x0 = block_x(quad->input.x0);
 276       unsigned i;
 277
 278       for(i = 0; i < nr_quads; ++i) {
 279          int x = x0 + 2*i;
 280          if(x == quad->input.x0)
 281             memcpy(&quads[i], quad, sizeof quads[i]);
 282          else {
 283             memset(&quads[i], 0, sizeof quads[i]);
 284             quads[i].input.x0 = x;
 285             quads[i].input.y0 = quad->input.y0;
 286             quads[i].coef = quad->coef;
 287          }
 288          quad_ptrs[i] = &quads[i];
 289       }
 290
 291       shade_quads( lp, quad_ptrs, nr_quads );
 292 #else
 293       shade_quads( lp, &quad, 1 );
 294 #endif
 295    }
 296 }
 297
 298
 299 /**
 300  * Render a horizontal span of quads
 301  */
 302 static void flush_spans( struct setup_context *setup )
 303 {
 304    const int step = TILE_VECTOR_WIDTH;
 305    const int xleft0 = setup->span.left[0];
 306    const int xleft1 = setup->span.left[1];
 307    const int xright0 = setup->span.right[0];
 308    const int xright1 = setup->span.right[1];
 309
 310
 311    int minleft = block_x(MIN2(xleft0, xleft1));
 312    int maxright = MAX2(xright0, xright1);
 313    int x;
 314
 315    for (x = minleft; x < maxright; x += step) {
 316       unsigned skip_left0 = CLAMP(xleft0 - x, 0, step);
 317       unsigned skip_left1 = CLAMP(xleft1 - x, 0, step);
 318       unsigned skip_right0 = CLAMP(x + step - xright0, 0, step);
 319       unsigned skip_right1 = CLAMP(x + step - xright1, 0, step);
 320       unsigned lx = x;
 321       const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
 322       unsigned q = 0;
 323
 324       unsigned skipmask_left0 = (1U << skip_left0) - 1U;
 325       unsigned skipmask_left1 = (1U << skip_left1) - 1U;
 326
 327       /* These calculations fail when step == 32 and skip_right == 0.
 328        */
 329       unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0);
 330       unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1);
 331
 332       unsigned mask0 = ~skipmask_left0 & ~skipmask_right0;
 333       unsigned mask1 = ~skipmask_left1 & ~skipmask_right1;
 334
 335       if (mask0 | mask1) {
 336          for(q = 0; q < nr_quads; ++q) {
 337             unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2);
 338             setup->quad[q].input.x0 = lx;
 339             setup->quad[q].input.y0 = setup->span.y;
 340             setup->quad[q].inout.mask = quadmask;
 341             setup->quad_ptrs[q] = &setup->quad[q];
 342             mask0 >>= 2;
 343             mask1 >>= 2;
 344             lx += 2;
 345          }
 346          assert(!(mask0 | mask1));
 347
 348          shade_quads(setup->llvmpipe, setup->quad_ptrs, nr_quads );
 349       }
 350    }
 351
 352
 353    setup->span.y = 0;
 354    setup->span.right[0] = 0;
 355    setup->span.right[1] = 0;
 356    setup->span.left[0] = 1000000;     /* greater than right[0] */
 357    setup->span.left[1] = 1000000;     /* greater than right[1] */
 358 }
 359
 360
 361 #if DEBUG_VERTS
 362 static void print_vertex(const struct setup_context *setup,
 363                          const float (*v)[4])
 364 {
 365    int i;
 366    debug_printf("   Vertex: (%p)\n", v);
 367    for (i = 0; i < setup->quad[0].nr_attrs; i++) {
 368       debug_printf("     %d: %f %f %f %f\n",  i,
 369               v[i][0], v[i][1], v[i][2], v[i][3]);
 370       if (util_is_inf_or_nan(v[i][0])) {
 371          debug_printf("   NaN!\n");
 372       }
 373    }
 374 }
 375 #endif
 376
 377 /**
 378  * Sort the vertices from top to bottom order, setting up the triangle
 379  * edge fields (ebot, emaj, etop).
 380  * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise
 381  */
 382 static boolean setup_sort_vertices( struct setup_context *setup,
 383                                     float det,
 384                                     const float (*v0)[4],
 385                                     const float (*v1)[4],
 386                                     const float (*v2)[4] )
 387 {
 388    setup->vprovoke = v2;
 389
 390    /* determine bottom to top order of vertices */
 391    {
 392       float y0 = v0[0][1];
 393       float y1 = v1[0][1];
 394       float y2 = v2[0][1];
 395       if (y0 <= y1) {
 396          if (y1 <= y2) {
 397             /* y0<=y1<=y2 */
 398             setup->vmin = v0;
 399             setup->vmid = v1;
 400             setup->vmax = v2;
 401          }
 402          else if (y2 <= y0) {
 403             /* y2<=y0<=y1 */
 404             setup->vmin = v2;
 405             setup->vmid = v0;
 406             setup->vmax = v1;
 407          }
 408          else {
 409             /* y0<=y2<=y1 */
 410             setup->vmin = v0;
 411             setup->vmid = v2;
 412             setup->vmax = v1;
 413          }
 414       }
 415       else {
 416          if (y0 <= y2) {
 417             /* y1<=y0<=y2 */
 418             setup->vmin = v1;
 419             setup->vmid = v0;
 420             setup->vmax = v2;
 421          }
 422          else if (y2 <= y1) {
 423             /* y2<=y1<=y0 */
 424             setup->vmin = v2;
 425             setup->vmid = v1;
 426             setup->vmax = v0;
 427          }
 428          else {
 429             /* y1<=y2<=y0 */
 430             setup->vmin = v1;
 431             setup->vmid = v2;
 432             setup->vmax = v0;
 433          }
 434       }
 435    }
 436
 437    setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0];
 438    setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1];
 439    setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
 440    setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
 441    setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0];
 442    setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1];
 443
 444    /*
 445     * Compute triangle's area.  Use 1/area to compute partial
 446     * derivatives of attributes later.
 447     *
 448     * The area will be the same as prim->det, but the sign may be
 449     * different depending on how the vertices get sorted above.
 450     *
 451     * To determine whether the primitive is front or back facing we
 452     * use the prim->det value because its sign is correct.
 453     */
 454    {
 455       const float area = (setup->emaj.dx * setup->ebot.dy -
 456                             setup->ebot.dx * setup->emaj.dy);
 457
 458       setup->oneoverarea = 1.0f / area;
 459
 460       /*
 461       debug_printf("%s one-over-area %f  area %f  det %f\n",
 462                    __FUNCTION__, setup->oneoverarea, area, det );
 463       */
 464       if (util_is_inf_or_nan(setup->oneoverarea))
 465          return FALSE;
 466    }
 467
 468    /* We need to know if this is a front or back-facing triangle for:
 469     *  - the GLSL gl_FrontFacing fragment attribute (bool)
 470     *  - two-sided stencil test
 471     */
 472    setup->facing =
 473       ((det > 0.0) ^
 474        (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW));
 475
 476    return TRUE;
 477 }
 478
 479
 480 /**
 481  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 482  * for a triangle.
 483  */
 484 static void tri_pos_coeff( struct setup_context *setup,
 485                            uint vertSlot, unsigned i)
 486 {
 487    float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
 488    float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
 489    float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
 490    float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
 491    float dadx = a * setup->oneoverarea;
 492    float dady = b * setup->oneoverarea;
 493
 494    assert(i <= 3);
 495
 496    setup->coef.dadx[0][i] = dadx;
 497    setup->coef.dady[0][i] = dady;
 498
 499    /* calculate a0 as the value which would be sampled for the
 500     * fragment at (0,0), taking into account that we want to sample at
 501     * pixel centers, in other words (0.5, 0.5).
 502     *
 503     * this is neat but unfortunately not a good way to do things for
 504     * triangles with very large values of dadx or dady as it will
 505     * result in the subtraction and re-addition from a0 of a very
 506     * large number, which means we'll end up loosing a lot of the
 507     * fractional bits and precision from a0.  the way to fix this is
 508     * to define a0 as the sample at a pixel center somewhere near vmin
 509     * instead - i'll switch to this later.
 510     */
 511    setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
 512                            (dadx * (setup->vmin[0][0] - 0.5f) +
 513                             dady * (setup->vmin[0][1] - 0.5f)));
 514
 515    /*
 516    debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
 517                 slot, "xyzw"[i],
 518                 setup->coef[slot].a0[i],
 519                 setup->coef[slot].dadx[i],
 520                 setup->coef[slot].dady[i]);
 521    */
 522 }
 523
 524
 525 /**
 526  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
 527  * The value value comes from vertex[slot][i].
 528  * The result will be put into setup->coef[slot].a0[i].
 529  * \param slot  which attribute slot
 530  * \param i  which component of the slot (0..3)
 531  */
 532 static void const_pos_coeff( struct setup_context *setup,
 533                              uint vertSlot, unsigned i)
 534 {
 535    setup->coef.dadx[0][i] = 0;
 536    setup->coef.dady[0][i] = 0;
 537
 538    /* need provoking vertex info!
 539     */
 540    setup->coef.a0[0][i] = setup->vprovoke[vertSlot][i];
 541 }
 542
 543
 544 /**
 545  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
 546  * The value value comes from vertex[slot][i].
 547  * The result will be put into setup->coef[slot].a0[i].
 548  * \param slot  which attribute slot
 549  * \param i  which component of the slot (0..3)
 550  */
 551 static void const_coeff( struct setup_context *setup,
 552                          unsigned attrib,
 553                          uint vertSlot)
 554 {
 555    unsigned i;
 556    for (i = 0; i < NUM_CHANNELS; ++i) {
 557       setup->coef.dadx[1 + attrib][i] = 0;
 558       setup->coef.dady[1 + attrib][i] = 0;
 559
 560       /* need provoking vertex info!
 561        */
 562       setup->coef.a0[1 + attrib][i] = setup->vprovoke[vertSlot][i];
 563    }
 564 }
 565
 566
 567 /**
 568  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 569  * for a triangle.
 570  */
 571 static void tri_linear_coeff( struct setup_context *setup,
 572                               unsigned attrib,
 573                               uint vertSlot)
 574 {
 575    unsigned i;
 576    for (i = 0; i < NUM_CHANNELS; ++i) {
 577       float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
 578       float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
 579       float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
 580       float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
 581       float dadx = a * setup->oneoverarea;
 582       float dady = b * setup->oneoverarea;
 583
 584       assert(i <= 3);
 585
 586       setup->coef.dadx[1 + attrib][i] = dadx;
 587       setup->coef.dady[1 + attrib][i] = dady;
 588
 589       /* calculate a0 as the value which would be sampled for the
 590        * fragment at (0,0), taking into account that we want to sample at
 591        * pixel centers, in other words (0.5, 0.5).
 592        *
 593        * this is neat but unfortunately not a good way to do things for
 594        * triangles with very large values of dadx or dady as it will
 595        * result in the subtraction and re-addition from a0 of a very
 596        * large number, which means we'll end up loosing a lot of the
 597        * fractional bits and precision from a0.  the way to fix this is
 598        * to define a0 as the sample at a pixel center somewhere near vmin
 599        * instead - i'll switch to this later.
 600        */
 601       setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
 602                      (dadx * (setup->vmin[0][0] - 0.5f) +
 603                       dady * (setup->vmin[0][1] - 0.5f)));
 604
 605       /*
 606       debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
 607                    slot, "xyzw"[i],
 608                    setup->coef[slot].a0[i],
 609                    setup->coef[slot].dadx[i],
 610                    setup->coef[slot].dady[i]);
 611       */
 612    }
 613 }
 614
 615
 616 /**
 617  * Compute a0, dadx and dady for a perspective-corrected interpolant,
 618  * for a triangle.
 619  * We basically multiply the vertex value by 1/w before computing
 620  * the plane coefficients (a0, dadx, dady).
 621  * Later, when we compute the value at a particular fragment position we'll
 622  * divide the interpolated value by the interpolated W at that fragment.
 623  */
 624 static void tri_persp_coeff( struct setup_context *setup,
 625                              unsigned attrib,
 626                              uint vertSlot)
 627 {
 628    unsigned i;
 629    for (i = 0; i < NUM_CHANNELS; ++i) {
 630       /* premultiply by 1/w  (v[0][3] is always W):
 631        */
 632       float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3];
 633       float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3];
 634       float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3];
 635       float botda = mida - mina;
 636       float majda = maxa - mina;
 637       float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
 638       float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
 639       float dadx = a * setup->oneoverarea;
 640       float dady = b * setup->oneoverarea;
 641
 642       /*
 643       debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
 644                    setup->vmin[vertSlot][i],
 645                    setup->vmid[vertSlot][i],
 646                    setup->vmax[vertSlot][i]
 647              );
 648       */
 649       assert(i <= 3);
 650
 651       setup->coef.dadx[1 + attrib][i] = dadx;
 652       setup->coef.dady[1 + attrib][i] = dady;
 653       setup->coef.a0[1 + attrib][i] = (mina -
 654                      (dadx * (setup->vmin[0][0] - 0.5f) +
 655                       dady * (setup->vmin[0][1] - 0.5f)));
 656    }
 657 }
 658
 659
 660 /**
 661  * Special coefficient setup for gl_FragCoord.
 662  * X and Y are trivial, though Y has to be inverted for OpenGL.
 663  * Z and W are copied from posCoef which should have already been computed.
 664  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
 665  */
 666 static void
 667 setup_fragcoord_coeff(struct setup_context *setup, uint slot)
 668 {
 669    /*X*/
 670    setup->coef.a0[1 + slot][0] = 0;
 671    setup->coef.dadx[1 + slot][0] = 1.0;
 672    setup->coef.dady[1 + slot][0] = 0.0;
 673    /*Y*/
 674    setup->coef.a0[1 + slot][1] = 0.0;
 675    setup->coef.dadx[1 + slot][1] = 0.0;
 676    setup->coef.dady[1 + slot][1] = 1.0;
 677    /*Z*/
 678    setup->coef.a0[1 + slot][2] = setup->coef.a0[0][2];
 679    setup->coef.dadx[1 + slot][2] = setup->coef.dadx[0][2];
 680    setup->coef.dady[1 + slot][2] = setup->coef.dady[0][2];
 681    /*W*/
 682    setup->coef.a0[1 + slot][3] = setup->coef.a0[0][3];
 683    setup->coef.dadx[1 + slot][3] = setup->coef.dadx[0][3];
 684    setup->coef.dady[1 + slot][3] = setup->coef.dady[0][3];
 685 }
 686
 687
 688
 689 /**
 690  * Compute the setup->coef[] array dadx, dady, a0 values.
 691  * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
 692  */
 693 static void setup_tri_coefficients( struct setup_context *setup )
 694 {
 695    struct llvmpipe_context *llvmpipe = setup->llvmpipe;
 696    const struct lp_fragment_shader *lpfs = llvmpipe->fs;
 697    const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
 698    uint fragSlot;
 699
 700    /* z and w are done by linear interpolation:
 701     */
 702    tri_pos_coeff(setup, 0, 2);
 703    tri_pos_coeff(setup, 0, 3);
 704
 705    /* setup interpolation for all the remaining attributes:
 706     */
 707    for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
 708       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
 709
 710       switch (vinfo->attrib[fragSlot].interp_mode) {
 711       case INTERP_CONSTANT:
 712          const_coeff(setup, fragSlot, vertSlot);
 713          break;
 714       case INTERP_LINEAR:
 715          tri_linear_coeff(setup, fragSlot, vertSlot);
 716          break;
 717       case INTERP_PERSPECTIVE:
 718          tri_persp_coeff(setup, fragSlot, vertSlot);
 719          break;
 720       case INTERP_POS:
 721          setup_fragcoord_coeff(setup, fragSlot);
 722          break;
 723       default:
 724          assert(0);
 725       }
 726
 727       if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
 728          setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
 729          setup->coef.dadx[1 + fragSlot][0] = 0.0;
 730          setup->coef.dady[1 + fragSlot][0] = 0.0;
 731       }
 732    }
 733 }
 734
 735
 736
 737 static void setup_tri_edges( struct setup_context *setup )
 738 {
 739    float vmin_x = setup->vmin[0][0] + 0.5f;
 740    float vmid_x = setup->vmid[0][0] + 0.5f;
 741
 742    float vmin_y = setup->vmin[0][1] - 0.5f;
 743    float vmid_y = setup->vmid[0][1] - 0.5f;
 744    float vmax_y = setup->vmax[0][1] - 0.5f;
 745
 746    setup->emaj.sy = ceilf(vmin_y);
 747    setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
 748    setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
 749    setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
 750
 751    setup->etop.sy = ceilf(vmid_y);
 752    setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy);
 753    setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
 754    setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
 755
 756    setup->ebot.sy = ceilf(vmin_y);
 757    setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy);
 758    setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
 759    setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
 760 }
 761
 762
 763 /**
 764  * Render the upper or lower half of a triangle.
 765  * Scissoring/cliprect is applied here too.
 766  */
 767 static void subtriangle( struct setup_context *setup,
 768                          struct edge *eleft,
 769                          struct edge *eright,
 770                          unsigned lines )
 771 {
 772    const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect;
 773    const int minx = (int) cliprect->minx;
 774    const int maxx = (int) cliprect->maxx;
 775    const int miny = (int) cliprect->miny;
 776    const int maxy = (int) cliprect->maxy;
 777    int y, start_y, finish_y;
 778    int sy = (int)eleft->sy;
 779
 780    assert((int)eleft->sy == (int) eright->sy);
 781
 782    /* clip top/bottom */
 783    start_y = sy;
 784    if (start_y < miny)
 785       start_y = miny;
 786
 787    finish_y = sy + lines;
 788    if (finish_y > maxy)
 789       finish_y = maxy;
 790
 791    start_y -= sy;
 792    finish_y -= sy;
 793
 794    /*
 795    debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
 796    */
 797
 798    for (y = start_y; y < finish_y; y++) {
 799
 800       /* avoid accumulating adds as floats don't have the precision to
 801        * accurately iterate large triangle edges that way.  luckily we
 802        * can just multiply these days.
 803        *
 804        * this is all drowned out by the attribute interpolation anyway.
 805        */
 806       int left = (int)(eleft->sx + y * eleft->dxdy);
 807       int right = (int)(eright->sx + y * eright->dxdy);
 808
 809       /* clip left/right */
 810       if (left < minx)
 811          left = minx;
 812       if (right > maxx)
 813          right = maxx;
 814
 815       if (left < right) {
 816          int _y = sy + y;
 817          if (block(_y) != setup->span.y) {
 818             flush_spans(setup);
 819             setup->span.y = block(_y);
 820          }
 821
 822          setup->span.left[_y&1] = left;
 823          setup->span.right[_y&1] = right;
 824       }
 825    }
 826
 827
 828    /* save the values so that emaj can be restarted:
 829     */
 830    eleft->sx += lines * eleft->dxdy;
 831    eright->sx += lines * eright->dxdy;
 832    eleft->sy += lines;
 833    eright->sy += lines;
 834 }
 835
 836
 837 /**
 838  * Recalculate prim's determinant.  This is needed as we don't have
 839  * get this information through the vbuf_render interface & we must
 840  * calculate it here.
 841  */
 842 static float
 843 calc_det( const float (*v0)[4],
 844           const float (*v1)[4],
 845           const float (*v2)[4] )
 846 {
 847    /* edge vectors e = v0 - v2, f = v1 - v2 */
 848    const float ex = v0[0][0] - v2[0][0];
 849    const float ey = v0[0][1] - v2[0][1];
 850    const float fx = v1[0][0] - v2[0][0];
 851    const float fy = v1[0][1] - v2[0][1];
 852
 853    /* det = cross(e,f).z */
 854    return ex * fy - ey * fx;
 855 }
 856
 857
 858 /**
 859  * Do setup for triangle rasterization, then render the triangle.
 860  */
 861 void llvmpipe_setup_tri( struct setup_context *setup,
 862                 const float (*v0)[4],
 863                 const float (*v1)[4],
 864                 const float (*v2)[4] )
 865 {
 866    float det;
 867
 868 #if DEBUG_VERTS
 869    debug_printf("Setup triangle:\n");
 870    print_vertex(setup, v0);
 871    print_vertex(setup, v1);
 872    print_vertex(setup, v2);
 873 #endif
 874
 875    if (setup->llvmpipe->no_rast)
 876       return;
 877
 878    det = calc_det(v0, v1, v2);
 879    /*
 880    debug_printf("%s\n", __FUNCTION__ );
 881    */
 882
 883 #if DEBUG_FRAGS
 884    setup->numFragsEmitted = 0;
 885    setup->numFragsWritten = 0;
 886 #endif
 887
 888    if (cull_tri( setup, det ))
 889       return;
 890
 891    if (!setup_sort_vertices( setup, det, v0, v1, v2 ))
 892       return;
 893    setup_tri_coefficients( setup );
 894    setup_tri_edges( setup );
 895
 896    assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_TRIANGLES);
 897
 898    setup->span.y = 0;
 899    setup->span.right[0] = 0;
 900    setup->span.right[1] = 0;
 901    /*   setup->span.z_mode = tri_z_mode( setup->ctx ); */
 902
 903    /*   init_constant_attribs( setup ); */
 904
 905    if (setup->oneoverarea < 0.0) {
 906       /* emaj on left:
 907        */
 908       subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
 909       subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
 910    }
 911    else {
 912       /* emaj on right:
 913        */
 914       subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
 915       subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
 916    }
 917
 918    flush_spans( setup );
 919
 920 #if DEBUG_FRAGS
 921    printf("Tri: %u frags emitted, %u written\n",
 922           setup->numFragsEmitted,
 923           setup->numFragsWritten);
 924 #endif
 925 }
 926
 927
 928
 929 /**
 930  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 931  * for a line.
 932  */
 933 static void
 934 linear_pos_coeff(struct setup_context *setup,
 935                  uint vertSlot, uint i)
 936 {
 937    const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
 938    const float dadx = da * setup->emaj.dx * setup->oneoverarea;
 939    const float dady = da * setup->emaj.dy * setup->oneoverarea;
 940    setup->coef.dadx[0][i] = dadx;
 941    setup->coef.dady[0][i] = dady;
 942    setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
 943                            (dadx * (setup->vmin[0][0] - 0.5f) +
 944                             dady * (setup->vmin[0][1] - 0.5f)));
 945 }
 946
 947
 948 /**
 949  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 950  * for a line.
 951  */
 952 static void
 953 line_linear_coeff(struct setup_context *setup,
 954                   unsigned attrib,
 955                   uint vertSlot)
 956 {
 957    unsigned i;
 958    for (i = 0; i < NUM_CHANNELS; ++i) {
 959       const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
 960       const float dadx = da * setup->emaj.dx * setup->oneoverarea;
 961       const float dady = da * setup->emaj.dy * setup->oneoverarea;
 962       setup->coef.dadx[1 + attrib][i] = dadx;
 963       setup->coef.dady[1 + attrib][i] = dady;
 964       setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
 965                      (dadx * (setup->vmin[0][0] - 0.5f) +
 966                       dady * (setup->vmin[0][1] - 0.5f)));
 967    }
 968 }
 969
 970
 971 /**
 972  * Compute a0, dadx and dady for a perspective-corrected interpolant,
 973  * for a line.
 974  */
 975 static void
 976 line_persp_coeff(struct setup_context *setup,
 977                  unsigned attrib,
 978                  uint vertSlot)
 979 {
 980    unsigned i;
 981    for (i = 0; i < NUM_CHANNELS; ++i) {
 982       /* XXX double-check/verify this arithmetic */
 983       const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3];
 984       const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3];
 985       const float da = a1 - a0;
 986       const float dadx = da * setup->emaj.dx * setup->oneoverarea;
 987       const float dady = da * setup->emaj.dy * setup->oneoverarea;
 988       setup->coef.dadx[1 + attrib][i] = dadx;
 989       setup->coef.dady[1 + attrib][i] = dady;
 990       setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
 991                      (dadx * (setup->vmin[0][0] - 0.5f) +
 992                       dady * (setup->vmin[0][1] - 0.5f)));
 993    }
 994 }
 995
 996
 997 /**
 998  * Compute the setup->coef[] array dadx, dady, a0 values.
 999  * Must be called after setup->vmin,vmax are initialized.
1000  */
1001 static INLINE boolean
1002 setup_line_coefficients(struct setup_context *setup,
1003                         const float (*v0)[4],
1004                         const float (*v1)[4])
1005 {
1006    struct llvmpipe_context *llvmpipe = setup->llvmpipe;
1007    const struct lp_fragment_shader *lpfs = llvmpipe->fs;
1008    const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
1009    uint fragSlot;
1010    float area;
1011
1012    /* use setup->vmin, vmax to point to vertices */
1013    if (llvmpipe->rasterizer->flatshade_first)
1014       setup->vprovoke = v0;
1015    else
1016       setup->vprovoke = v1;
1017    setup->vmin = v0;
1018    setup->vmax = v1;
1019
1020    setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
1021    setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
1022
1023    /* NOTE: this is not really area but something proportional to it */
1024    area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy;
1025    if (area == 0.0f || util_is_inf_or_nan(area))
1026       return FALSE;
1027    setup->oneoverarea = 1.0f / area;
1028
1029    /* z and w are done by linear interpolation:
1030     */
1031    linear_pos_coeff(setup, 0, 2);
1032    linear_pos_coeff(setup, 0, 3);
1033
1034    /* setup interpolation for all the remaining attributes:
1035     */
1036    for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
1037       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
1038
1039       switch (vinfo->attrib[fragSlot].interp_mode) {
1040       case INTERP_CONSTANT:
1041          const_coeff(setup, fragSlot, vertSlot);
1042          break;
1043       case INTERP_LINEAR:
1044          line_linear_coeff(setup, fragSlot, vertSlot);
1045          break;
1046       case INTERP_PERSPECTIVE:
1047          line_persp_coeff(setup, fragSlot, vertSlot);
1048          break;
1049       case INTERP_POS:
1050          setup_fragcoord_coeff(setup, fragSlot);
1051          break;
1052       default:
1053          assert(0);
1054       }
1055
1056       if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
1057          setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
1058          setup->coef.dadx[1 + fragSlot][0] = 0.0;
1059          setup->coef.dady[1 + fragSlot][0] = 0.0;
1060       }
1061    }
1062    return TRUE;
1063 }
1064
1065
1066 /**
1067  * Plot a pixel in a line segment.
1068  */
1069 static INLINE void
1070 plot(struct setup_context *setup, int x, int y)
1071 {
1072    const int iy = y & 1;
1073    const int ix = x & 1;
1074    const int quadX = x - ix;
1075    const int quadY = y - iy;
1076    const int mask = (1 << ix) << (2 * iy);
1077
1078    if (quadX != setup->quad[0].input.x0 ||
1079        quadY != setup->quad[0].input.y0)
1080    {
1081       /* flush prev quad, start new quad */
1082
1083       if (setup->quad[0].input.x0 != -1)
1084          clip_emit_quad( setup, &setup->quad[0] );
1085
1086       setup->quad[0].input.x0 = quadX;
1087       setup->quad[0].input.y0 = quadY;
1088       setup->quad[0].inout.mask = 0x0;
1089    }
1090
1091    setup->quad[0].inout.mask |= mask;
1092 }
1093
1094
1095 /**
1096  * Do setup for line rasterization, then render the line.
1097  * Single-pixel width, no stipple, etc.  We rely on the 'draw' module
1098  * to handle stippling and wide lines.
1099  */
1100 void
1101 llvmpipe_setup_line(struct setup_context *setup,
1102            const float (*v0)[4],
1103            const float (*v1)[4])
1104 {
1105    int x0 = (int) v0[0][0];
1106    int x1 = (int) v1[0][0];
1107    int y0 = (int) v0[0][1];
1108    int y1 = (int) v1[0][1];
1109    int dx = x1 - x0;
1110    int dy = y1 - y0;
1111    int xstep, ystep;
1112
1113 #if DEBUG_VERTS
1114    debug_printf("Setup line:\n");
1115    print_vertex(setup, v0);
1116    print_vertex(setup, v1);
1117 #endif
1118
1119    if (setup->llvmpipe->no_rast)
1120       return;
1121
1122    if (dx == 0 && dy == 0)
1123       return;
1124
1125    if (!setup_line_coefficients(setup, v0, v1))
1126       return;
1127
1128    assert(v0[0][0] < 1.0e9);
1129    assert(v0[0][1] < 1.0e9);
1130    assert(v1[0][0] < 1.0e9);
1131    assert(v1[0][1] < 1.0e9);
1132
1133    if (dx < 0) {
1134       dx = -dx;   /* make positive */
1135       xstep = -1;
1136    }
1137    else {
1138       xstep = 1;
1139    }
1140
1141    if (dy < 0) {
1142       dy = -dy;   /* make positive */
1143       ystep = -1;
1144    }
1145    else {
1146       ystep = 1;
1147    }
1148
1149    assert(dx >= 0);
1150    assert(dy >= 0);
1151    assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_LINES);
1152
1153    setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1;
1154    setup->quad[0].inout.mask = 0x0;
1155
1156    /* XXX temporary: set coverage to 1.0 so the line appears
1157     * if AA mode happens to be enabled.
1158     */
1159    setup->quad[0].input.coverage[0] =
1160    setup->quad[0].input.coverage[1] =
1161    setup->quad[0].input.coverage[2] =
1162    setup->quad[0].input.coverage[3] = 1.0;
1163
1164    if (dx > dy) {
1165       /*** X-major line ***/
1166       int i;
1167       const int errorInc = dy + dy;
1168       int error = errorInc - dx;
1169       const int errorDec = error - dx;
1170
1171       for (i = 0; i < dx; i++) {
1172          plot(setup, x0, y0);
1173
1174          x0 += xstep;
1175          if (error < 0) {
1176             error += errorInc;
1177          }
1178          else {
1179             error += errorDec;
1180             y0 += ystep;
1181          }
1182       }
1183    }
1184    else {
1185       /*** Y-major line ***/
1186       int i;
1187       const int errorInc = dx + dx;
1188       int error = errorInc - dy;
1189       const int errorDec = error - dy;
1190
1191       for (i = 0; i < dy; i++) {
1192          plot(setup, x0, y0);
1193
1194          y0 += ystep;
1195          if (error < 0) {
1196             error += errorInc;
1197          }
1198          else {
1199             error += errorDec;
1200             x0 += xstep;
1201          }
1202       }
1203    }
1204
1205    /* draw final quad */
1206    if (setup->quad[0].inout.mask) {
1207       clip_emit_quad( setup, &setup->quad[0] );
1208    }
1209 }
1210
1211
1212 static void
1213 point_persp_coeff(struct setup_context *setup,
1214                   const float (*vert)[4],
1215                   unsigned attrib,
1216                   uint vertSlot)
1217 {
1218    unsigned i;
1219    for(i = 0; i < NUM_CHANNELS; ++i) {
1220       setup->coef.dadx[1 + attrib][i] = 0.0F;
1221       setup->coef.dady[1 + attrib][i] = 0.0F;
1222       setup->coef.a0[1 + attrib][i] = vert[vertSlot][i] * vert[0][3];
1223    }
1224 }
1225
1226
1227 /**
1228  * Do setup for point rasterization, then render the point.
1229  * Round or square points...
1230  * XXX could optimize a lot for 1-pixel points.
1231  */
1232 void
1233 llvmpipe_setup_point( struct setup_context *setup,
1234              const float (*v0)[4] )
1235 {
1236    struct llvmpipe_context *llvmpipe = setup->llvmpipe;
1237    const struct lp_fragment_shader *lpfs = llvmpipe->fs;
1238    const int sizeAttr = setup->llvmpipe->psize_slot;
1239    const float size
1240       = sizeAttr > 0 ? v0[sizeAttr][0]
1241       : setup->llvmpipe->rasterizer->point_size;
1242    const float halfSize = 0.5F * size;
1243    const boolean round = (boolean) setup->llvmpipe->rasterizer->point_smooth;
1244    const float x = v0[0][0];  /* Note: data[0] is always position */
1245    const float y = v0[0][1];
1246    const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
1247    uint fragSlot;
1248
1249 #if DEBUG_VERTS
1250    debug_printf("Setup point:\n");
1251    print_vertex(setup, v0);
1252 #endif
1253
1254    if (llvmpipe->no_rast)
1255       return;
1256
1257    assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_POINTS);
1258
1259    /* For points, all interpolants are constant-valued.
1260     * However, for point sprites, we'll need to setup texcoords appropriately.
1261     * XXX: which coefficients are the texcoords???
1262     * We may do point sprites as textured quads...
1263     *
1264     * KW: We don't know which coefficients are texcoords - ultimately
1265     * the choice of what interpolation mode to use for each attribute
1266     * should be determined by the fragment program, using
1267     * per-attribute declaration statements that include interpolation
1268     * mode as a parameter.  So either the fragment program will have
1269     * to be adjusted for pointsprite vs normal point behaviour, or
1270     * otherwise a special interpolation mode will have to be defined
1271     * which matches the required behaviour for point sprites.  But -
1272     * the latter is not a feature of normal hardware, and as such
1273     * probably should be ruled out on that basis.
1274     */
1275    setup->vprovoke = v0;
1276
1277    /* setup Z, W */
1278    const_pos_coeff(setup, 0, 2);
1279    const_pos_coeff(setup, 0, 3);
1280
1281    for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
1282       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
1283
1284       switch (vinfo->attrib[fragSlot].interp_mode) {
1285       case INTERP_CONSTANT:
1286          /* fall-through */
1287       case INTERP_LINEAR:
1288          const_coeff(setup, fragSlot, vertSlot);
1289          break;
1290       case INTERP_PERSPECTIVE:
1291          point_persp_coeff(setup, setup->vprovoke, fragSlot, vertSlot);
1292          break;
1293       case INTERP_POS:
1294          setup_fragcoord_coeff(setup, fragSlot);
1295          break;
1296       default:
1297          assert(0);
1298       }
1299
1300       if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
1301          setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
1302          setup->coef.dadx[1 + fragSlot][0] = 0.0;
1303          setup->coef.dady[1 + fragSlot][0] = 0.0;
1304       }
1305    }
1306
1307
1308    if (halfSize <= 0.5 && !round) {
1309       /* special case for 1-pixel points */
1310       const int ix = ((int) x) & 1;
1311       const int iy = ((int) y) & 1;
1312       setup->quad[0].input.x0 = (int) x - ix;
1313       setup->quad[0].input.y0 = (int) y - iy;
1314       setup->quad[0].inout.mask = (1 << ix) << (2 * iy);
1315       clip_emit_quad( setup, &setup->quad[0] );
1316    }
1317    else {
1318       if (round) {
1319          /* rounded points */
1320          const int ixmin = block((int) (x - halfSize));
1321          const int ixmax = block((int) (x + halfSize));
1322          const int iymin = block((int) (y - halfSize));
1323          const int iymax = block((int) (y + halfSize));
1324          const float rmin = halfSize - 0.7071F;  /* 0.7071 = sqrt(2)/2 */
1325          const float rmax = halfSize + 0.7071F;
1326          const float rmin2 = MAX2(0.0F, rmin * rmin);
1327          const float rmax2 = rmax * rmax;
1328          const float cscale = 1.0F / (rmax2 - rmin2);
1329          int ix, iy;
1330
1331          for (iy = iymin; iy <= iymax; iy += 2) {
1332             for (ix = ixmin; ix <= ixmax; ix += 2) {
1333                float dx, dy, dist2, cover;
1334
1335                setup->quad[0].inout.mask = 0x0;
1336
1337                dx = (ix + 0.5f) - x;
1338                dy = (iy + 0.5f) - y;
1339                dist2 = dx * dx + dy * dy;
1340                if (dist2 <= rmax2) {
1341                   cover = 1.0F - (dist2 - rmin2) * cscale;
1342                   setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
1343                   setup->quad[0].inout.mask |= MASK_TOP_LEFT;
1344                }
1345
1346                dx = (ix + 1.5f) - x;
1347                dy = (iy + 0.5f) - y;
1348                dist2 = dx * dx + dy * dy;
1349                if (dist2 <= rmax2) {
1350                   cover = 1.0F - (dist2 - rmin2) * cscale;
1351                   setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
1352                   setup->quad[0].inout.mask |= MASK_TOP_RIGHT;
1353                }
1354
1355                dx = (ix + 0.5f) - x;
1356                dy = (iy + 1.5f) - y;
1357                dist2 = dx * dx + dy * dy;
1358                if (dist2 <= rmax2) {
1359                   cover = 1.0F - (dist2 - rmin2) * cscale;
1360                   setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
1361                   setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT;
1362                }
1363
1364                dx = (ix + 1.5f) - x;
1365                dy = (iy + 1.5f) - y;
1366                dist2 = dx * dx + dy * dy;
1367                if (dist2 <= rmax2) {
1368                   cover = 1.0F - (dist2 - rmin2) * cscale;
1369                   setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
1370                   setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT;
1371                }
1372
1373                if (setup->quad[0].inout.mask) {
1374                   setup->quad[0].input.x0 = ix;
1375                   setup->quad[0].input.y0 = iy;
1376                   clip_emit_quad( setup, &setup->quad[0] );
1377                }
1378             }
1379          }
1380       }
1381       else {
1382          /* square points */
1383          const int xmin = (int) (x + 0.75 - halfSize);
1384          const int ymin = (int) (y + 0.25 - halfSize);
1385          const int xmax = xmin + (int) size;
1386          const int ymax = ymin + (int) size;
1387          /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
1388          const int ixmin = block(xmin);
1389          const int ixmax = block(xmax - 1);
1390          const int iymin = block(ymin);
1391          const int iymax = block(ymax - 1);
1392          int ix, iy;
1393
1394          /*
1395          debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
1396          */
1397          for (iy = iymin; iy <= iymax; iy += 2) {
1398             uint rowMask = 0xf;
1399             if (iy < ymin) {
1400                /* above the top edge */
1401                rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
1402             }
1403             if (iy + 1 >= ymax) {
1404                /* below the bottom edge */
1405                rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
1406             }
1407
1408             for (ix = ixmin; ix <= ixmax; ix += 2) {
1409                uint mask = rowMask;
1410
1411                if (ix < xmin) {
1412                   /* fragment is past left edge of point, turn off left bits */
1413                   mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
1414                }
1415                if (ix + 1 >= xmax) {
1416                   /* past the right edge */
1417                   mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
1418                }
1419
1420                setup->quad[0].inout.mask = mask;
1421                setup->quad[0].input.x0 = ix;
1422                setup->quad[0].input.y0 = iy;
1423                clip_emit_quad( setup, &setup->quad[0] );
1424             }
1425          }
1426       }
1427    }
1428 }
1429
1430 void llvmpipe_setup_prepare( struct setup_context *setup )
1431 {
1432    struct llvmpipe_context *lp = setup->llvmpipe;
1433
1434    if (lp->dirty) {
1435       llvmpipe_update_derived(lp);
1436    }
1437
1438    if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
1439        lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL &&
1440        lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) {
1441       /* we'll do culling */
1442       setup->winding = lp->rasterizer->cull_mode;
1443    }
1444    else {
1445       /* 'draw' will do culling */
1446       setup->winding = PIPE_WINDING_NONE;
1447    }
1448 }
1449
1450
1451
/**
 * Release a setup context.
 *
 * The context is handed to align_free(), the counterpart of the
 * align_malloc() call used by llvmpipe_setup_create_context().
 */
void
llvmpipe_setup_destroy_context( struct setup_context *setup )
{
   align_free( setup );
}
1456
1457
1458 /**
1459  * Create a new primitive setup/render stage.
1460  */
1461 struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe )
1462 {
1463    struct setup_context *setup;
1464    unsigned i;
1465
1466    setup = align_malloc(sizeof(struct setup_context), 16);
1467    if (!setup)
1468       return NULL;
1469
1470    memset(setup, 0, sizeof *setup);
1471    setup->llvmpipe = llvmpipe;
1472
1473    for (i = 0; i < MAX_QUADS; i++) {
1474       setup->quad[i].coef = &setup->coef;
1475    }
1476
1477    setup->span.left[0] = 1000000;     /* greater than right[0] */
1478    setup->span.left[1] = 1000000;     /* greater than right[1] */
1479
1480    return setup;
1481 }
1482