src/mesa/pipe/cell/spu/spu_tri.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * Triangle rendering within a tile.
  30  */
  31
  32 #include "pipe/p_compiler.h"
  33 #include "pipe/p_format.h"
  34 #include "pipe/p_util.h"
  35 #include "spu_colorpack.h"
  36 #include "spu_main.h"
  37 #include "spu_texture.h"
  38 #include "spu_tile.h"
  39 #include "spu_tri.h"
  40
  41 #include "spu_ztest.h"
  42
  43
/**
 * Per-quad coverage mask: a vector of four unsigned ints, one element per
 * fragment of a 2x2 quad (element order matches QUAD_TOP_LEFT ..
 * QUAD_BOTTOM_RIGHT).  Each element is either 0 or 0xffffffff.
 */
typedef vector unsigned int mask_t;
  46
  47
/**
 * Simplified types taken from other parts of Gallium.
 *
 * A vertex is viewed as a bare array of 4-float attributes, addressed as
 * data[slot][component].  In this file slot 0 is read as the window
 * position: component 0 is X, component 1 is Y and component 2 is Z.
 */
struct vertex_header {
   float data[0][4];   /* zero-length array: overlays the caller's floats */
};
  54
  55
  56
/* XXX fix this
 *
 * Cheap stand-in for a ceiling function: add just under 1.0 and truncate
 * through int.
 * NOTE(review): the int cast truncates toward zero, so this is not a true
 * ceiling for inputs below -1 or for inputs whose fractional part is
 * non-zero but smaller than 0.00001 -- confirm callers can tolerate that.
 */
#undef CEILF
#define CEILF(X) ((float) (int) ((X) + 0.99999))
  60
  61
/*
 * Index of each fragment within a 2x2 quad.  These are the element
 * positions used for the per-quad result arrays and coverage vectors.
 */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
/* One bit per quad fragment, for bitmask-style coverage masks. */
#define MASK_TOP_LEFT     (1 << QUAD_TOP_LEFT)
#define MASK_TOP_RIGHT    (1 << QUAD_TOP_RIGHT)
#define MASK_BOTTOM_LEFT  (1 << QUAD_BOTTOM_LEFT)
#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
/* All four fragment bits set. */
#define MASK_ALL          0xf
  71
  72
/* Set to 1 to compile in print_vertex() and the per-triangle vertex dump. */
#define DEBUG_VERTS 0
  74
/**
 * Triangle edge info.
 *
 * dx/dy are filled in by setup_sort_vertices(); dxdy, sx, sy and lines are
 * filled in by setup_tri_edges().  subtriangle() advances sx and sy as it
 * consumes scanlines so that a shared edge can be continued.
 */
struct edge {
   float dx;            /**< X(v1) - X(v0), used only during setup */
   float dy;            /**< Y(v1) - Y(v0), used only during setup */
   float dxdy;          /**< dx/dy */
   float sx, sy;        /**< first sample point coord */
   int lines;           /**< number of lines on this edge */
};
  85
  86
/**
 * Plane-equation coefficients for one 4-component attribute.
 * The value at window position (x, y) is evaluated in this file as
 * a0 + x * dadx + y * dady, per component.
 */
struct interp_coef
{
   float4 a0;     /**< attribute value at the (0, 0) sample */
   float4 dadx;   /**< change in attribute per unit step in X */
   float4 dady;   /**< change in attribute per unit step in Y */
};
  93
  94
/**
 * Triangle setup info (derived from draw_stage).
 * Also used for line drawing (taking some liberties).
 */
struct setup_stage {

   /* Vertices are just an array of floats making up each attribute in
    * turn.  Currently fixed at 4 floats, but should change in time.
    * Codegen will help cope with this.
    */
   const struct vertex_header *vmax;      /**< vertex with the largest Y */
   const struct vertex_header *vmid;      /**< vertex with the middle Y */
   const struct vertex_header *vmin;      /**< vertex with the smallest Y */
   const struct vertex_header *vprovoke;  /**< source vertex for flat-shaded attributes */

   struct edge ebot;    /**< edge vmin -> vmid */
   struct edge etop;    /**< edge vmid -> vmax */
   struct edge emaj;    /**< edge vmin -> vmax (spans the full height) */

   float oneoverarea;   /**< 1 / area of the sorted triangle; sign picks the major-edge side */

   uint tx, ty;         /**< tile coordinates passed to tri_draw() */

   /* Clip rectangle in pixels; tri_draw() sets it to the tile bounds, with
    * the max values being one past the last pixel of the tile.
    */
   int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;

#if 0
   struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
#else
   struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];   /**< per-slot plane equations */
#endif

#if 0
   struct quad_header quad;
#endif

   /* Pending pair of scanlines (one row of 2x2 quads), flushed by
    * flush_spans().
    */
   struct {
      int left[2];   /**< [0] = row0, [1] = row1 */
      int right[2];  /**< one past the last covered pixel, same indexing as left[] */
      int y;         /**< Y of the quad row (always even, see block()) */
      unsigned y_flags;  /**< bit 0: even row written, bit 1: odd row written */
      unsigned mask;     /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
   } span;
};
 138
 139
 140
/* The single, file-local setup state shared by every function below. */
static struct setup_stage setup;
 142
 143
 144
 145
#if 0
/**
 * Basically a cast wrapper.
 * Compiled out: this file works on the file-scope 'setup' object instead
 * of a draw_stage pointer.
 */
static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
{
   return (struct setup_stage *)stage;
}
#endif
 155
#if 0
/**
 * Clip setup.quad against the scissor/surface bounds.
 *
 * Compiled out.  NOTE(review): as written this would not build if enabled;
 * it applies '.' to the pointer parameter 'setup' and refers to 'softpipe'
 * and 'quad' members that struct setup_stage does not have in this file.
 */
static INLINE void
quad_clip(struct setup_stage *setup)
{
   const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect;
   const int minx = (int) cliprect->minx;
   const int maxx = (int) cliprect->maxx;
   const int miny = (int) cliprect->miny;
   const int maxy = (int) cliprect->maxy;

   if (setup.quad.x0 >= maxx ||
       setup.quad.y0 >= maxy ||
       setup.quad.x0 + 1 < minx ||
       setup.quad.y0 + 1 < miny) {
      /* totally clipped */
      setup.quad.mask = 0x0;
      return;
   }
   /* partially clipped: drop the column/row of the quad that is outside */
   if (setup.quad.x0 < minx)
      setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
   if (setup.quad.y0 < miny)
      setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
   if (setup.quad.x0 == maxx - 1)
      setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
   if (setup.quad.y0 == maxy - 1)
      setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
}
#endif
 187
#if 0
/**
 * Emit a quad (pass to next stage) with clipping.
 * Compiled out together with quad_clip(), which it depends on.
 */
static INLINE void
clip_emit_quad(struct setup_stage *setup)
{
   quad_clip(setup);
   /* only forward the quad if clipping left at least one fragment */
   if (setup.quad.mask) {
      struct softpipe_context *sp = setup.softpipe;
      sp->quad.first->run(sp->quad.first, &setup.quad);
   }
}
#endif
 202
 203 /**
 204  * Evaluate attribute coefficients (plane equations) to compute
 205  * attribute values for the four fragments in a quad.
 206  * Eg: four colors will be compute.
 207  */
 208 static INLINE void
 209 eval_coeff(uint slot, float x, float y, float4 result[4])
 210 {
 211    switch (spu.vertex_info.interp_mode[slot]) {
 212    case INTERP_CONSTANT:
 213       result[QUAD_TOP_LEFT] =
 214       result[QUAD_TOP_RIGHT] =
 215       result[QUAD_BOTTOM_LEFT] =
 216       result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0;
 217       break;
 218
 219    case INTERP_LINEAR:
 220       /* fall-through, for now */
 221    default:
 222       {
 223          register vector float dadx = setup.coef[slot].dadx.v;
 224          register vector float dady = setup.coef[slot].dady.v;
 225          register vector float topLeft
 226             = spu_add(setup.coef[slot].a0.v,
 227                       spu_add(spu_mul(spu_splats(x), dadx),
 228                               spu_mul(spu_splats(y), dady)));
 229
 230          result[QUAD_TOP_LEFT].v = topLeft;
 231          result[QUAD_TOP_RIGHT].v = spu_add(topLeft, dadx);
 232          result[QUAD_BOTTOM_LEFT].v = spu_add(topLeft, dady);
 233          result[QUAD_BOTTOM_RIGHT].v = spu_add(spu_add(topLeft, dadx), dady);
 234       }
 235    }
 236 }
 237
 238
 239 static INLINE vector float
 240 eval_z(float x, float y)
 241 {
 242    const uint slot = 0;
 243    const float dzdx = setup.coef[slot].dadx.f[2];
 244    const float dzdy = setup.coef[slot].dady.f[2];
 245    const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy;
 246    const vector float topLeftv = spu_splats(topLeft);
 247    const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy };
 248    return spu_add(topLeftv, derivs);
 249 }
 250
 251
 252 static INLINE mask_t
 253 do_depth_test(int x, int y, mask_t quadmask)
 254 {
 255    float4 zvals;
 256    mask_t mask;
 257
 258    zvals.v = eval_z((float) x, (float) y);
 259
 260    if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
 261       int ix = (x - setup.cliprect_minx) / 4;
 262       int iy = (y - setup.cliprect_miny) / 2;
 263       mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask);
 264    }
 265    else {
 266       int ix = (x - setup.cliprect_minx) / 2;
 267       int iy = (y - setup.cliprect_miny) / 2;
 268       mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask);
 269    }
 270
 271    if (spu_extract(spu_orx(mask), 0))
 272       spu.cur_ztile_status = TILE_STATUS_DIRTY;
 273
 274    return mask;
 275 }
 276
 277
 278 /**
 279  * Emit a quad (pass to next stage).  No clipping is done.
 280  * Note: about 1/5 to 1/7 of the time, mask is zero and this function
 281  * should be skipped.  But adding the test for that slows things down
 282  * overall.
 283  */
 284 static INLINE void
 285 emit_quad( int x, int y, mask_t mask )
 286 {
 287 #if 0
 288    struct softpipe_context *sp = setup.softpipe;
 289    setup.quad.x0 = x;
 290    setup.quad.y0 = y;
 291    setup.quad.mask = mask;
 292    sp->quad.first->run(sp->quad.first, &setup.quad);
 293 #else
 294
 295    if (spu.depth_stencil.depth.enabled) {
 296       mask = do_depth_test(x, y, mask);
 297    }
 298
 299    /* If any bits in mask are set... */
 300    if (spu_extract(spu_orx(mask), 0)) {
 301       const int ix = x - setup.cliprect_minx;
 302       const int iy = y - setup.cliprect_miny;
 303
 304       spu.cur_ctile_status = TILE_STATUS_DIRTY;
 305
 306       if (spu.texture.start) {
 307          /* texture mapping */
 308          float4 texcoords[4];
 309          eval_coeff(2, (float) x, (float) y, texcoords);
 310
 311          if (spu_extract(mask, 0))
 312             spu.ctile.ui[iy][ix] = sample_texture(texcoords[0]);
 313          if (spu_extract(mask, 1))
 314             spu.ctile.ui[iy][ix+1] = sample_texture(texcoords[1]);
 315          if (spu_extract(mask, 2))
 316             spu.ctile.ui[iy+1][ix] = sample_texture(texcoords[2]);
 317          if (spu_extract(mask, 3))
 318             spu.ctile.ui[iy+1][ix+1] = sample_texture(texcoords[3]);
 319       }
 320       else {
 321          /* simple shading */
 322          const vector unsigned char shuffle = spu.color_shuffle;
 323          float4 colors[4];
 324          eval_coeff(1, (float) x, (float) y, colors);
 325
 326          if (spu_extract(mask, 0))
 327             spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0].v, shuffle);
 328          if (spu_extract(mask, 1))
 329             spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1].v, shuffle);
 330          if (spu_extract(mask, 2))
 331             spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2].v, shuffle);
 332          if (spu_extract(mask, 3))
 333             spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3].v, shuffle);
 334       }
 335
 336 #if 0
 337       /* SIMD_Z with swizzled color buffer (someday) */
 338       vector unsigned int uicolors = *((vector unsigned int *) &colors);
 339       spu.ctile.ui4[iy/2][ix/2] = spu_sel(spu.ctile.ui4[iy/2][ix/2], uicolors, mask);
 340 #endif
 341    }
 342
 343 #endif
 344 }
 345
 346
 347 /**
 348  * Given an X or Y coordinate, return the block/quad coordinate that it
 349  * belongs to.
 350  */
 351 static INLINE int block( int x )
 352 {
 353    return x & ~1;
 354 }
 355
 356
 357 /**
 358  * Compute mask which indicates which pixels in the 2x2 quad are actually inside
 359  * the triangle's bounds.
 360  * The mask is a uint4 vector and each element will be 0 or 0xffffffff.
 361  */
 362 static INLINE mask_t calculate_mask( int x )
 363 {
 364    /* This is a little tricky.
 365     * Use & instead of && to avoid branches.
 366     * Use negation to convert true/false to ~0/0 values.
 367     */
 368    mask_t mask;
 369    mask = spu_insert(-((x   >= setup.span.left[0]) & (x   < setup.span.right[0])), mask, 0);
 370    mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1);
 371    mask = spu_insert(-((x   >= setup.span.left[1]) & (x   < setup.span.right[1])), mask, 2);
 372    mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3);
 373    return mask;
 374 }
 375
 376
 377 /**
 378  * Render a horizontal span of quads
 379  */
 380 static void flush_spans( void )
 381 {
 382    int minleft, maxright;
 383    int x;
 384
 385    switch (setup.span.y_flags) {
 386    case 0x3:
 387       /* both odd and even lines written (both quad rows) */
 388       minleft = MIN2(setup.span.left[0], setup.span.left[1]);
 389       maxright = MAX2(setup.span.right[0], setup.span.right[1]);
 390       break;
 391
 392    case 0x1:
 393       /* only even line written (quad top row) */
 394       minleft = setup.span.left[0];
 395       maxright = setup.span.right[0];
 396       break;
 397
 398    case 0x2:
 399       /* only odd line written (quad bottom row) */
 400       minleft = setup.span.left[1];
 401       maxright = setup.span.right[1];
 402       break;
 403
 404    default:
 405       return;
 406    }
 407
 408
 409    /* _really_ clear tiles now if needed */
 410    if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
 411       clear_c_tile(&spu.ctile);
 412       spu.cur_ctile_status = TILE_STATUS_DIRTY;
 413    }
 414    if (spu.depth_stencil.depth.enabled &&
 415        spu.cur_ztile_status == TILE_STATUS_CLEAR) {
 416       clear_z_tile(&spu.ztile);
 417       spu.cur_ztile_status = TILE_STATUS_DIRTY;
 418    }
 419
 420    /* XXX this loop could be moved into the above switch cases and
 421     * calculate_mask() could be simplified a bit...
 422     */
 423    for (x = block(minleft); x <= block(maxright); x += 2) {
 424       emit_quad( x, setup.span.y, calculate_mask( x ) );
 425    }
 426
 427    setup.span.y = 0;
 428    setup.span.y_flags = 0;
 429    setup.span.right[0] = 0;
 430    setup.span.right[1] = 0;
 431 }
 432
 433 #if DEBUG_VERTS
 434 static void print_vertex(const struct vertex_header *v)
 435 {
 436    int i;
 437    fprintf(stderr, "Vertex: (%p)\n", v);
 438    for (i = 0; i < setup.quad.nr_attrs; i++) {
 439       fprintf(stderr, "  %d: %f %f %f %f\n",  i,
 440               v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]);
 441    }
 442 }
 443 #endif
 444
 445 static boolean setup_sort_vertices(const struct vertex_header *v0,
 446                                    const struct vertex_header *v1,
 447                                    const struct vertex_header *v2)
 448 {
 449
 450 #if DEBUG_VERTS
 451    fprintf(stderr, "Triangle:\n");
 452    print_vertex(v0);
 453    print_vertex(v1);
 454    print_vertex(v2);
 455 #endif
 456
 457    setup.vprovoke = v2;
 458
 459    /* determine bottom to top order of vertices */
 460    {
 461       float y0 = v0->data[0][1];
 462       float y1 = v1->data[0][1];
 463       float y2 = v2->data[0][1];
 464       if (y0 <= y1) {
 465          if (y1 <= y2) {
 466             /* y0<=y1<=y2 */
 467             setup.vmin = v0;
 468             setup.vmid = v1;
 469             setup.vmax = v2;
 470          }
 471          else if (y2 <= y0) {
 472             /* y2<=y0<=y1 */
 473             setup.vmin = v2;
 474             setup.vmid = v0;
 475             setup.vmax = v1;
 476          }
 477          else {
 478             /* y0<=y2<=y1 */
 479             setup.vmin = v0;
 480             setup.vmid = v2;
 481             setup.vmax = v1;
 482          }
 483       }
 484       else {
 485          if (y0 <= y2) {
 486             /* y1<=y0<=y2 */
 487             setup.vmin = v1;
 488             setup.vmid = v0;
 489             setup.vmax = v2;
 490          }
 491          else if (y2 <= y1) {
 492             /* y2<=y1<=y0 */
 493             setup.vmin = v2;
 494             setup.vmid = v1;
 495             setup.vmax = v0;
 496          }
 497          else {
 498             /* y1<=y2<=y0 */
 499             setup.vmin = v1;
 500             setup.vmid = v2;
 501             setup.vmax = v0;
 502          }
 503       }
 504    }
 505
 506    /* Check if triangle is completely outside the tile bounds */
 507    if (setup.vmin->data[0][1] > setup.cliprect_maxy)
 508       return FALSE;
 509    if (setup.vmax->data[0][1] < setup.cliprect_miny)
 510       return FALSE;
 511    if (setup.vmin->data[0][0] < setup.cliprect_minx &&
 512        setup.vmid->data[0][0] < setup.cliprect_minx &&
 513        setup.vmax->data[0][0] < setup.cliprect_minx)
 514       return FALSE;
 515    if (setup.vmin->data[0][0] > setup.cliprect_maxx &&
 516        setup.vmid->data[0][0] > setup.cliprect_maxx &&
 517        setup.vmax->data[0][0] > setup.cliprect_maxx)
 518       return FALSE;
 519
 520    setup.ebot.dx = setup.vmid->data[0][0] - setup.vmin->data[0][0];
 521    setup.ebot.dy = setup.vmid->data[0][1] - setup.vmin->data[0][1];
 522    setup.emaj.dx = setup.vmax->data[0][0] - setup.vmin->data[0][0];
 523    setup.emaj.dy = setup.vmax->data[0][1] - setup.vmin->data[0][1];
 524    setup.etop.dx = setup.vmax->data[0][0] - setup.vmid->data[0][0];
 525    setup.etop.dy = setup.vmax->data[0][1] - setup.vmid->data[0][1];
 526
 527    /*
 528     * Compute triangle's area.  Use 1/area to compute partial
 529     * derivatives of attributes later.
 530     *
 531     * The area will be the same as prim->det, but the sign may be
 532     * different depending on how the vertices get sorted above.
 533     *
 534     * To determine whether the primitive is front or back facing we
 535     * use the prim->det value because its sign is correct.
 536     */
 537    {
 538       const float area = (setup.emaj.dx * setup.ebot.dy -
 539                             setup.ebot.dx * setup.emaj.dy);
 540
 541       setup.oneoverarea = 1.0f / area;
 542       /*
 543       _mesa_printf("%s one-over-area %f  area %f  det %f\n",
 544                    __FUNCTION__, setup.oneoverarea, area, prim->det );
 545       */
 546    }
 547
 548 #if 0
 549    /* We need to know if this is a front or back-facing triangle for:
 550     *  - the GLSL gl_FrontFacing fragment attribute (bool)
 551     *  - two-sided stencil test
 552     */
 553    setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
 554 #endif
 555
 556    return TRUE;
 557 }
 558
 559
 560 /**
 561  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
 562  * The value value comes from vertex->data[slot].
 563  * The result will be put into setup.coef[slot].a0.
 564  * \param slot  which attribute slot
 565  */
 566 static INLINE void const_coeff(uint slot)
 567 {
 568    setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0};
 569    setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0};
 570    setup.coef[slot].a0.f[0] = setup.vprovoke->data[slot][0];
 571    setup.coef[slot].a0.f[1] = setup.vprovoke->data[slot][1];
 572    setup.coef[slot].a0.f[2] = setup.vprovoke->data[slot][2];
 573    setup.coef[slot].a0.f[3] = setup.vprovoke->data[slot][3];
 574 }
 575
 576
 577 /**
 578  * Compute a0, dadx and dady for a linearly interpolated coefficient,
 579  * for a triangle.
 580  */
 581 static void tri_linear_coeff( uint slot, uint firstComp, uint lastComp )
 582 {
 583    uint i;
 584    for (i = firstComp; i < lastComp; i++) {
 585       float botda = setup.vmid->data[slot][i] - setup.vmin->data[slot][i];
 586       float majda = setup.vmax->data[slot][i] - setup.vmin->data[slot][i];
 587       float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
 588       float b = setup.emaj.dx * botda - majda * setup.ebot.dx;
 589
 590       ASSERT(slot < PIPE_MAX_SHADER_INPUTS);
 591
 592       setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
 593       setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
 594
 595       /* calculate a0 as the value which would be sampled for the
 596        * fragment at (0,0), taking into account that we want to sample at
 597        * pixel centers, in other words (0.5, 0.5).
 598        *
 599        * this is neat but unfortunately not a good way to do things for
 600        * triangles with very large values of dadx or dady as it will
 601        * result in the subtraction and re-addition from a0 of a very
 602        * large number, which means we'll end up loosing a lot of the
 603        * fractional bits and precision from a0.  the way to fix this is
 604        * to define a0 as the sample at a pixel center somewhere near vmin
 605        * instead - i'll switch to this later.
 606        */
 607       setup.coef[slot].a0.f[i] = (setup.vmin->data[slot][i] -
 608                                  (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) +
 609                                   setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f)));
 610    }
 611
 612    /*
 613    _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
 614                 slot, "xyzw"[i],
 615                 setup.coef[slot].a0[i],
 616                 setup.coef[slot].dadx.f[i],
 617                 setup.coef[slot].dady.f[i]);
 618    */
 619 }
 620
 621
#if 0
/**
 * Compute a0, dadx and dady for a perspective-corrected interpolant,
 * for a triangle.
 * We basically multiply the vertex value by 1/w before computing
 * the plane coefficients (a0, dadx, dady).
 * Later, when we compute the value at a particular fragment position we'll
 * divide the interpolated value by the interpolated W at that fragment.
 *
 * Compiled out: setup_tri_coefficients() currently uses tri_linear_coeff()
 * for INTERP_PERSPECTIVE instead.
 *
 * \param slot  attribute slot
 * \param i     component index (0..3)
 */
static void tri_persp_coeff( unsigned slot,
                             unsigned i )
{
   /* premultiply by 1/w:
    */
   float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3];
   float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3];
   float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3];

   float botda = mida - mina;
   float majda = maxa - mina;
   float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
   float b = setup.emaj.dx * botda - majda * setup.ebot.dx;

   /*
   printf("tri persp %d,%d: %f %f %f\n", slot, i,
          setup.vmin->data[slot][i],
          setup.vmid->data[slot][i],
          setup.vmax->data[slot][i]
          );
   */

   assert(slot < PIPE_MAX_SHADER_INPUTS);
   assert(i <= 3);

   setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
   setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
   /* same pixel-center (0.5, 0.5) convention as tri_linear_coeff() */
   setup.coef[slot].a0.f[i] = (mina -
                            (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) +
                             setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f)));
}
#endif
 663
 664
 665 /**
 666  * Compute the setup.coef[] array dadx, dady, a0 values.
 667  * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
 668  */
 669 static void setup_tri_coefficients(void)
 670 {
 671 #if 1
 672    uint i;
 673
 674    for (i = 0; i < spu.vertex_info.num_attribs; i++) {
 675       switch (spu.vertex_info.interp_mode[i]) {
 676       case INTERP_NONE:
 677          break;
 678       case INTERP_POS:
 679          tri_linear_coeff(i, 2, 3);
 680          /* XXX interp W if PERSPECTIVE... */
 681          break;
 682       case INTERP_CONSTANT:
 683          const_coeff(i);
 684          break;
 685       case INTERP_LINEAR:
 686          tri_linear_coeff(i, 0, 4);
 687          break;
 688       case INTERP_PERSPECTIVE:
 689          tri_linear_coeff(i, 0, 4); /* XXX temporary */
 690          break;
 691       default:
 692          ASSERT(0);
 693       }
 694    }
 695 #else
 696    ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS);
 697    ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR ||
 698           spu.vertex_info.interp_mode[1] == INTERP_CONSTANT);
 699    tri_linear_coeff(0, 2, 3);  /* slot 0, z */
 700    tri_linear_coeff(1, 0, 4);  /* slot 1, color */
 701 #endif
 702 }
 703
 704
 705 static void setup_tri_edges(void)
 706 {
 707    float vmin_x = setup.vmin->data[0][0] + 0.5f;
 708    float vmid_x = setup.vmid->data[0][0] + 0.5f;
 709
 710    float vmin_y = setup.vmin->data[0][1] - 0.5f;
 711    float vmid_y = setup.vmid->data[0][1] - 0.5f;
 712    float vmax_y = setup.vmax->data[0][1] - 0.5f;
 713
 714    setup.emaj.sy = CEILF(vmin_y);
 715    setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
 716    setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
 717    setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;
 718
 719    setup.etop.sy = CEILF(vmid_y);
 720    setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
 721    setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
 722    setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;
 723
 724    setup.ebot.sy = CEILF(vmin_y);
 725    setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
 726    setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
 727    setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
 728 }
 729
 730
 731 /**
 732  * Render the upper or lower half of a triangle.
 733  * Scissoring/cliprect is applied here too.
 734  */
 735 static void subtriangle( struct edge *eleft,
 736                          struct edge *eright,
 737                          unsigned lines )
 738 {
 739    const int minx = setup.cliprect_minx;
 740    const int maxx = setup.cliprect_maxx;
 741    const int miny = setup.cliprect_miny;
 742    const int maxy = setup.cliprect_maxy;
 743    int y, start_y, finish_y;
 744    int sy = (int)eleft->sy;
 745
 746    ASSERT((int)eleft->sy == (int) eright->sy);
 747
 748    /* clip top/bottom */
 749    start_y = sy;
 750    finish_y = sy + lines;
 751
 752    if (start_y < miny)
 753       start_y = miny;
 754
 755    if (finish_y > maxy)
 756       finish_y = maxy;
 757
 758    start_y -= sy;
 759    finish_y -= sy;
 760
 761    /*
 762    _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
 763    */
 764
 765    for (y = start_y; y < finish_y; y++) {
 766
 767       /* avoid accumulating adds as floats don't have the precision to
 768        * accurately iterate large triangle edges that way.  luckily we
 769        * can just multiply these days.
 770        *
 771        * this is all drowned out by the attribute interpolation anyway.
 772        */
 773       int left = (int)(eleft->sx + y * eleft->dxdy);
 774       int right = (int)(eright->sx + y * eright->dxdy);
 775
 776       /* clip left/right */
 777       if (left < minx)
 778          left = minx;
 779       if (right > maxx)
 780          right = maxx;
 781
 782       if (left < right) {
 783          int _y = sy + y;
 784          if (block(_y) != setup.span.y) {
 785             flush_spans();
 786             setup.span.y = block(_y);
 787          }
 788
 789          setup.span.left[_y&1] = left;
 790          setup.span.right[_y&1] = right;
 791          setup.span.y_flags |= 1<<(_y&1);
 792       }
 793    }
 794
 795
 796    /* save the values so that emaj can be restarted:
 797     */
 798    eleft->sx += lines * eleft->dxdy;
 799    eright->sx += lines * eright->dxdy;
 800    eleft->sy += lines;
 801    eright->sy += lines;
 802 }
 803
 804
 805 /**
 806  * Draw triangle into tile at (tx, ty) (tile coords)
 807  * The tile data should have already been fetched.
 808  */
 809 boolean
 810 tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty)
 811 {
 812    setup.tx = tx;
 813    setup.ty = ty;
 814
 815    /* set clipping bounds to tile bounds */
 816    setup.cliprect_minx = tx * TILE_SIZE;
 817    setup.cliprect_miny = ty * TILE_SIZE;
 818    setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
 819    setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
 820
 821    if (!setup_sort_vertices((struct vertex_header *) v0,
 822                             (struct vertex_header *) v1,
 823                             (struct vertex_header *) v2)) {
 824       return FALSE; /* totally clipped */
 825    }
 826
 827    setup_tri_coefficients();
 828    setup_tri_edges();
 829
 830    setup.span.y = 0;
 831    setup.span.y_flags = 0;
 832    setup.span.right[0] = 0;
 833    setup.span.right[1] = 0;
 834    /*   setup.span.z_mode = tri_z_mode( setup.ctx ); */
 835
 836    /*   init_constant_attribs( setup ); */
 837
 838    if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
 839       /* wait for mfc_get() to complete */
 840       wait_on_mask(1 << TAG_READ_TILE_COLOR);
 841       spu.cur_ctile_status = TILE_STATUS_CLEAN;
 842    }
 843    ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
 844
 845    if (spu.depth_stencil.depth.enabled) {
 846       if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
 847          /* wait for mfc_get() to complete */
 848          wait_on_mask(1 << TAG_READ_TILE_Z);
 849          spu.cur_ztile_status = TILE_STATUS_CLEAN;
 850       }
 851       ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
 852    }
 853
 854
 855    if (setup.oneoverarea < 0.0) {
 856       /* emaj on left:
 857        */
 858       subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
 859       subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
 860    }
 861    else {
 862       /* emaj on right:
 863        */
 864       subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
 865       subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
 866    }
 867
 868    flush_spans();
 869
 870    return TRUE;
 871 }