src/gallium/drivers/llvmpipe/lp_rast_tri.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007-2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /*
  29  * Rasterization for binned triangles within a tile
  30  */
  31
  32 #include "lp_context.h"
  33 #include "lp_quad.h"
  34 #include "lp_quad_pipe.h"
  35 #include "lp_setup.h"
  36 #include "lp_state.h"
  37 #include "draw/draw_context.h"
  38 #include "draw/draw_private.h"
  39 #include "draw/draw_vertex.h"
  40 #include "pipe/p_shader_tokens.h"
  41 #include "pipe/p_thread.h"
  42 #include "util/u_math.h"
  43 #include "util/u_memory.h"
  44
  45 #define BLOCKSIZE 4
  46
  47
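/* Render a fully covered block as horizontal runs of 2x2 quads: an 8x8
 * block becomes four runs of four quads, a 4x4 block two runs of two
 * quads.  Each run is rendered in turn.
 */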
  50 #if (BLOCKSIZE == 8)
  51 static void block_full( struct triangle *tri, int x, int y )
  52 {
  53    struct quad_header *ptrs[4];
  54    int i;
  55
  56    tri->quad[0].input.x0 = x + 0;
  57    tri->quad[1].input.x0 = x + 2;
  58    tri->quad[2].input.x0 = x + 4;
  59    tri->quad[3].input.x0 = x + 6;
  60
  61    for (i = 0; i < 4; i++, y += 2) {
  62       tri->quad[0].inout.mask = 0xf;
  63       tri->quad[1].inout.mask = 0xf;
  64       tri->quad[2].inout.mask = 0xf;
  65       tri->quad[3].inout.mask = 0xf;
  66
  67       tri->quad[0].input.y0 = y;
  68       tri->quad[1].input.y0 = y;
  69       tri->quad[2].input.y0 = y;
  70       tri->quad[3].input.y0 = y;
  71
  72       /* XXX: don't bother with this ptrs business */
  73       ptrs[0] = &tri->quad[0];
  74       ptrs[1] = &tri->quad[1];
  75       ptrs[2] = &tri->quad[2];
  76       ptrs[3] = &tri->quad[3];
  77
  78       tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 );
  79    }
  80 }
  81 #else
  82 static void block_full( struct triangle *tri, int x, int y )
  83 {
  84    struct quad_header *ptrs[4];
  85    int iy;
  86
  87    tri->quad[0].input.x0 = x + 0;
  88    tri->quad[1].input.x0 = x + 2;
  89
  90    for (iy = 0; iy < 4; iy += 2) {
  91       tri->quad[0].inout.mask = 0xf;
  92       tri->quad[1].inout.mask = 0xf;
  93
  94       tri->quad[0].input.y0 = y + iy;
  95       tri->quad[1].input.y0 = y + iy;
  96
  97       /* XXX: don't bother with this ptrs business */
  98       ptrs[0] = &tri->quad[0];
  99       ptrs[1] = &tri->quad[1];
 100
 101       tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 );
 102    }
 103 }
 104 #endif
 105
 106 static void
 107 do_quad( struct lp_rasterizer *rast,
 108          int x, int y,
 109          float c1, float c2, float c3 )
 110 {
 111    struct triangle *tri = rast->tri;
 112    struct quad_header *quad = &rast->quad[0];
 113
 114    float xstep1 = -tri->dy12;
 115    float xstep2 = -tri->dy23;
 116    float xstep3 = -tri->dy31;
 117
 118    float ystep1 = tri->dx12;
 119    float ystep2 = tri->dx23;
 120    float ystep3 = tri->dx31;
 121
 122    quad->input.x0 = x;
 123    quad->input.y0 = y;
 124    quad->inout.mask = 0;
 125
 126    if (c1 > 0 &&
 127        c2 > 0 &&
 128        c3 > 0)
 129       quad->inout.mask |= 1;
 130
 131    if (c1 + xstep1 > 0 &&
 132        c2 + xstep2 > 0 &&
 133        c3 + xstep3 > 0)
 134       quad->inout.mask |= 2;
 135
 136    if (c1 + ystep1 > 0 &&
 137        c2 + ystep2 > 0 &&
 138        c3 + ystep3 > 0)
 139       quad->inout.mask |= 4;
 140
 141    if (c1 + ystep1 + xstep1 > 0 &&
 142        c2 + ystep2 + xstep2 > 0 &&
 143        c3 + ystep3 + xstep3 > 0)
 144       quad->inout.mask |= 8;
 145
 146    if (quad->inout.mask)
 147       rast->state->run( rast->state->state, &quad, 1 );
 148 }
 149
 150 /* Evaluate each pixel in a block, generate a mask and possibly render
 151  * the quad:
 152  */
 153 static void
 154 do_block( struct triangle *tri,
 155          int x, int y,
 156          float c1,
 157          float c2,
 158          float c3 )
 159 {
 160    const int step = 2;
 161
 162    float xstep1 = -step * tri->dy12;
 163    float xstep2 = -step * tri->dy23;
 164    float xstep3 = -step * tri->dy31;
 165
 166    float ystep1 = step * tri->dx12;
 167    float ystep2 = step * tri->dx23;
 168    float ystep3 = step * tri->dx31;
 169
 170    int ix, iy;
 171
 172    for (iy = 0; iy < BLOCKSIZE; iy += 2) {
 173       float cx1 = c1;
 174       float cx2 = c2;
 175       float cx3 = c3;
 176
 177       for (ix = 0; ix < BLOCKSIZE; ix += 2) {
 178
 179          do_quad(tri, x+ix, y+iy, cx1, cx2, cx3);
 180
 181          cx1 += xstep1;
 182          cx2 += xstep2;
 183          cx3 += xstep3;
 184       }
 185
 186       c1 += ystep1;
 187       c2 += ystep2;
 188       c3 += ystep3;
 189    }
 190 }
 191
 192
 193
 194 /* Scan the tile in chunks and figure out which pixels to rasterize
 195  * for this triangle:
 196  */
 197 void lp_rast_triangle( struct lp_rasterizer *rast,
 198                        const struct lp_rast_triangle *tri )
 199 {
 200    int minx, maxx, miny, maxy;
 201
 202    /* Clamp to tile dimensions:
 203     */
 204    minx = MAX2(tri->maxx, rast->x);
 205    miny = MAX2(tri->miny, rast->y);
 206    maxx = MIN2(tri->maxx, rast->x + TILE_SIZE);
 207    maxy = MIN2(tri->maxy, rast->y + TILE_SIZE);
 208
 209    if (miny == maxy ||
 210        minx == maxx) {
 211       debug_printf("%s: non-intersecting triangle in bin\n", __FUNCTION__);
 212       //assert(0);
 213       return;
 214    }
 215
 216    /* Bind parameter interpolants:
 217     */
 218    for (i = 0; i < Elements(rast->quad); i++) {
 219       rast->quad[i].coef = tri->coef;
 220       rast->quad[i].posCoef = &tri->position_coef;
 221    }
 222
 223    /* Small area?
 224     */
 225    if (miny + 16 > maxy &&
 226        minx + 16 > maxx)
 227    {
 228       const int step = 2;
 229
 230       float xstep1 = -step * tri->dy12;
 231       float xstep2 = -step * tri->dy23;
 232       float xstep3 = -step * tri->dy31;
 233
 234       float ystep1 = step * tri->dx12;
 235       float ystep2 = step * tri->dx23;
 236       float ystep3 = step * tri->dx31;
 237
 238       float eo1 = tri->eo1 * step;
 239       float eo2 = tri->eo2 * step;
 240       float eo3 = tri->eo3 * step;
 241
 242       int x, y;
 243
 244       minx &= ~(step-1);
 245       maxx &= ~(step-1);
 246
 247       /* Subdivide space into NxM blocks, where each block is square and
 248        * power-of-four in dimension.
 249        *
 250        * Trivially accept or reject blocks, else jump to per-pixel
 251        * examination above.
 252        */
 253       for (y = miny; y < maxy; y += step)
 254       {
 255          float cx1 = c1;
 256          float cx2 = c2;
 257          float cx3 = c3;
 258
 259          for (x = minx; x < maxx; x += step)
 260          {
 261             if (cx1 + eo1 < 0 ||
 262                 cx2 + eo2 < 0 ||
 263                 cx3 + eo3 < 0)
 264             {
 265             }
 266             else
 267             {
 268                do_quad(&tri, x, y, cx1, cx2, cx3);
 269             }
 270
 271             /* Iterate cx values across the region:
 272              */
 273             cx1 += xstep1;
 274             cx2 += xstep2;
 275             cx3 += xstep3;
 276          }
 277
 278          /* Iterate c values down the region:
 279           */
 280          c1 += ystep1;
 281          c2 += ystep2;
 282          c3 += ystep3;
 283       }
 284    }
 285    else
 286    {
 287       const int step = BLOCKSIZE;
 288
 289       float ei1 = tri->ei1 * step;
 290       float ei2 = tri->ei2 * step;
 291       float ei3 = tri->ei3 * step;
 292
 293       float eo1 = tri->eo1 * step;
 294       float eo2 = tri->eo2 * step;
 295       float eo3 = tri->eo3 * step;
 296
 297       float xstep1 = -step * tri->dy12;
 298       float xstep2 = -step * tri->dy23;
 299       float xstep3 = -step * tri->dy31;
 300
 301       float ystep1 = step * tri->dx12;
 302       float ystep2 = step * tri->dx23;
 303       float ystep3 = step * tri->dx31;
 304       int x, y;
 305
 306       minx &= ~(step-1);
 307       miny &= ~(step-1);
 308
 309       for (y = miny; y < maxy; y += step)
 310       {
 311          float cx1 = c1;
 312          float cx2 = c2;
 313          float cx3 = c3;
 314
 315          for (x = minx; x < maxx; x += step)
 316          {
 317             if (cx1 + eo1 < 0 ||
 318                 cx2 + eo2 < 0 ||
 319                 cx3 + eo3 < 0)
 320             {
 321             }
 322             else if (cx1 + ei1 > 0 &&
 323                      cx2 + ei2 > 0 &&
 324                      cx3 + ei3 > 0)
 325             {
 326                block_full(&tri, x, y); /* trivial accept */
 327             }
 328             else
 329             {
 330                do_block(&tri, x, y, cx1, cx2, cx3);
 331             }
 332
 333             /* Iterate cx values across the region:
 334              */
 335             cx1 += xstep1;
 336             cx2 += xstep2;
 337             cx3 += xstep3;
 338          }
 339
 340          /* Iterate c values down the region:
 341           */
 342          c1 += ystep1;
 343          c2 += ystep2;
 344          c3 += ystep3;
 345       }
 346    }
 347 }
 348