src/gallium/drivers/llvmpipe/lp_rast_tri.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007-2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /*
  29  * Rasterization for binned triangles within a tile
  30  */
  31
  32 #include <limits.h>
  33 #include "util/u_math.h"
  34 #include "lp_debug.h"
  35 #include "lp_perf.h"
  36 #include "lp_rast_priv.h"
  37
  38 /**
  39  * Shade all pixels in a 4x4 block.
  40  */
  41 static void
  42 block_full_4(struct lp_rasterizer_task *task,
  43              const struct lp_rast_triangle *tri,
  44              int x, int y)
  45 {
  46    lp_rast_shade_quads_all(task, &tri->inputs, x, y);
  47 }
  48
  49
  50 /**
  51  * Shade all pixels in a 16x16 block.
  52  */
  53 static void
  54 block_full_16(struct lp_rasterizer_task *task,
  55               const struct lp_rast_triangle *tri,
  56               int x, int y)
  57 {
  58    unsigned ix, iy;
  59    assert(x % 16 == 0);
  60    assert(y % 16 == 0);
  61    for (iy = 0; iy < 16; iy += 4)
  62       for (ix = 0; ix < 16; ix += 4)
  63          block_full_4(task, tri, x + ix, y + iy);
  64 }
  65
  66 static INLINE unsigned
  67 build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy)
  68 {
  69    unsigned mask = 0;
  70
  71    int64_t c0 = c;
  72    int64_t c1 = c0 + dcdy;
  73    int64_t c2 = c1 + dcdy;
  74    int64_t c3 = c2 + dcdy;
  75
  76    mask |= ((c0 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 0);
  77    mask |= ((c0 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 1);
  78    mask |= ((c0 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 2);
  79    mask |= ((c0 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 3);
  80    mask |= ((c1 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 4);
  81    mask |= ((c1 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 5);
  82    mask |= ((c1 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 6);
  83    mask |= ((c1 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 7);
  84    mask |= ((c2 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 8);
  85    mask |= ((c2 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 9);
  86    mask |= ((c2 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 10);
  87    mask |= ((c2 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 11);
  88    mask |= ((c3 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 12);
  89    mask |= ((c3 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 13);
  90    mask |= ((c3 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 14);
  91    mask |= ((c3 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 15);
  92
  93    return mask;
  94 }
  95
  96
  97 static INLINE void
  98 build_masks(int64_t c,
  99             int64_t cdiff,
 100             int64_t dcdx,
 101             int64_t dcdy,
 102             unsigned *outmask,
 103             unsigned *partmask)
 104 {
 105    *outmask |= build_mask_linear(c, dcdx, dcdy);
 106    *partmask |= build_mask_linear(c + cdiff, dcdx, dcdy);
 107 }
 108
 109 void
 110 lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
 111                       const union lp_rast_cmd_arg arg)
 112 {
 113    union lp_rast_cmd_arg arg2;
 114    arg2.triangle.tri = arg.triangle.tri;
 115    arg2.triangle.plane_mask = (1<<3)-1;
 116    lp_rast_triangle_3(task, arg2);
 117 }
 118
 119 void
 120 lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
 121                       const union lp_rast_cmd_arg arg)
 122 {
 123    lp_rast_triangle_3_16(task, arg);
 124 }
 125
 126 void
 127 lp_rast_triangle_4_16(struct lp_rasterizer_task *task,
 128                       const union lp_rast_cmd_arg arg)
 129 {
 130    union lp_rast_cmd_arg arg2;
 131    arg2.triangle.tri = arg.triangle.tri;
 132    arg2.triangle.plane_mask = (1<<4)-1;
 133    lp_rast_triangle_4(task, arg2);
 134 }
 135
 136 #if !defined(PIPE_ARCH_SSE)
 137
 138 void
 139 lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
 140                          const union lp_rast_cmd_arg arg)
 141 {
 142    union lp_rast_cmd_arg arg2;
 143    arg2.triangle.tri = arg.triangle.tri;
 144    arg2.triangle.plane_mask = (1<<3)-1;
 145    lp_rast_triangle_32_3(task, arg2);
 146 }
 147
 148 void
 149 lp_rast_triangle_32_4_16(struct lp_rasterizer_task *task,
 150                          const union lp_rast_cmd_arg arg)
 151 {
 152    union lp_rast_cmd_arg arg2;
 153    arg2.triangle.tri = arg.triangle.tri;
 154    arg2.triangle.plane_mask = (1<<4)-1;
 155    lp_rast_triangle_32_4(task, arg2);
 156 }
 157
 158 void
 159 lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
 160                       const union lp_rast_cmd_arg arg)
 161 {
 162    lp_rast_triangle_32_3_16(task, arg);
 163 }
 164
 165 #else
 166 #include <emmintrin.h>
 167 #include "util/u_sse.h"
 168
 169
 170 static INLINE void
 171 build_masks_32(int c,
 172                int cdiff,
 173                int dcdx,
 174                int dcdy,
 175                unsigned *outmask,
 176                unsigned *partmask)
 177 {
 178    __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
 179    __m128i xdcdy = _mm_set1_epi32(dcdy);
 180
 181    /* Get values across the quad
 182     */
 183    __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
 184    __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
 185    __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
 186
 187    {
 188       __m128i cstep01, cstep23, result;
 189
 190       cstep01 = _mm_packs_epi32(cstep0, cstep1);
 191       cstep23 = _mm_packs_epi32(cstep2, cstep3);
 192       result = _mm_packs_epi16(cstep01, cstep23);
 193
 194       *outmask |= _mm_movemask_epi8(result);
 195    }
 196
 197
 198    {
 199       __m128i cio4 = _mm_set1_epi32(cdiff);
 200       __m128i cstep01, cstep23, result;
 201
 202       cstep0 = _mm_add_epi32(cstep0, cio4);
 203       cstep1 = _mm_add_epi32(cstep1, cio4);
 204       cstep2 = _mm_add_epi32(cstep2, cio4);
 205       cstep3 = _mm_add_epi32(cstep3, cio4);
 206
 207       cstep01 = _mm_packs_epi32(cstep0, cstep1);
 208       cstep23 = _mm_packs_epi32(cstep2, cstep3);
 209       result = _mm_packs_epi16(cstep01, cstep23);
 210
 211       *partmask |= _mm_movemask_epi8(result);
 212    }
 213 }
 214
 215
 216 static INLINE unsigned
 217 build_mask_linear_32(int c, int dcdx, int dcdy)
 218 {
 219    __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
 220    __m128i xdcdy = _mm_set1_epi32(dcdy);
 221
 222    /* Get values across the quad
 223     */
 224    __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
 225    __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
 226    __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
 227
 228    /* pack pairs of results into epi16
 229     */
 230    __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
 231    __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
 232
 233    /* pack into epi8, preserving sign bits
 234     */
 235    __m128i result = _mm_packs_epi16(cstep01, cstep23);
 236
 237    /* extract sign bits to create mask
 238     */
 239    return _mm_movemask_epi8(result);
 240 }
 241
 242 static INLINE unsigned
 243 sign_bits4(const __m128i *cstep, int cdiff)
 244 {
 245
 246    /* Adjust the step values
 247     */
 248    __m128i cio4 = _mm_set1_epi32(cdiff);
 249    __m128i cstep0 = _mm_add_epi32(cstep[0], cio4);
 250    __m128i cstep1 = _mm_add_epi32(cstep[1], cio4);
 251    __m128i cstep2 = _mm_add_epi32(cstep[2], cio4);
 252    __m128i cstep3 = _mm_add_epi32(cstep[3], cio4);
 253
 254    /* Pack down to epi8
 255     */
 256    __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
 257    __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
 258    __m128i result = _mm_packs_epi16(cstep01, cstep23);
 259
 260    /* Extract the sign bits
 261     */
 262    return _mm_movemask_epi8(result);
 263 }
 264
 265
 266 #define NR_PLANES 3
 267
 268
 269
 270
 271
 272
 273
 274 void
 275 lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
 276                       const union lp_rast_cmd_arg arg)
 277 {
 278    const struct lp_rast_triangle *tri = arg.triangle.tri;
 279    const struct lp_rast_plane *plane = GET_PLANES(tri);
 280    int x = (arg.triangle.plane_mask & 0xff) + task->x;
 281    int y = (arg.triangle.plane_mask >> 8) + task->y;
 282    unsigned i, j;
 283
 284    struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16];
 285    unsigned nr = 0;
 286
 287    __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
 288    __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
 289    __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
 290    __m128i zero = _mm_setzero_si128();
 291
 292    __m128i c;
 293    __m128i dcdx;
 294    __m128i dcdy;
 295    __m128i rej4;
 296
 297    __m128i dcdx2;
 298    __m128i dcdx3;
 299
 300    __m128i span_0;                /* 0,dcdx,2dcdx,3dcdx for plane 0 */
 301    __m128i span_1;                /* 0,dcdx,2dcdx,3dcdx for plane 1 */
 302    __m128i span_2;                /* 0,dcdx,2dcdx,3dcdx for plane 2 */
 303    __m128i unused;
 304
 305    transpose4_epi32(&p0, &p1, &p2, &zero,
 306                     &c, &dcdx, &dcdy, &rej4);
 307
 308    /* Adjust dcdx;
 309     */
 310    dcdx = _mm_sub_epi32(zero, dcdx);
 311
 312    c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x)));
 313    c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y)));
 314    rej4 = _mm_slli_epi32(rej4, 2);
 315
 316    /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */
 317    c = _mm_sub_epi32(c, _mm_set1_epi32(1));
 318    rej4 = _mm_add_epi32(rej4, _mm_set1_epi32(1));
 319
 320    dcdx2 = _mm_add_epi32(dcdx, dcdx);
 321    dcdx3 = _mm_add_epi32(dcdx2, dcdx);
 322
 323    transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3,
 324                     &span_0, &span_1, &span_2, &unused);
 325
 326    for (i = 0; i < 4; i++) {
 327       __m128i cx = c;
 328
 329       for (j = 0; j < 4; j++) {
 330          __m128i c4rej = _mm_add_epi32(cx, rej4);
 331          __m128i rej_masks = _mm_srai_epi32(c4rej, 31);
 332
 333          /* if (is_zero(rej_masks)) */
 334          if (_mm_movemask_epi8(rej_masks) == 0) {
 335             __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(cx, 0), span_0);
 336             __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(cx, 1), span_1);
 337             __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(cx, 2), span_2);
 338
 339             __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0);
 340
 341             __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0));
 342             __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1));
 343             __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2));
 344
 345             __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1);
 346             __m128i c_01 = _mm_packs_epi32(c_0, c_1);
 347
 348             __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0));
 349             __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1));
 350             __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2));
 351
 352             __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2);
 353
 354             __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0));
 355             __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1));
 356             __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2));
 357
 358             __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3);
 359             __m128i c_23 = _mm_packs_epi32(c_2, c_3);
 360             __m128i c_0123 = _mm_packs_epi16(c_01, c_23);
 361
 362             unsigned mask = _mm_movemask_epi8(c_0123);
 363
 364             out[nr].i = i;
 365             out[nr].j = j;
 366             out[nr].mask = mask;
 367             if (mask != 0xffff)
 368                nr++;
 369          }
 370          cx = _mm_add_epi32(cx, _mm_slli_epi32(dcdx, 2));
 371       }
 372
 373       c = _mm_add_epi32(c, _mm_slli_epi32(dcdy, 2));
 374    }
 375
 376    for (i = 0; i < nr; i++)
 377       lp_rast_shade_quads_mask(task,
 378                                &tri->inputs,
 379                                x + 4 * out[i].j,
 380                                y + 4 * out[i].i,
 381                                0xffff & ~out[i].mask);
 382 }
 383
 384
 385
 386
 387
 388 void
 389 lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
 390                      const union lp_rast_cmd_arg arg)
 391 {
 392    const struct lp_rast_triangle *tri = arg.triangle.tri;
 393    const struct lp_rast_plane *plane = GET_PLANES(tri);
 394    unsigned x = (arg.triangle.plane_mask & 0xff) + task->x;
 395    unsigned y = (arg.triangle.plane_mask >> 8) + task->y;
 396
 397    __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
 398    __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
 399    __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
 400    __m128i zero = _mm_setzero_si128();
 401
 402    __m128i c;
 403    __m128i dcdx;
 404    __m128i dcdy;
 405
 406    __m128i dcdx2;
 407    __m128i dcdx3;
 408
 409    __m128i span_0;                /* 0,dcdx,2dcdx,3dcdx for plane 0 */
 410    __m128i span_1;                /* 0,dcdx,2dcdx,3dcdx for plane 1 */
 411    __m128i span_2;                /* 0,dcdx,2dcdx,3dcdx for plane 2 */
 412    __m128i unused;
 413
 414    transpose4_epi32(&p0, &p1, &p2, &zero,
 415                     &c, &dcdx, &dcdy, &unused);
 416
 417    /* Adjust dcdx;
 418     */
 419    dcdx = _mm_sub_epi32(zero, dcdx);
 420
 421    c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x)));
 422    c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y)));
 423
 424    /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */
 425    c = _mm_sub_epi32(c, _mm_set1_epi32(1));
 426
 427    dcdx2 = _mm_add_epi32(dcdx, dcdx);
 428    dcdx3 = _mm_add_epi32(dcdx2, dcdx);
 429
 430    transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3,
 431                     &span_0, &span_1, &span_2, &unused);
 432
 433
 434    {
 435       __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(c, 0), span_0);
 436       __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(c, 1), span_1);
 437       __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(c, 2), span_2);
 438
 439       __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0);
 440
 441       __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0));
 442       __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1));
 443       __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2));
 444
 445       __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1);
 446       __m128i c_01 = _mm_packs_epi32(c_0, c_1);
 447
 448       __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0));
 449       __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1));
 450       __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2));
 451
 452       __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2);
 453
 454       __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0));
 455       __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1));
 456       __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2));
 457
 458       __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3);
 459       __m128i c_23 = _mm_packs_epi32(c_2, c_3);
 460       __m128i c_0123 = _mm_packs_epi16(c_01, c_23);
 461
 462       unsigned mask = _mm_movemask_epi8(c_0123);
 463
 464       if (mask != 0xffff)
 465          lp_rast_shade_quads_mask(task,
 466                                   &tri->inputs,
 467                                   x,
 468                                   y,
 469                                   0xffff & ~mask);
 470    }
 471 }
 472
 473 #undef NR_PLANES
 474 #endif
 475
 476
 477 #define BUILD_MASKS(c, cdiff, dcdx, dcdy, omask, pmask) build_masks(c, cdiff, dcdx, dcdy, omask, pmask)
 478 #define BUILD_MASK_LINEAR(c, dcdx, dcdy) build_mask_linear(c, dcdx, dcdy)
 479
 480 #define TAG(x) x##_1
 481 #define NR_PLANES 1
 482 #include "lp_rast_tri_tmp.h"
 483
 484 #define TAG(x) x##_2
 485 #define NR_PLANES 2
 486 #include "lp_rast_tri_tmp.h"
 487
 488 #define TAG(x) x##_3
 489 #define NR_PLANES 3
 490 /*#define TRI_4 lp_rast_triangle_3_4*/
 491 /*#define TRI_16 lp_rast_triangle_3_16*/
 492 #include "lp_rast_tri_tmp.h"
 493
 494 #define TAG(x) x##_4
 495 #define NR_PLANES 4
 496 /*#define TRI_16 lp_rast_triangle_4_16*/
 497 #include "lp_rast_tri_tmp.h"
 498
 499 #define TAG(x) x##_5
 500 #define NR_PLANES 5
 501 #include "lp_rast_tri_tmp.h"
 502
 503 #define TAG(x) x##_6
 504 #define NR_PLANES 6
 505 #include "lp_rast_tri_tmp.h"
 506
 507 #define TAG(x) x##_7
 508 #define NR_PLANES 7
 509 #include "lp_rast_tri_tmp.h"
 510
 511 #define TAG(x) x##_8
 512 #define NR_PLANES 8
 513 #include "lp_rast_tri_tmp.h"
 514
 515 #ifdef PIPE_ARCH_SSE
 516 #undef BUILD_MASKS
 517 #undef BUILD_MASK_LINEAR
 518 #define BUILD_MASKS(c, cdiff, dcdx, dcdy, omask, pmask) build_masks_32((int)c, (int)cdiff, dcdx, dcdy, omask, pmask)
 519 #define BUILD_MASK_LINEAR(c, dcdx, dcdy) build_mask_linear_32((int)c, dcdx, dcdy)
 520 #endif
 521
 522 #define TAG(x) x##_32_1
 523 #define NR_PLANES 1
 524 #include "lp_rast_tri_tmp.h"
 525
 526 #define TAG(x) x##_32_2
 527 #define NR_PLANES 2
 528 #include "lp_rast_tri_tmp.h"
 529
 530 #define TAG(x) x##_32_3
 531 #define NR_PLANES 3
 532 /*#define TRI_4 lp_rast_triangle_3_4*/
 533 /*#define TRI_16 lp_rast_triangle_3_16*/
 534 #include "lp_rast_tri_tmp.h"
 535
 536 #define TAG(x) x##_32_4
 537 #define NR_PLANES 4
 538 #ifdef PIPE_ARCH_SSE
 539 #define TRI_16 lp_rast_triangle_32_4_16
 540 #endif
 541 #include "lp_rast_tri_tmp.h"
 542
 543 #define TAG(x) x##_32_5
 544 #define NR_PLANES 5
 545 #include "lp_rast_tri_tmp.h"
 546
 547 #define TAG(x) x##_32_6
 548 #define NR_PLANES 6
 549 #include "lp_rast_tri_tmp.h"
 550
 551 #define TAG(x) x##_32_7
 552 #define NR_PLANES 7
 553 #include "lp_rast_tri_tmp.h"
 554
 555 #define TAG(x) x##_32_8
 556 #define NR_PLANES 8
 557 #include "lp_rast_tri_tmp.h"
 558