llvmpipe: add rast_tri_4_16 for small lines and points
author Keith Whitwell <keithw@vmware.com>
Fri, 8 Oct 2010 16:21:03 +0000 (17:21 +0100)
committer Keith Whitwell <keithw@vmware.com>
Fri, 8 Oct 2010 16:30:08 +0000 (17:30 +0100)
src/gallium/drivers/llvmpipe/lp_rast.c
src/gallium/drivers/llvmpipe/lp_rast.h
src/gallium/drivers/llvmpipe/lp_rast_debug.c
src/gallium/drivers/llvmpipe/lp_rast_priv.h
src/gallium/drivers/llvmpipe/lp_rast_tri.c
src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
src/gallium/drivers/llvmpipe/lp_setup_tri.c

index 790d88a74508d89cc59b3f1564cae18ef9a32802..db9b2f9b128670338ccfbfbba955ff3627488b36 100644 (file)
@@ -597,6 +597,7 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
    lp_rast_triangle_8,
    lp_rast_triangle_3_4,
    lp_rast_triangle_3_16,
+   lp_rast_triangle_4_16,
    lp_rast_shade_tile,
    lp_rast_shade_tile_opaque,
    lp_rast_begin_query,
index 0f62377c0720e4eeb77b0c68274f2910168d113a..df0bea04b9ad1c16c5fd7da1dfc34775b2edd3a4 100644 (file)
@@ -238,12 +238,13 @@ lp_rast_arg_null( void )
 #define LP_RAST_OP_TRIANGLE_8        0x9
 #define LP_RAST_OP_TRIANGLE_3_4      0xa
 #define LP_RAST_OP_TRIANGLE_3_16     0xb
-#define LP_RAST_OP_SHADE_TILE        0xc
-#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xd
-#define LP_RAST_OP_BEGIN_QUERY       0xe
-#define LP_RAST_OP_END_QUERY         0xf
+#define LP_RAST_OP_TRIANGLE_4_16     0xc
+#define LP_RAST_OP_SHADE_TILE        0xd
+#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xe
+#define LP_RAST_OP_BEGIN_QUERY       0xf
+#define LP_RAST_OP_END_QUERY         0x10
 
-#define LP_RAST_OP_MAX               0x10
+#define LP_RAST_OP_MAX               0x11
 #define LP_RAST_OP_MASK              0xff
 
 void
index 9fc78645a3a7007585f09dcfd428aed5006cc741..6f4ba1c6fef4a71d2f82e72e7384b2c6800c35fc 100644 (file)
@@ -42,6 +42,7 @@ static const char *cmd_names[LP_RAST_OP_MAX] =
    "triangle_8",
    "triangle_3_4",
    "triangle_3_16",
+   "triangle_4_16",
    "shade_tile",
    "shade_tile_opaque",
    "begin_query",
index 7370119e9660da554fe938046a3b93a97cdd0b5a..104000a040cc045a769e94cd94e32bca653fece1 100644 (file)
@@ -293,6 +293,10 @@ void lp_rast_triangle_3_4(struct lp_rasterizer_task *,
 
 void lp_rast_triangle_3_16( struct lp_rasterizer_task *, 
                             const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_4_16( struct lp_rasterizer_task *, 
+                            const union lp_rast_cmd_arg );
+
 void
 lp_debug_bin( const struct cmd_bin *bin );
 
index a1f309d4b01de20d5324091ecf3f44a55523fc9e..f870a187db5897ce75017997694f3edef5daa9d5 100644 (file)
@@ -122,6 +122,16 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
    lp_rast_triangle_3(task, arg2);
 }
 
+void
+lp_rast_triangle_4_16(struct lp_rasterizer_task *task,
+                      const union lp_rast_cmd_arg arg)
+{
+   union lp_rast_cmd_arg arg2;   /* copy of arg with plane_mask replaced for the generic path */
+   arg2.triangle.tri = arg.triangle.tri;
+   arg2.triangle.plane_mask = (1<<4)-1;   /* bits 0..3: select all four planes */
+   lp_rast_triangle_4(task, arg2);   /* must be _4: the _3 variant is built with NR_PLANES 3 */
+}
+
 void
 lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
                       const union lp_rast_cmd_arg arg)
@@ -229,145 +239,6 @@ sign_bits4(const __m128i *cstep, int cdiff)
    return _mm_movemask_epi8(result);
 }
 
-
-/* Special case for 3 plane triangle which is contained entirely
- * within a 16x16 block.
- */
-void
-lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
-                      const union lp_rast_cmd_arg arg)
-{
-   const struct lp_rast_triangle *tri = arg.triangle.tri;
-   const struct lp_rast_plane *plane = tri->plane;
-   unsigned mask = arg.triangle.plane_mask;
-   const int x = task->x + (mask & 0xff);
-   const int y = task->y + (mask >> 8);
-   unsigned outmask, inmask, partmask, partial_mask;
-   unsigned j;
-   __m128i cstep4[3][4];
-
-   outmask = 0;                 /* outside one or more trivial reject planes */
-   partmask = 0;                /* outside one or more trivial accept planes */
-
-   for (j = 0; j < 3; j++) {
-      const int dcdx = -plane[j].dcdx * 4;
-      const int dcdy = plane[j].dcdy * 4;
-      __m128i xdcdy = _mm_set1_epi32(dcdy);
-
-      cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3);
-      cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy);
-      cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy);
-      cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy);
-
-      {
-        const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
-        const int cox = plane[j].eo * 4;
-        const int cio = plane[j].ei * 4 - 1;
-
-        outmask |= sign_bits4(cstep4[j], c + cox);
-        partmask |= sign_bits4(cstep4[j], c + cio);
-      }
-   }
-
-   if (outmask == 0xffff)
-      return;
-
-   /* Mask of sub-blocks which are inside all trivial accept planes:
-    */
-   inmask = ~partmask & 0xffff;
-
-   /* Mask of sub-blocks which are inside all trivial reject planes,
-    * but outside at least one trivial accept plane:
-    */
-   partial_mask = partmask & ~outmask;
-
-   assert((partial_mask & inmask) == 0);
-
-   /* Iterate over partials:
-    */
-   while (partial_mask) {
-      int i = ffs(partial_mask) - 1;
-      int ix = (i & 3) * 4;
-      int iy = (i >> 2) * 4;
-      int px = x + ix;
-      int py = y + iy; 
-      unsigned mask = 0xffff;
-
-      partial_mask &= ~(1 << i);
-
-      for (j = 0; j < 3; j++) {
-         const int cx = (plane[j].c 
-                        - plane[j].dcdx * px
-                        + plane[j].dcdy * py) * 4;
-
-        mask &= ~sign_bits4(cstep4[j], cx);
-      }
-
-      if (mask)
-        lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask);
-   }
-
-   /* Iterate over fulls: 
-    */
-   while (inmask) {
-      int i = ffs(inmask) - 1;
-      int ix = (i & 3) * 4;
-      int iy = (i >> 2) * 4;
-      int px = x + ix;
-      int py = y + iy; 
-
-      inmask &= ~(1 << i);
-
-      block_full_4(task, tri, px, py);
-   }
-}
-
-
-void
-lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
-                    const union lp_rast_cmd_arg arg)
-{
-   const struct lp_rast_triangle *tri = arg.triangle.tri;
-   const struct lp_rast_plane *plane = tri->plane;
-   unsigned mask = arg.triangle.plane_mask;
-   const int x = task->x + (mask & 0xff);
-   const int y = task->y + (mask >> 8);
-   unsigned j;
-
-   /* Iterate over partials:
-    */
-   {
-      unsigned mask = 0xffff;
-
-      for (j = 0; j < 3; j++) {
-        const int cx = (plane[j].c 
-                        - plane[j].dcdx * x
-                        + plane[j].dcdy * y);
-
-        const int dcdx = -plane[j].dcdx;
-        const int dcdy = plane[j].dcdy;
-        __m128i xdcdy = _mm_set1_epi32(dcdy);
-
-        __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3);
-        __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
-        __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
-        __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
-
-        __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
-        __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
-        __m128i result = _mm_packs_epi16(cstep01, cstep23);
-
-        /* Extract the sign bits
-         */
-        mask &= ~_mm_movemask_epi8(result);
-      }
-
-      if (mask)
-        lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
-   }
-}
-
-
 #endif
 
 
@@ -383,10 +254,13 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
 
 #define TAG(x) x##_3
 #define NR_PLANES 3
+#define TRI_4 lp_rast_triangle_3_4
+#define TRI_16 lp_rast_triangle_3_16
 #include "lp_rast_tri_tmp.h"
 
 #define TAG(x) x##_4
 #define NR_PLANES 4
+#define TRI_16 lp_rast_triangle_4_16
 #include "lp_rast_tri_tmp.h"
 
 #define TAG(x) x##_5
index 9830a43ba55c7d493b297286e562eeb7a1d0397e..c8f9956fda407725b39c682eb91c0eb50af913a7 100644 (file)
@@ -245,6 +245,133 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
    }
 }
 
+#if defined(PIPE_ARCH_SSE) && defined(TRI_16)
+/* Rasterize an NR_PLANES-plane triangle over a 16x16 pixel area whose offset
+ * is packed in arg.triangle.plane_mask as x | (y << 8).  XXX: special case
+ * this when no intersection test is needed (tile in bbox, or bbox in tile).
+ */
+void
+TRI_16(struct lp_rasterizer_task *task,
+       const union lp_rast_cmd_arg arg)
+{
+   const struct lp_rast_triangle *tri = arg.triangle.tri;
+   const struct lp_rast_plane *plane = tri->plane;
+   unsigned mask = arg.triangle.plane_mask;   /* here: packed x | (y << 8), not plane bits */
+   unsigned outmask, partial_mask;
+   unsigned j;
+   __m128i cstep4[NR_PLANES][4];   /* per plane: 4 rows x 4 columns of steps, 4 pixels apart */
+
+   int x = (mask & 0xff);
+   int y = (mask >> 8);
+
+   outmask = 0;                 /* outside one or more trivial reject planes */
+   
+   x += task->x;   /* add the task's x/y (presumably the tile origin -- confirm) */
+   y += task->y;
+
+   for (j = 0; j < NR_PLANES; j++) {
+      const int dcdx = -plane[j].dcdx * 4;   /* step for moving 4 pixels in x */
+      const int dcdy = plane[j].dcdy * 4;   /* step for moving 4 pixels in y */
+      __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+      cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3);   /* row 0: columns 0..3 */
+      cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy);   /* each later row adds one y step */
+      cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy);
+      cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy);
+
+      {
+        const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;   /* plane value at (x, y) */
+        const int cox = plane[j].eo * 4;   /* eo scaled to a 4x4 block; presumably the reject-corner offset */
+
+        outmask |= sign_bits4(cstep4[j], c + cox);   /* sign_bits4 is defined elsewhere in the file */
+      }
+   }
+
+   if (outmask == 0xffff)   /* all 16 sub-blocks flagged by at least one plane */
+      return;
+
+
+   /* Mask of 4x4 sub-blocks that no plane flagged in outmask.  There is no
+    * trivial-accept test here, so every survivor gets the full test below:
+    */
+   partial_mask = 0xffff & ~outmask;
+
+   /* Visit each surviving sub-block, lowest set bit first:
+    */
+   while (partial_mask) {
+      int i = ffs(partial_mask) - 1;   /* index of the lowest set bit */
+      int ix = (i & 3) * 4;   /* column of the sub-block, in pixels */
+      int iy = (i >> 2) * 4;   /* row of the sub-block, in pixels */
+      int px = x + ix;
+      int py = y + iy; 
+      unsigned mask = 0xffff;   /* shadows the outer 'mask'; one bit per position in the 4x4 block */
+
+      partial_mask &= ~(1 << i);
+
+      for (j = 0; j < NR_PLANES; j++) {
+         const int cx = (plane[j].c 
+                        - plane[j].dcdx * px
+                        + plane[j].dcdy * py) * 4;   /* x4 to match the 4x-scaled steps in cstep4 */
+
+        mask &= ~sign_bits4(cstep4[j], cx);   /* drop positions flagged for plane j */
+      }
+
+      if (mask)   /* skip the shade call when nothing is left */
+        lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask);
+   }
+}
+#endif
+
+#if defined(PIPE_ARCH_SSE) && defined(TRI_4)
+void
+TRI_4(struct lp_rasterizer_task *task,
+      const union lp_rast_cmd_arg arg)
+{
+   const struct lp_rast_triangle *tri = arg.triangle.tri;
+   const struct lp_rast_plane *plane = tri->plane;
+   unsigned mask = arg.triangle.plane_mask;   /* here: packed x | (y << 8), not plane bits */
+   const int x = task->x + (mask & 0xff);
+   const int y = task->y + (mask >> 8);
+   unsigned j;
+
+   /* Test the single 4x4 block at (x, y) against all NR_PLANES planes; a
+    * mask bit survives only where no plane value is negative: */
+   {
+      unsigned mask = 0xffff;   /* shadows the outer 'mask'; one bit per position in the block */
+
+      for (j = 0; j < NR_PLANES; j++) {
+        const int cx = (plane[j].c 
+                        - plane[j].dcdx * x
+                        + plane[j].dcdy * y);
+
+        const int dcdx = -plane[j].dcdx;   /* step for moving 1 pixel in x */
+        const int dcdy = plane[j].dcdy;   /* step for moving 1 pixel in y */
+        __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+        __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3);   /* row 0 */
+        __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);   /* rows 1..3: add one y step each */
+        __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
+        __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+
+        __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);   /* signed saturating narrow keeps the sign */
+        __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
+        __m128i result = _mm_packs_epi16(cstep01, cstep23);   /* 16 bytes, rows 0..3 in order */
+
+        /* Collect the top bit of each byte (set where the value is negative)
+         * and clear those positions from the mask: */
+        mask &= ~_mm_movemask_epi8(result);
+      }
+
+      if (mask)   /* skip the shade call when nothing is left */
+        lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
+   }
+}
+#endif
+
+
+
 #undef TAG
+#undef TRI_4
+#undef TRI_16
 #undef NR_PLANES
 
index 9f871011d8be7038702ab8fac21556febbefbf0a..8fd034666c32e8b272bd521bfe0805d97cd52312 100644 (file)
@@ -479,15 +479,14 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
    {
       int ix0 = bbox->x0 / TILE_SIZE;
       int iy0 = bbox->y0 / TILE_SIZE;
+      int px = bbox->x0 & 63 & ~3;
+      int py = bbox->y0 & 63 & ~3;
+      int mask = px | (py << 8);
 
       assert(iy0 == bbox->y1 / TILE_SIZE &&
             ix0 == bbox->x1 / TILE_SIZE);
 
       if (nr_planes == 3) {
-         int px = bbox->x0 & 63 & ~3;
-         int py = bbox->y0 & 63 & ~3;
-        int mask = px | (py << 8);
-
          if (sz < 4)
          {
             /* Triangle is contained in a single 4x4 stamp:
@@ -507,6 +506,12 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
                                          lp_rast_arg_triangle(tri, mask) );
          }
       }
+      else if (nr_planes == 4 && sz < 16) 
+      {
+         return lp_scene_bin_command( scene, ix0, iy0,
+                                      LP_RAST_OP_TRIANGLE_4_16,
+                                      lp_rast_arg_triangle(tri, mask) );
+      }
 
 
       /* Triangle is contained in a single tile: