llvmpipe: Rasterize the quads according to what the blend function expects.
author José Fonseca <jfonseca@vmware.com>
Tue, 18 Aug 2009 11:57:16 +0000 (12:57 +0100)
committer José Fonseca <jfonseca@vmware.com>
Sat, 29 Aug 2009 08:21:34 +0000 (09:21 +0100)
src/gallium/drivers/llvmpipe/lp_quad_blend.c
src/gallium/drivers/llvmpipe/lp_quad_depth_test.c
src/gallium/drivers/llvmpipe/lp_quad_fs.c
src/gallium/drivers/llvmpipe/lp_quad_stipple.c
src/gallium/drivers/llvmpipe/lp_setup.c

index 4312753eefada13cc3e26f694b225aa3b2ad638e..976994f4e8793bbaf587bade2b08c5864eaf6b52 100644 (file)
@@ -144,12 +144,17 @@ blend_run(struct quad_stage *qs,
 
    for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) 
    {
+      unsigned x0 = quads[0]->input.x0;
+      unsigned y0 = quads[0]->input.y0;
       uint8_t ALIGN16_ATTRIB src[4][16];
       uint8_t ALIGN16_ATTRIB dst[4][16];
       struct llvmpipe_cached_tile *tile
-         = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf],
-                              quads[0]->input.x0, 
-                              quads[0]->input.y0);
+         = lp_get_cached_tile(llvmpipe->cbuf_cache[cbuf], x0, y0);
+
+      assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH);
+
+      assert(x0 % TILE_VECTOR_WIDTH == 0);
+      assert(y0 % TILE_VECTOR_HEIGHT == 0);
 
       for (q = 0; q < nr; q += 4) {
          for (k = 0; k < 4 && q + k < nr; ++k) {
@@ -193,6 +198,8 @@ blend_run(struct quad_stage *qs,
                if (quad->inout.mask & (1 << j)) {
                   int x = itx + (j & 1);
                   int y = ity + (j >> 1);
+                  assert(x < TILE_SIZE);
+                  assert(y < TILE_SIZE);
                   for (i = 0; i < 4; i++) { /* loop over color chans */
                      TILE_PIXEL(tile->data.color, x, y, i) = src[i][4*k + j];
                   }
index d3222a1d0962d086766549b189fd31097c4156a4..48a6671c244b85a2dedad431c96f3262cf896dbd 100644 (file)
@@ -579,6 +579,9 @@ depth_stencil_test_quad(struct quad_stage *qs,
          const float *aaaa = quads[i]->output.color[cbuf][3];           \
          unsigned passMask = 0;                                         \
                                                                         \
+         if (!quads[i]->inout.mask)                                     \
+            continue;                                                   \
+                                                                        \
          if (aaaa[0] COMP ref) passMask |= (1 << 0);                    \
          if (aaaa[1] COMP ref) passMask |= (1 << 1);                    \
          if (aaaa[2] COMP ref) passMask |= (1 << 2);                    \
@@ -587,7 +590,7 @@ depth_stencil_test_quad(struct quad_stage *qs,
          quads[i]->inout.mask &= passMask;                              \
                                                                         \
          if (quads[i]->inout.mask)                                      \
-            quads[pass_nr++] = quads[i];                                \
+            ++pass_nr;                                                  \
       }                                                                 \
                                                                         \
       return pass_nr;                                                   \
@@ -657,7 +660,7 @@ depth_test_quads_fallback(struct quad_stage *qs,
 
 
    if (qs->llvmpipe->depth_stencil->alpha.enabled) {
-      nr = alpha_test_quads(qs, quads, nr);
+      alpha_test_quads(qs, quads, nr);
    }
 
    if (qs->llvmpipe->framebuffer.zsbuf && 
@@ -671,6 +674,9 @@ depth_test_quads_fallback(struct quad_stage *qs,
                                      quads[0]->input.y0);
 
       for (i = 0; i < nr; i++) {
+         if(!quads[i]->inout.mask)
+            continue;
+
          get_depth_stencil_values(&data, quads[i]);
 
          if (qs->llvmpipe->depth_stencil->depth.enabled) {
@@ -694,13 +700,11 @@ depth_test_quads_fallback(struct quad_stage *qs,
             write_depth_stencil_values(&data, quads[i]);
 
          qs->llvmpipe->occlusion_count += mask_count[quads[i]->inout.mask];
-         quads[pass++] = quads[i];
+         ++pass;
       }
-
-      nr = pass;
    }
 
-   if (nr)
+   if (pass)
       qs->next->run(qs->next, quads, nr);
 }
 
@@ -771,11 +775,11 @@ depth_interp_z16_less_write(struct quad_stage *qs,
 
       quads[i]->inout.mask = mask;
       if (quads[i]->inout.mask)
-         quads[pass++] = quads[i];
+         ++pass;
    }
 
    if (pass)
-      qs->next->run(qs->next, quads, pass);
+      qs->next->run(qs->next, quads, nr);
 
 }
 
index c428987a7d4ed7451aaabdfd568405953837ac30..1c4403187299354bed7295f683b778c1c8933dcb 100644 (file)
@@ -204,17 +204,20 @@ shade_quads(struct quad_stage *qs,
    unsigned i, pass = 0;
    
    for (i = 0; i < nr; i++) {
+      if(!quads[i]->inout.mask)
+         continue;
+
       if (!shade_quad(qs, quads[i]))
          continue;
 
       if (/*do_coverage*/ 0)
          coverage_quad( qs, quads[i] );
 
-      quads[pass++] = quads[i];
+      ++pass;
    }
    
    if (pass)
-      qs->next->run(qs->next, quads, pass);
+      qs->next->run(qs->next, quads, nr);
 }
    
 
index 353c1ddcfa5988933c848f6c667640e86b8fcf5d..b89978fd5feefabd5132d06714e19f04ee76b38c 100644 (file)
@@ -34,6 +34,9 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr)
       const uint stipple0 = llvmpipe->poly_stipple.stipple[y0 % 32];
       const uint stipple1 = llvmpipe->poly_stipple.stipple[y1 % 32];
 
+      if (!quad->inout.mask)
+         continue;
+
       /* turn off quad mask bits that fail the stipple test */
       if ((stipple0 & (bit31 >> col0)) == 0)
          quad->inout.mask &= ~MASK_TOP_LEFT;
@@ -48,10 +51,11 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr)
          quad->inout.mask &= ~MASK_BOTTOM_RIGHT;
 
       if (quad->inout.mask)
-         quads[pass++] = quad;
+         ++pass;
    }
 
-   qs->next->run(qs->next, quads, pass);
+   if(pass)
+      qs->next->run(qs->next, quads, nr);
 }
 
 
index e62412f0e576be0f6a561a5b3bdc32c2822c1ae8..04ae644ff98854acdbd087741d331f1050f39516 100644 (file)
@@ -45,6 +45,7 @@
 #include "pipe/p_thread.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "lp_tile_soa.h"
 
 
 #define DEBUG_VERTS 0
@@ -195,7 +196,7 @@ static INLINE int block( int x )
 
 static INLINE int block_x( int x )
 {
-   return x & ~(16-1);
+   return x & ~(TILE_VECTOR_WIDTH - 1);
 }
 
 
@@ -204,7 +205,7 @@ static INLINE int block_x( int x )
  */
 static void flush_spans( struct setup_context *setup )
 {
-   const int step = 16;
+   const int step = TILE_VECTOR_WIDTH;
    const int xleft0 = setup->span.left[0];
    const int xleft1 = setup->span.left[1];
    const int xright0 = setup->span.right[0];
@@ -222,6 +223,7 @@ static void flush_spans( struct setup_context *setup )
       unsigned skip_right0 = CLAMP(x + step - xright0, 0, step);
       unsigned skip_right1 = CLAMP(x + step - xright1, 0, step);
       unsigned lx = x;
+      const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
       unsigned q = 0;
 
       unsigned skipmask_left0 = (1U << skip_left0) - 1U;
@@ -236,21 +238,19 @@ static void flush_spans( struct setup_context *setup )
       unsigned mask1 = ~skipmask_left1 & ~skipmask_right1;
 
       if (mask0 | mask1) {
-         do {
+         for(q = 0; q < nr_quads; ++q) {
             unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2);
-            if (quadmask) {
-               setup->quad[q].input.x0 = lx;
-               setup->quad[q].input.y0 = setup->span.y;
-               setup->quad[q].inout.mask = quadmask;
-               setup->quad_ptrs[q] = &setup->quad[q];
-               q++;
-            }
+            setup->quad[q].input.x0 = lx;
+            setup->quad[q].input.y0 = setup->span.y;
+            setup->quad[q].inout.mask = quadmask;
+            setup->quad_ptrs[q] = &setup->quad[q];
             mask0 >>= 2;
             mask1 >>= 2;
             lx += 2;
-         } while (mask0 | mask1);
+         }
+         assert(!(mask0 | mask1));
 
-         pipe->run( pipe, setup->quad_ptrs, q );
+         pipe->run( pipe, setup->quad_ptrs, nr_quads );
       }
    }