llvmpipe: Only invoke the shader if necessary.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast_tri.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Rasterization for binned triangles within a tile
30 */
31
32 #include "util/u_math.h"
33 #include "lp_rast_priv.h"
34 #include "lp_tile_soa.h"
35
36
37 #define BLOCKSIZE 8
38
39
40 /* Convert 8x8 block into four runs of quads and render each in turn.
41 */
42 #if (BLOCKSIZE == 8)
43 static void block_full( struct lp_rasterizer *rast,
44 const struct lp_rast_triangle *tri,
45 int x, int y )
46 {
47 const unsigned masks[4] = {~0, ~0, ~0, ~0};
48 int iy;
49
50 for (iy = 0; iy < 8; iy += 2)
51 lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks);
52 }
53 #else
54 static void block_full( struct lp_rasterizer *rast,
55 const struct lp_rast_triangle *tri,
56 int x, int y )
57 {
58 const unsigned masks[4] = {~0, ~0, 0, 0}; /* FIXME: Wasting quads!!! */
59 int iy;
60
61 for (iy = 0; iy < 4; iy += 2)
62 lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks);
63 }
64 #endif
65
66 static INLINE unsigned
67 do_quad( const struct lp_rast_triangle *tri,
68 int x, int y,
69 float c1, float c2, float c3 )
70 {
71 float xstep1 = -tri->dy12;
72 float xstep2 = -tri->dy23;
73 float xstep3 = -tri->dy31;
74
75 float ystep1 = tri->dx12;
76 float ystep2 = tri->dx23;
77 float ystep3 = tri->dx31;
78
79 unsigned mask = 0;
80
81 if (c1 > 0 &&
82 c2 > 0 &&
83 c3 > 0)
84 mask |= 1;
85
86 if (c1 + xstep1 > 0 &&
87 c2 + xstep2 > 0 &&
88 c3 + xstep3 > 0)
89 mask |= 2;
90
91 if (c1 + ystep1 > 0 &&
92 c2 + ystep2 > 0 &&
93 c3 + ystep3 > 0)
94 mask |= 4;
95
96 if (c1 + ystep1 + xstep1 > 0 &&
97 c2 + ystep2 + xstep2 > 0 &&
98 c3 + ystep3 + xstep3 > 0)
99 mask |= 8;
100
101 return mask;
102 }
103
104 /* Evaluate each pixel in a block, generate a mask and possibly render
105 * the quad:
106 */
107 static void
108 do_block( struct lp_rasterizer *rast,
109 const struct lp_rast_triangle *tri,
110 int x, int y,
111 float c1,
112 float c2,
113 float c3 )
114 {
115 const int step = 2;
116
117 float xstep1 = -step * tri->dy12;
118 float xstep2 = -step * tri->dy23;
119 float xstep3 = -step * tri->dy31;
120
121 float ystep1 = step * tri->dx12;
122 float ystep2 = step * tri->dx23;
123 float ystep3 = step * tri->dx31;
124
125 int ix, iy;
126
127 for (iy = 0; iy < BLOCKSIZE; iy += 2) {
128 float cx1 = c1;
129 float cx2 = c2;
130 float cx3 = c3;
131
132 unsigned masks[4] = {0, 0, 0, 0};
133
134 for (ix = 0; ix < BLOCKSIZE; ix += 2) {
135
136 masks[ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3);
137
138 cx1 += xstep1;
139 cx2 += xstep2;
140 cx3 += xstep3;
141 }
142
143 if(masks[0] || masks[1] || masks[2] || masks[3])
144 lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks);
145
146 c1 += ystep1;
147 c2 += ystep2;
148 c3 += ystep3;
149 }
150
151 }
152
153
154
155 /* Scan the tile in chunks and figure out which pixels to rasterize
156 * for this triangle:
157 */
158 void lp_rast_triangle( struct lp_rasterizer *rast,
159 const union lp_rast_cmd_arg arg )
160 {
161 const struct lp_rast_triangle *tri = arg.triangle;
162
163 const int step = BLOCKSIZE;
164
165 float ei1 = tri->ei1 * step;
166 float ei2 = tri->ei2 * step;
167 float ei3 = tri->ei3 * step;
168
169 float eo1 = tri->eo1 * step;
170 float eo2 = tri->eo2 * step;
171 float eo3 = tri->eo3 * step;
172
173 float xstep1 = -step * tri->dy12;
174 float xstep2 = -step * tri->dy23;
175 float xstep3 = -step * tri->dy31;
176
177 float ystep1 = step * tri->dx12;
178 float ystep2 = step * tri->dx23;
179 float ystep3 = step * tri->dx31;
180
181 /* Clamp to tile dimensions:
182 */
183 int minx = MAX2(tri->minx, rast->x);
184 int miny = MAX2(tri->miny, rast->y);
185 int maxx = MIN2(tri->maxx, rast->x + TILE_SIZE);
186 int maxy = MIN2(tri->maxy, rast->y + TILE_SIZE);
187
188 int x, y;
189 float x0, y0;
190 float c1, c2, c3;
191
192 debug_printf("%s\n", __FUNCTION__);
193
194 if (miny == maxy || minx == maxx) {
195 debug_printf("%s: non-intersecting triangle in bin\n", __FUNCTION__);
196 return;
197 }
198
199 minx &= ~(step-1);
200 miny &= ~(step-1);
201
202 x0 = (float)minx;
203 y0 = (float)miny;
204
205 c1 = tri->c1 + tri->dx12 * y0 - tri->dy12 * x0;
206 c2 = tri->c2 + tri->dx23 * y0 - tri->dy23 * x0;
207 c3 = tri->c3 + tri->dx31 * y0 - tri->dy31 * x0;
208
209 for (y = miny; y < maxy; y += step)
210 {
211 float cx1 = c1;
212 float cx2 = c2;
213 float cx3 = c3;
214
215 for (x = minx; x < maxx; x += step)
216 {
217 if (cx1 + eo1 < 0 ||
218 cx2 + eo2 < 0 ||
219 cx3 + eo3 < 0)
220 {
221 }
222 else if (cx1 + ei1 > 0 &&
223 cx2 + ei2 > 0 &&
224 cx3 + ei3 > 0)
225 {
226 block_full(rast, tri, x, y); /* trivial accept */
227 }
228 else
229 {
230 do_block(rast, tri, x, y, cx1, cx2, cx3);
231 }
232
233 /* Iterate cx values across the region:
234 */
235 cx1 += xstep1;
236 cx2 += xstep2;
237 cx3 += xstep3;
238 }
239
240 /* Iterate c values down the region:
241 */
242 c1 += ystep1;
243 c2 += ystep2;
244 c3 += ystep3;
245 }
246 }
247