Merge branch 'glsl2-head' into glsl2
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast_tri.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Rasterization for binned triangles within a tile
30 */
31
32 #include <limits.h>
33 #include "util/u_math.h"
34 #include "lp_debug.h"
35 #include "lp_perf.h"
36 #include "lp_rast_priv.h"
37 #include "lp_tile_soa.h"
38
39
/**
 * Pixel offsets of the sixteen 4x4 subblocks inside a 16x16 block.
 *
 * Entry [i] is the { x, y } offset for subblock index i in [0,15], i.e.
 * the subblock's column and row in the 4x4 grid, each multiplied by 4.
 * The index order matches the order of the per-edge step values, so the
 * same index can be used for both this table and a step[] array.
 *
 * The offsets could be derived from the index with bit twiddling instead
 * of a look-up table, but there is no measurable performance difference.
 */
static const int pos_table4[16][2] = {
   {  0,  0 }, {  4,  0 }, {  0,  4 }, {  4,  4 },
   {  8,  0 }, { 12,  0 }, {  8,  4 }, { 12,  4 },
   {  0,  8 }, {  4,  8 }, {  0, 12 }, {  4, 12 },
   {  8,  8 }, { 12,  8 }, {  8, 12 }, { 12, 12 }
};
66
67
/**
 * Pixel offsets of sixteen 16x16 blocks arranged in a 4x4 grid.
 *
 * Entry [i] is the { x, y } offset for block index i in [0,15], i.e. the
 * block's column and row in the grid, each multiplied by 16.  The index
 * order is the same as in pos_table4; only the scale differs.
 */
static const int pos_table16[16][2] = {
   {  0,  0 }, { 16,  0 }, {  0, 16 }, { 16, 16 },
   { 32,  0 }, { 48,  0 }, { 32, 16 }, { 48, 16 },
   {  0, 32 }, { 16, 32 }, {  0, 48 }, { 16, 48 },
   { 32, 32 }, { 48, 32 }, { 32, 48 }, { 48, 48 }
};
86
87
88 /**
89 * Shade all pixels in a 4x4 block.
90 */
91 static void
92 block_full_4(struct lp_rasterizer_task *task,
93 const struct lp_rast_triangle *tri,
94 int x, int y)
95 {
96 lp_rast_shade_quads_all(task, &tri->inputs, x, y);
97 }
98
99
/**
 * Shade every pixel of the 16x16 block whose origin is (x, y).
 *
 * The block is split into sixteen 4x4 subblocks which are shaded one
 * after another in row-major order.  Both x and y must be multiples
 * of 16.
 */
static void
block_full_16(struct lp_rasterizer_task *task,
              const struct lp_rast_triangle *tri,
              int x, int y)
{
   unsigned sub;

   assert(x % 16 == 0);
   assert(y % 16 == 0);

   /* Low two bits of the index select the column, the next two the row. */
   for (sub = 0; sub < 16; sub++) {
      const unsigned dx = (sub & 3) * 4;
      const unsigned dy = (sub >> 2) * 4;

      block_full_4(task, tri, x + dx, y + dy);
   }
}
115
116
117 /**
118 * Pass the 4x4 pixel block to the shader function.
119 * Determination of which of the 16 pixels lies inside the triangle
120 * will be done as part of the fragment shader.
121 */
122 static void
123 do_block_4(struct lp_rasterizer_task *task,
124 const struct lp_rast_triangle *tri,
125 int x, int y,
126 int c1, int c2, int c3)
127 {
128 assert(x >= 0);
129 assert(y >= 0);
130
131 lp_rast_shade_quads(task, &tri->inputs, x, y, -c1, -c2, -c3);
132 }
133
134
135 /**
136 * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out
137 * of the triangle's bounds.
138 */
139 static void
140 do_block_16(struct lp_rasterizer_task *task,
141 const struct lp_rast_triangle *tri,
142 int x, int y,
143 int c0, int c1, int c2)
144 {
145 unsigned mask = 0;
146 int eo[3];
147 int c[3];
148 int i, j;
149
150 assert(x >= 0);
151 assert(y >= 0);
152 assert(x % 16 == 0);
153 assert(y % 16 == 0);
154
155 eo[0] = tri->eo1 * 4;
156 eo[1] = tri->eo2 * 4;
157 eo[2] = tri->eo3 * 4;
158
159 c[0] = c0;
160 c[1] = c1;
161 c[2] = c2;
162
163 for (j = 0; j < 3; j++) {
164 const int *step = tri->inputs.step[j];
165 const int cx = c[j] + eo[j];
166
167 /* Mask has bits set whenever we are outside any of the edges.
168 */
169 for (i = 0; i < 16; i++) {
170 int out = cx + step[i] * 4;
171 mask |= (out >> 31) & (1 << i);
172 }
173 }
174
175 mask = ~mask & 0xffff;
176 while (mask) {
177 int i = ffs(mask) - 1;
178 int px = x + pos_table4[i][0];
179 int py = y + pos_table4[i][1];
180 int cx1 = c0 + tri->inputs.step[0][i] * 4;
181 int cx2 = c1 + tri->inputs.step[1][i] * 4;
182 int cx3 = c2 + tri->inputs.step[2][i] * 4;
183
184 mask &= ~(1 << i);
185
186 /* Don't bother testing if the 4x4 block is entirely in/out of
187 * the triangle. It's a little faster to do it in the jit code.
188 */
189 LP_COUNT(nr_non_empty_4);
190 do_block_4(task, tri, px, py, cx1, cx2, cx3);
191 }
192 }
193
194
195 /**
196 * Scan the tile in chunks and figure out which pixels to rasterize
197 * for this triangle.
198 */
199 void
200 lp_rast_triangle(struct lp_rasterizer_task *task,
201 const union lp_rast_cmd_arg arg)
202 {
203 const struct lp_rast_triangle *tri = arg.triangle;
204 const int x = task->x, y = task->y;
205 int ei[3], eo[3], c[3];
206 unsigned outmask, inmask, partial_mask;
207 unsigned i, j;
208
209 c[0] = tri->c1 + tri->dx12 * y - tri->dy12 * x;
210 c[1] = tri->c2 + tri->dx23 * y - tri->dy23 * x;
211 c[2] = tri->c3 + tri->dx31 * y - tri->dy31 * x;
212
213 eo[0] = tri->eo1 * 16;
214 eo[1] = tri->eo2 * 16;
215 eo[2] = tri->eo3 * 16;
216
217 ei[0] = tri->ei1 * 16;
218 ei[1] = tri->ei2 * 16;
219 ei[2] = tri->ei3 * 16;
220
221 outmask = 0;
222 inmask = 0xffff;
223
224 for (j = 0; j < 3; j++) {
225 const int *step = tri->inputs.step[j];
226 const int cox = c[j] + eo[j];
227 const int cio = ei[j]- eo[j];
228
229 /* Outmask has bits set whenever we are outside any of the
230 * edges.
231 */
232 /* Inmask has bits set whenever we are inside all of the edges.
233 */
234 for (i = 0; i < 16; i++) {
235 int out = cox + step[i] * 16;
236 int in = out + cio;
237 outmask |= (out >> 31) & (1 << i);
238 inmask &= ~((in >> 31) & (1 << i));
239 }
240 }
241
242 assert((outmask & inmask) == 0);
243
244 if (outmask == 0xffff)
245 return;
246
247 /* Invert mask, so that bits are set whenever we are at least
248 * partially inside all of the edges:
249 */
250 partial_mask = ~inmask & ~outmask & 0xffff;
251
252 /* Iterate over partials:
253 */
254 while (partial_mask) {
255 int i = ffs(partial_mask) - 1;
256 int px = x + pos_table16[i][0];
257 int py = y + pos_table16[i][1];
258 int cx1 = c[0] + tri->inputs.step[0][i] * 16;
259 int cx2 = c[1] + tri->inputs.step[1][i] * 16;
260 int cx3 = c[2] + tri->inputs.step[2][i] * 16;
261
262 partial_mask &= ~(1 << i);
263
264 LP_COUNT(nr_partially_covered_16);
265 do_block_16(task, tri, px, py, cx1, cx2, cx3);
266 }
267
268 /* Iterate over fulls:
269 */
270 while (inmask) {
271 int i = ffs(inmask) - 1;
272 int px = x + pos_table16[i][0];
273 int py = y + pos_table16[i][1];
274
275 inmask &= ~(1 << i);
276
277 LP_COUNT(nr_fully_covered_16);
278 block_full_16(task, tri, px, py);
279 }
280 }