llvmpipe: wip me harder
[mesa.git] / src / gallium / drivers / llvmpipe / lp_setup_tri.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Binning code for triangles
30 */
31
32 #include "lp_setup.h"
33 #include "lp_state.h"
34 #include "util/u_math.h"
35 #include "util/u_memory.h"
36
37
38 /**
39 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
40 */
41 static void constant_coef( struct tgsi_interp_coef *coef,
42 const float (*v3)[4],
43 unsigned vert_attr,
44 unsigned i )
45 {
46 coef->a0[i] = v3[vert_attr][i];
47 coef->dadx[i] = 0;
48 coef->dady[i] = 0;
49 }
50
51 /**
52 * Compute a0, dadx and dady for a linearly interpolated coefficient,
53 * for a triangle.
54 */
55 static void linear_coef( struct triangle *tri,
56 struct tgsi_interp_coef *coef,
57 const float (*v1)[4],
58 const float (*v2)[4],
59 const float (*v3)[4],
60 unsigned vert_attr,
61 unsigned i)
62 {
63 float a1 = v1[vert_attr][i];
64 float a2 = v2[vert_attr][i];
65 float a3 = v3[vert_attr][i];
66
67 float da12 = a1 - a2;
68 float da31 = a3 - a1;
69 float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
70 float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
71
72 coef->dadx[i] = dadx;
73 coef->dady[i] = dady;
74
75 /* calculate a0 as the value which would be sampled for the
76 * fragment at (0,0), taking into account that we want to sample at
77 * pixel centers, in other words (0.5, 0.5).
78 *
79 * this is neat but unfortunately not a good way to do things for
80 * triangles with very large values of dadx or dady as it will
81 * result in the subtraction and re-addition from a0 of a very
82 * large number, which means we'll end up loosing a lot of the
83 * fractional bits and precision from a0. the way to fix this is
84 * to define a0 as the sample at a pixel center somewhere near vmin
85 * instead - i'll switch to this later.
86 */
87 coef->a0[i] = (v1[vert_attr][i] -
88 (dadx * (v1[0][0] - 0.5f) +
89 dady * (v1[0][1] - 0.5f)));
90 }
91
92
93 /**
94 * Compute a0, dadx and dady for a perspective-corrected interpolant,
95 * for a triangle.
96 * We basically multiply the vertex value by 1/w before computing
97 * the plane coefficients (a0, dadx, dady).
98 * Later, when we compute the value at a particular fragment position we'll
99 * divide the interpolated value by the interpolated W at that fragment.
100 */
101 static void perspective_coef( struct triangle *tri,
102 struct tgsi_interp_coef *coef,
103 const float (*v1)[4],
104 const float (*v2)[4],
105 const float (*v3)[4],
106 unsigned vert_attr,
107 unsigned i)
108 {
109 /* premultiply by 1/w (v[0][3] is always 1/w):
110 */
111 float a1 = v1[vert_attr][i] * v1[0][3];
112 float a2 = v2[vert_attr][i] * v2[0][3];
113 float a3 = v3[vert_attr][i] * v3[0][3];
114 float da12 = a1 - a2;
115 float da31 = a3 - a1;
116 float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
117 float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
118
119
120 coef->dadx[i] = dadx;
121 coef->dady[i] = dady;
122 coef->a0[i] = (a1 -
123 (dadx * (v1[0][0] - 0.5f) +
124 dady * (v1[0][1] - 0.5f)));
125 }
126
127
128 /**
129 * Special coefficient setup for gl_FragCoord.
130 * X and Y are trivial, though Y has to be inverted for OpenGL.
131 * Z and W are copied from position_coef which should have already been computed.
132 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
133 */
134 static void
135 setup_fragcoord_coef(struct triangle *tri, unsigned slot)
136 {
137 /*X*/
138 tri->coef[slot].a0[0] = 0.0;
139 tri->coef[slot].dadx[0] = 1.0;
140 tri->coef[slot].dady[0] = 0.0;
141 /*Y*/
142 tri->coef[slot].a0[1] = 0.0;
143 tri->coef[slot].dadx[1] = 0.0;
144 tri->coef[slot].dady[1] = 1.0;
145 /*Z*/
146 tri->coef[slot].a0[2] = tri->position_coef.a0[2];
147 tri->coef[slot].dadx[2] = tri->position_coef.dadx[2];
148 tri->coef[slot].dady[2] = tri->position_coef.dady[2];
149 /*W*/
150 tri->coef[slot].a0[3] = tri->position_coef.a0[3];
151 tri->coef[slot].dadx[3] = tri->position_coef.dadx[3];
152 tri->coef[slot].dady[3] = tri->position_coef.dady[3];
153 }
154
155
156
157 /**
158 * Compute the tri->coef[] array dadx, dady, a0 values.
159 */
160 static void setup_tri_coefficients( struct setup_context *setup,
161 struct triangle *tri,
162 const float (*v1)[4],
163 const float (*v2)[4],
164 const float (*v3)[4],
165 boolean frontface )
166 {
167 const struct vertex_info *vinfo = setup->vinfo;
168 unsigned input;
169
170 /* z and w are done by linear interpolation:
171 */
172 linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 2);
173 linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 3);
174
175 /* setup interpolation for all the remaining attributes:
176 */
177 for (input = 0; input < vinfo->num_fs_inputs; input++) {
178 unsigned vert_attr = vinfo->attrib[input].src_index;
179 unsigned i;
180
181 switch (vinfo->attrib[input].interp_mode) {
182 case INTERP_CONSTANT:
183 for (i = 0; i < NUM_CHANNELS; i++)
184 constant_coef(tri->coef[input], v3, vert_attr, i);
185 break;
186
187 case INTERP_LINEAR:
188 for (i = 0; i < NUM_CHANNELS; i++)
189 linear_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i);
190 break;
191
192 case INTERP_PERSPECTIVE:
193 for (i = 0; i < NUM_CHANNELS; i++)
194 perspective_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i);
195 break;
196
197 case INTERP_POS:
198 setup_fragcoord_coef(tri, input);
199 break;
200
201 case INTERP_FACING:
202 tri->coef[input].a0[0] = 1.0f - frontface;
203 tri->coef[input].dadx[0] = 0.0;
204 tri->coef[input].dady[0] = 0.0;
205 break;
206
207 default:
208 assert(0);
209 }
210 }
211 }
212
213
214
/**
 * Snap a coordinate to a multiple of 1/16.
 *
 * The scaled value is converted to int, so the fraction is truncated
 * toward zero.
 *
 * XXX: this could be done by adding/subtracting a large floating point
 * number instead.
 */
static inline float subpixel_snap( float a )
{
   const int sixteenths = a * 16;

   return (float)sixteenths * (1.0/16);
}
222
223
224
225
226
227 /* to avoid having to allocate power-of-four, square render targets,
228 * end up having a specialized version of the above that runs only at
229 * the topmost level.
230 *
231 * at the topmost level there may be an arbitrary number of steps on
232 * either dimension, so this loop needs to be either separately
233 * code-generated and unrolled for each render target size, or kept as
234 * generic looping code:
235 */
236
/* Three-way min/max built from the two-way MIN2/MAX2 helpers. */
#define MIN3(a, b, c)  MIN2(MIN2((a), (b)), (c))
#define MAX3(a, b, c)  MAX2(MAX2((a), (b)), (c))
239
240 static void
241 do_triangle_ccw(struct lp_setup *setup,
242 const float (*v1)[4],
243 const float (*v2)[4],
244 const float (*v3)[4],
245 boolean frontfacing )
246 {
247 const int rt_width = setup->framebuffer.cbufs[0]->width;
248 const int rt_height = setup->framebuffer.cbufs[0]->height;
249
250 const float y1 = subpixel_snap(v1[0][1]);
251 const float y2 = subpixel_snap(v2[0][1]);
252 const float y3 = subpixel_snap(v3[0][1]);
253
254 const float x1 = subpixel_snap(v1[0][0]);
255 const float x2 = subpixel_snap(v2[0][0]);
256 const float x3 = subpixel_snap(v3[0][0]);
257
258 struct triangle *tri = allocate_triangle( setup );
259 float area;
260 float c1, c2, c3;
261 int i;
262 int minx, maxx, miny, maxy;
263
264 tri->dx12 = x1 - x2;
265 tri->dx23 = x2 - x3;
266 tri->dx31 = x3 - x1;
267
268 tri->dy12 = y1 - y2;
269 tri->dy23 = y2 - y3;
270 tri->dy31 = y3 - y1;
271
272 area = (tri->dx12 * tri->dy31 -
273 tri->dx31 * tri->dy12);
274
275 /* Cull non-ccw and zero-sized triangles.
276 */
277 if (area <= 0 || util_is_inf_or_nan(area))
278 return;
279
280 // Bounding rectangle
281 minx = util_iround(MIN3(x1, x2, x3) - .5);
282 maxx = util_iround(MAX3(x1, x2, x3) + .5);
283 miny = util_iround(MIN3(y1, y2, y3) - .5);
284 maxy = util_iround(MAX3(y1, y2, y3) + .5);
285
286 /* Clamp to framebuffer (or tile) dimensions:
287 */
288 miny = MAX2(0, miny);
289 minx = MAX2(0, minx);
290 maxy = MIN2(rt_height, maxy);
291 maxx = MIN2(rt_width, maxx);
292
293 if (miny == maxy || minx == maxx)
294 return;
295
296 /* The only divide in this code. Is it really needed?
297 */
298 tri->oneoverarea = 1.0f / area;
299
300 /* Setup parameter interpolants:
301 */
302 setup_tri_coefficients( setup, tri, v1, v2, v3, frontfacing );
303
304 /* half-edge constants, will be interated over the whole
305 * rendertarget.
306 */
307 c1 = tri->dy12 * x1 - tri->dx12 * y1;
308 c2 = tri->dy23 * x2 - tri->dx23 * y2;
309 c3 = tri->dy31 * x3 - tri->dx31 * y3;
310
311 /* correct for top-left fill convention:
312 */
313 if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) c1++;
314 if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) c2++;
315 if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) c3++;
316
317 /* find trivial reject offsets for each edge for a single-pixel
318 * sized block. These will be scaled up at each recursive level to
319 * match the active blocksize. Scaling in this way works best if
320 * the blocks are square.
321 */
322 tri->eo1 = 0;
323 if (tri->dy12 < 0) tri->eo1 -= tri->dy12;
324 if (tri->dx12 > 0) tri->eo1 += tri->dx12;
325
326 tri->eo2 = 0;
327 if (tri->dy23 < 0) tri->eo2 -= tri->dy23;
328 if (tri->dx23 > 0) tri->eo2 += tri->dx23;
329
330 tri->eo3 = 0;
331 if (tri->dy31 < 0) tri->eo3 -= tri->dy31;
332 if (tri->dx31 > 0) tri->eo3 += tri->dx31;
333
334 /* Calculate trivial accept offsets from the above.
335 */
336 tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1;
337 tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2;
338 tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;
339
340 minx &= ~(TILESIZE-1); /* aligned blocks */
341 miny &= ~(TILESIZE-1); /* aligned blocks */
342
343 c1 += tri->dx12 * miny - tri->dy12 * minx;
344 c2 += tri->dx23 * miny - tri->dy23 * minx;
345 c3 += tri->dx31 * miny - tri->dy31 * minx;
346
347 /* Convert to tile coordinates:
348 */
349 minx /= TILESIZE;
350 maxx /= TILESIZE;
351 miny /= TILESIZE;
352 maxy /= TILESIZE;
353
354 if (miny == maxy && minx == maxx)
355 {
356 /* Triangle is contained in a single tile:
357 */
358 bin_command(setup->tile[minx][miny], lp_rast_triangle, tri );
359 }
360 else
361 {
362 const int step = TILESIZE;
363
364 float ei1 = tri->ei1 * step;
365 float ei2 = tri->ei2 * step;
366 float ei3 = tri->ei3 * step;
367
368 float eo1 = tri->eo1 * step;
369 float eo2 = tri->eo2 * step;
370 float eo3 = tri->eo3 * step;
371
372 float xstep1 = -step * tri->dy12;
373 float xstep2 = -step * tri->dy23;
374 float xstep3 = -step * tri->dy31;
375
376 float ystep1 = step * tri->dx12;
377 float ystep2 = step * tri->dx23;
378 float ystep3 = step * tri->dx31;
379 int x, y;
380
381
382 /* Subdivide space into NxM blocks, where each block is square and
383 * power-of-four in dimension.
384 *
385 * Trivially accept or reject blocks, else jump to per-pixel
386 * examination above.
387 */
388 for (y = miny; y < maxy; y++)
389 {
390 float cx1 = c1;
391 float cx2 = c2;
392 float cx3 = c3;
393
394 for (x = minx; x < maxx; x++)
395 {
396 if (cx1 + eo1 < 0 ||
397 cx2 + eo2 < 0 ||
398 cx3 + eo3 < 0)
399 {
400 /* do nothing */
401 }
402 else if (cx1 + ei1 > 0 &&
403 cx2 + ei2 > 0 &&
404 cx3 + ei3 > 0)
405 {
406 /* shade whole tile */
407 bin_command(setup->tile[x][y], lp_rast_shade_tile, &tri->inputs );
408 }
409 else
410 {
411 /* shade partial tile */
412 bin_command(setup->tile[x][y], lp_rast_triangle, tri );
413 }
414
415 /* Iterate cx values across the region:
416 */
417 cx1 += xstep1;
418 cx2 += xstep2;
419 cx3 += xstep3;
420 }
421
422 /* Iterate c values down the region:
423 */
424 c1 += ystep1;
425 c2 += ystep2;
426 c3 += ystep3;
427 }
428 }
429 }
430
431 static void triangle_cw( struct setup_context *setup,
432 const float (*v0)[4],
433 const float (*v1)[4],
434 const float (*v2)[4] )
435 {
436 do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface );
437 }
438
439 static void triangle_ccw( struct setup_context *setup,
440 const float (*v0)[4],
441 const float (*v1)[4],
442 const float (*v2)[4] )
443 {
444 do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface );
445 }
446
/**
 * Submit a triangle of either winding.
 *
 * The sign of the z component of cross(v0 - v2, v1 - v2), computed from
 * the x/y of attribute 0, selects the path: negative goes to
 * triangle_ccw(), everything else (including zero) to triangle_cw().
 */
static void triangle_both( struct setup_context *setup,
                           const float (*v0)[4],
                           const float (*v1)[4],
                           const float (*v2)[4] )
{
   /* edge vectors relative to v2 */
   const float e_x = v0[0][0] - v2[0][0];
   const float e_y = v0[0][1] - v2[0][1];
   const float f_x = v1[0][0] - v2[0][0];
   const float f_y = v1[0][1] - v2[0][1];

   /* negative determinant: counter-clockwise path */
   if (e_x * f_y - e_y * f_x < 0) {
      triangle_ccw( setup, v0, v1, v2 );
      return;
   }

   triangle_cw( setup, v0, v1, v2 );
}
464
/**
 * Triangle handler that discards its input; installed by
 * setup_set_tri_state() for cull modes it does not otherwise recognise.
 */
static void triangle_nop( struct setup_context *setup,
                          const float (*v0)[4],
                          const float (*v1)[4],
                          const float (*v2)[4] )
{
   /* intentionally empty */
   (void) setup;
   (void) v0;
   (void) v1;
   (void) v2;
}
471
472 void setup_set_tri_state( struct setup_context *setup,
473 unsigned cull_mode,
474 boolean ccw_is_frontface)
475 {
476 setup->ccw_is_frontface = ccw_is_frontface;
477
478 switch (cull_mode) {
479 case PIPE_WINDING_NONE:
480 setup->triangle = triangle_both;
481 break;
482 case PIPE_WINDING_CCW:
483 setup->triangle = triangle_cw;
484 break;
485 case PIPE_WINDING_CW:
486 setup->triangle = triangle_ccw;
487 break;
488 default:
489 setup->triangle = triangle_nop;
490 break;
491 }
492 }
493
494