1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * Binning code for triangles
34 #include "util/u_math.h"
35 #include "util/u_memory.h"
39 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
41 static void constant_coef( struct tgsi_interp_coef
*coef
,
46 coef
->a0
[i
] = v3
[vert_attr
][i
];
52 * Compute a0, dadx and dady for a linearly interpolated coefficient,
55 static void linear_coef( struct triangle
*tri
,
56 struct tgsi_interp_coef
*coef
,
63 float a1
= v1
[vert_attr
][i
];
64 float a2
= v2
[vert_attr
][i
];
65 float a3
= v3
[vert_attr
][i
];
69 float dadx
= (da12
* tri
->dy31
- tri
->dy12
* da31
) * tri
->oneoverarea
;
70 float dady
= (da31
* tri
->dx12
- tri
->dx31
* da12
) * tri
->oneoverarea
;
75 /* calculate a0 as the value which would be sampled for the
76 * fragment at (0,0), taking into account that we want to sample at
77 * pixel centers, in other words (0.5, 0.5).
79 * this is neat but unfortunately not a good way to do things for
80 * triangles with very large values of dadx or dady as it will
81 * result in the subtraction and re-addition from a0 of a very
82 * large number, which means we'll end up losing a lot of the
83 * fractional bits and precision from a0. the way to fix this is
84 * to define a0 as the sample at a pixel center somewhere near vmin
85 * instead - i'll switch to this later.
87 coef
->a0
[i
] = (v1
[vert_attr
][i
] -
88 (dadx
* (v1
[0][0] - 0.5f
) +
89 dady
* (v1
[0][1] - 0.5f
)));
94 * Compute a0, dadx and dady for a perspective-corrected interpolant,
96 * We basically multiply the vertex value by 1/w before computing
97 * the plane coefficients (a0, dadx, dady).
98 * Later, when we compute the value at a particular fragment position we'll
99 * divide the interpolated value by the interpolated W at that fragment.
101 static void perspective_coef( struct triangle
*tri
,
102 struct tgsi_interp_coef
*coef
,
103 const float (*v1
)[4],
104 const float (*v2
)[4],
105 const float (*v3
)[4],
109 /* premultiply by 1/w (v[0][3] is always 1/w):
111 float a1
= v1
[vert_attr
][i
] * v1
[0][3];
112 float a2
= v2
[vert_attr
][i
] * v2
[0][3];
113 float a3
= v3
[vert_attr
][i
] * v3
[0][3];
114 float da12
= a1
- a2
;
115 float da31
= a3
- a1
;
116 float dadx
= (da12
* tri
->dy31
- tri
->dy12
* da31
) * tri
->oneoverarea
;
117 float dady
= (da31
* tri
->dx12
- tri
->dx31
* da12
) * tri
->oneoverarea
;
120 coef
->dadx
[i
] = dadx
;
121 coef
->dady
[i
] = dady
;
123 (dadx
* (v1
[0][0] - 0.5f
) +
124 dady
* (v1
[0][1] - 0.5f
)));
129 * Special coefficient setup for gl_FragCoord.
130 * X and Y are trivial, though Y has to be inverted for OpenGL.
131 * Z and W are copied from position_coef which should have already been computed.
132 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
135 setup_fragcoord_coef(struct triangle
*tri
, unsigned slot
)
138 tri
->coef
[slot
].a0
[0] = 0.0;
139 tri
->coef
[slot
].dadx
[0] = 1.0;
140 tri
->coef
[slot
].dady
[0] = 0.0;
142 tri
->coef
[slot
].a0
[1] = 0.0;
143 tri
->coef
[slot
].dadx
[1] = 0.0;
144 tri
->coef
[slot
].dady
[1] = 1.0;
146 tri
->coef
[slot
].a0
[2] = tri
->position_coef
.a0
[2];
147 tri
->coef
[slot
].dadx
[2] = tri
->position_coef
.dadx
[2];
148 tri
->coef
[slot
].dady
[2] = tri
->position_coef
.dady
[2];
150 tri
->coef
[slot
].a0
[3] = tri
->position_coef
.a0
[3];
151 tri
->coef
[slot
].dadx
[3] = tri
->position_coef
.dadx
[3];
152 tri
->coef
[slot
].dady
[3] = tri
->position_coef
.dady
[3];
158 * Compute the tri->coef[] array dadx, dady, a0 values.
160 static void setup_tri_coefficients( struct setup_context
*setup
,
161 struct triangle
*tri
,
162 const float (*v1
)[4],
163 const float (*v2
)[4],
164 const float (*v3
)[4],
167 const struct vertex_info
*vinfo
= setup
->vinfo
;
170 /* z and w are done by linear interpolation:
172 linear_coef(tri
, tri
->position_coef
, v1
, v2
, v3
, 0, 2);
173 linear_coef(tri
, tri
->position_coef
, v1
, v2
, v3
, 0, 3);
175 /* setup interpolation for all the remaining attributes:
177 for (input
= 0; input
< vinfo
->num_fs_inputs
; input
++) {
178 unsigned vert_attr
= vinfo
->attrib
[input
].src_index
;
181 switch (vinfo
->attrib
[input
].interp_mode
) {
182 case INTERP_CONSTANT
:
183 for (i
= 0; i
< NUM_CHANNELS
; i
++)
184 constant_coef(tri
->coef
[input
], v3
, vert_attr
, i
);
188 for (i
= 0; i
< NUM_CHANNELS
; i
++)
189 linear_coef(tri
, tri
->coef
[input
], v1
, v2
, v3
, vert_attr
, i
);
192 case INTERP_PERSPECTIVE
:
193 for (i
= 0; i
< NUM_CHANNELS
; i
++)
194 perspective_coef(tri
, tri
->coef
[input
], v1
, v2
, v3
, vert_attr
, i
);
198 setup_fragcoord_coef(tri
, input
);
202 tri
->coef
[input
].a0
[0] = 1.0f
- frontface
;
203 tri
->coef
[input
].dadx
[0] = 0.0;
204 tri
->coef
[input
].dady
[0] = 0.0;
215 /* XXX: do this by add/subtracting a large floating point number:
217 static inline float subpixel_snap( float a
)
220 return (float)i
* (1.0/16);
227 /* to avoid having to allocate power-of-four, square render targets,
228 * end up having a specialized version of the above that runs only at
231 * at the topmost level there may be an arbitrary number of steps on
232 * either dimension, so this loop needs to be either separately
233 * code-generated and unrolled for each render target size, or kept as
234 * generic looping code:
237 #define MIN3(a,b,c) MIN2(MIN2(a,b),c)
238 #define MAX3(a,b,c) MAX2(MAX2(a,b),c)
241 do_triangle_ccw(struct lp_setup
*setup
,
242 const float (*v1
)[4],
243 const float (*v2
)[4],
244 const float (*v3
)[4],
245 boolean frontfacing
)
247 const int rt_width
= setup
->framebuffer
.cbufs
[0]->width
;
248 const int rt_height
= setup
->framebuffer
.cbufs
[0]->height
;
250 const float y1
= subpixel_snap(v1
[0][1]);
251 const float y2
= subpixel_snap(v2
[0][1]);
252 const float y3
= subpixel_snap(v3
[0][1]);
254 const float x1
= subpixel_snap(v1
[0][0]);
255 const float x2
= subpixel_snap(v2
[0][0]);
256 const float x3
= subpixel_snap(v3
[0][0]);
258 struct triangle
*tri
= allocate_triangle( setup
);
262 int minx
, maxx
, miny
, maxy
;
272 area
= (tri
->dx12
* tri
->dy31
-
273 tri
->dx31
* tri
->dy12
);
275 /* Cull non-ccw and zero-sized triangles.
277 if (area
<= 0 || util_is_inf_or_nan(area
))
280 // Bounding rectangle
281 minx
= util_iround(MIN3(x1
, x2
, x3
) - .5);
282 maxx
= util_iround(MAX3(x1
, x2
, x3
) + .5);
283 miny
= util_iround(MIN3(y1
, y2
, y3
) - .5);
284 maxy
= util_iround(MAX3(y1
, y2
, y3
) + .5);
286 /* Clamp to framebuffer (or tile) dimensions:
288 miny
= MAX2(0, miny
);
289 minx
= MAX2(0, minx
);
290 maxy
= MIN2(rt_height
, maxy
);
291 maxx
= MIN2(rt_width
, maxx
);
293 if (miny
== maxy
|| minx
== maxx
)
296 /* The only divide in this code. Is it really needed?
298 tri
->oneoverarea
= 1.0f
/ area
;
300 /* Setup parameter interpolants:
302 setup_tri_coefficients( setup
, tri
, v1
, v2
, v3
, frontfacing
);
304 /* half-edge constants, will be iterated over the whole
307 c1
= tri
->dy12
* x1
- tri
->dx12
* y1
;
308 c2
= tri
->dy23
* x2
- tri
->dx23
* y2
;
309 c3
= tri
->dy31
* x3
- tri
->dx31
* y3
;
311 /* correct for top-left fill convention:
313 if (tri
->dy12
< 0 || (tri
->dy12
== 0 && tri
->dx12
> 0)) c1
++;
314 if (tri
->dy23
< 0 || (tri
->dy23
== 0 && tri
->dx23
> 0)) c2
++;
315 if (tri
->dy31
< 0 || (tri
->dy31
== 0 && tri
->dx31
> 0)) c3
++;
317 /* find trivial reject offsets for each edge for a single-pixel
318 * sized block. These will be scaled up at each recursive level to
319 * match the active blocksize. Scaling in this way works best if
320 * the blocks are square.
323 if (tri
->dy12
< 0) tri
->eo1
-= tri
->dy12
;
324 if (tri
->dx12
> 0) tri
->eo1
+= tri
->dx12
;
327 if (tri
->dy23
< 0) tri
->eo2
-= tri
->dy23
;
328 if (tri
->dx23
> 0) tri
->eo2
+= tri
->dx23
;
331 if (tri
->dy31
< 0) tri
->eo3
-= tri
->dy31
;
332 if (tri
->dx31
> 0) tri
->eo3
+= tri
->dx31
;
334 /* Calculate trivial accept offsets from the above.
336 tri
->ei1
= tri
->dx12
- tri
->dy12
- tri
->eo1
;
337 tri
->ei2
= tri
->dx23
- tri
->dy23
- tri
->eo2
;
338 tri
->ei3
= tri
->dx31
- tri
->dy31
- tri
->eo3
;
340 minx
&= ~(TILESIZE
-1); /* aligned blocks */
341 miny
&= ~(TILESIZE
-1); /* aligned blocks */
343 c1
+= tri
->dx12
* miny
- tri
->dy12
* minx
;
344 c2
+= tri
->dx23
* miny
- tri
->dy23
* minx
;
345 c3
+= tri
->dx31
* miny
- tri
->dy31
* minx
;
347 /* Convert to tile coordinates:
354 if (miny
== maxy
&& minx
== maxx
)
356 /* Triangle is contained in a single tile:
358 bin_command(setup
->tile
[minx
][miny
], lp_rast_triangle
, tri
);
362 const int step
= TILESIZE
;
364 float ei1
= tri
->ei1
* step
;
365 float ei2
= tri
->ei2
* step
;
366 float ei3
= tri
->ei3
* step
;
368 float eo1
= tri
->eo1
* step
;
369 float eo2
= tri
->eo2
* step
;
370 float eo3
= tri
->eo3
* step
;
372 float xstep1
= -step
* tri
->dy12
;
373 float xstep2
= -step
* tri
->dy23
;
374 float xstep3
= -step
* tri
->dy31
;
376 float ystep1
= step
* tri
->dx12
;
377 float ystep2
= step
* tri
->dx23
;
378 float ystep3
= step
* tri
->dx31
;
382 /* Subdivide space into NxM blocks, where each block is square and
383 * power-of-four in dimension.
385 * Trivially accept or reject blocks, else jump to per-pixel
388 for (y
= miny
; y
< maxy
; y
++)
394 for (x
= minx
; x
< maxx
; x
++)
402 else if (cx1
+ ei1
> 0 &&
406 /* shade whole tile */
407 bin_command(setup
->tile
[x
][y
], lp_rast_shade_tile
, &tri
->inputs
);
411 /* shade partial tile */
412 bin_command(setup
->tile
[x
][y
], lp_rast_triangle
, tri
);
415 /* Iterate cx values across the region:
422 /* Iterate c values down the region:
431 static void triangle_cw( struct setup_context
*setup
,
432 const float (*v0
)[4],
433 const float (*v1
)[4],
434 const float (*v2
)[4] )
436 do_triangle_ccw( setup
, v1
, v0
, v2
, !setup
->ccw_is_frontface
);
439 static void triangle_ccw( struct setup_context
*setup
,
440 const float (*v0
)[4],
441 const float (*v1
)[4],
442 const float (*v2
)[4] )
444 do_triangle_ccw( setup
, v0
, v1
, v2
, setup
->ccw_is_frontface
);
447 static void triangle_both( struct setup_context
*setup
,
448 const float (*v0
)[4],
449 const float (*v1
)[4],
450 const float (*v2
)[4] )
452 /* edge vectors e = v0 - v2, f = v1 - v2 */
453 const float ex
= v0
[0][0] - v2
[0][0];
454 const float ey
= v0
[0][1] - v2
[0][1];
455 const float fx
= v1
[0][0] - v2
[0][0];
456 const float fy
= v1
[0][1] - v2
[0][1];
458 /* det = cross(e,f).z */
459 if (ex
* fy
- ey
* fx
< 0)
460 triangle_ccw( setup
, v0
, v1
, v2
);
462 triangle_cw( setup
, v0
, v1
, v2
);
465 static void triangle_nop( struct setup_context
*setup
,
466 const float (*v0
)[4],
467 const float (*v1
)[4],
468 const float (*v2
)[4] )
472 void setup_set_tri_state( struct setup_context
*setup
,
474 boolean ccw_is_frontface
)
476 setup
->ccw_is_frontface
= ccw_is_frontface
;
479 case PIPE_WINDING_NONE
:
480 setup
->triangle
= triangle_both
;
482 case PIPE_WINDING_CCW
:
483 setup
->triangle
= triangle_cw
;
485 case PIPE_WINDING_CW
:
486 setup
->triangle
= triangle_ccw
;
489 setup
->triangle
= triangle_nop
;