/**************************************************************************
 *
 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/*
 * Binning code for lines
 */
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
35 #include "lp_setup_context.h"
37 #include "lp_state_fs.h"
/* Number of channels per shader input (x, y, z, w). */
#define NUM_CHANNELS 4

/*
 * Per-pixel step tables for the four axis-aligned scissor planes.
 *
 * Entry j holds  y * dcdy - x * dcdx  for the pixel at (x, y) inside a
 * 4x4 block, using the same j -> (x, y) ordering as the SETUP_STEP()
 * table filled in by lp_setup_line() (2x2 quads, left-to-right then
 * top-to-bottom).  Because the scissor planes have fixed unit
 * gradients the tables are constant:
 *
 *    minx plane: dcdx = -1, dcdy =  0   ->  +x
 *    maxx plane: dcdx =  1, dcdy =  0   ->  -x
 *    miny plane: dcdx =  0, dcdy =  1   ->  +y
 *    maxy plane: dcdx =  0, dcdy = -1   ->  -y
 *
 * NOTE(review): the initialiser contents were missing from this copy of
 * the file and have been re-derived from the formula above; confirm
 * against the tables in lp_setup_tri.c.
 */
static const int step_scissor_minx[16] = {
   0, 1, 0, 1,
   2, 3, 2, 3,
   0, 1, 0, 1,
   2, 3, 2, 3
};

static const int step_scissor_maxx[16] = {
    0, -1,  0, -1,
   -2, -3, -2, -3,
    0, -1,  0, -1,
   -2, -3, -2, -3
};

static const int step_scissor_miny[16] = {
   0, 0, 1, 1,
   0, 0, 1, 1,
   2, 2, 3, 3,
   2, 2, 3, 3
};

static const int step_scissor_maxy[16] = {
    0,  0, -1, -1,
    0,  0, -1, -1,
   -2, -2, -3, -3,
   -2, -2, -3, -3
};
73 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
75 static void constant_coef( struct lp_setup_context
*setup
,
76 struct lp_rast_triangle
*tri
,
81 tri
->inputs
.a0
[slot
][i
] = value
;
82 tri
->inputs
.dadx
[slot
][i
] = 0.0f
;
83 tri
->inputs
.dady
[slot
][i
] = 0.0f
;
88 * Compute a0, dadx and dady for a linearly interpolated coefficient,
91 static void linear_coef( struct lp_setup_context
*setup
,
92 struct lp_rast_triangle
*tri
,
100 float a1
= v1
[vert_attr
][i
];
101 float a2
= v2
[vert_attr
][i
];
103 float da21
= a1
- a2
;
104 float dadx
= da21
* tri
->dx
* oneoverarea
;
105 float dady
= da21
* tri
->dy
* oneoverarea
;
107 tri
->inputs
.dadx
[slot
][i
] = dadx
;
108 tri
->inputs
.dady
[slot
][i
] = dady
;
110 tri
->inputs
.a0
[slot
][i
] = (a1
-
111 (dadx
* (v1
[0][0] - setup
->pixel_offset
) +
112 dady
* (v1
[0][1] - setup
->pixel_offset
)));
117 * Compute a0, dadx and dady for a perspective-corrected interpolant,
119 * We basically multiply the vertex value by 1/w before computing
120 * the plane coefficients (a0, dadx, dady).
121 * Later, when we compute the value at a particular fragment position we'll
122 * divide the interpolated value by the interpolated W at that fragment.
124 static void perspective_coef( struct lp_setup_context
*setup
,
125 struct lp_rast_triangle
*tri
,
128 const float (*v1
)[4],
129 const float (*v2
)[4],
133 /* premultiply by 1/w (v[0][3] is always 1/w):
135 float a1
= v1
[vert_attr
][i
] * v1
[0][3];
136 float a2
= v2
[vert_attr
][i
] * v2
[0][3];
138 float da21
= a1
- a2
;
139 float dadx
= da21
* tri
->dx
* oneoverarea
;
140 float dady
= da21
* tri
->dy
* oneoverarea
;
142 tri
->inputs
.dadx
[slot
][i
] = dadx
;
143 tri
->inputs
.dady
[slot
][i
] = dady
;
145 tri
->inputs
.a0
[slot
][i
] = (a1
-
146 (dadx
* (v1
[0][0] - setup
->pixel_offset
) +
147 dady
* (v1
[0][1] - setup
->pixel_offset
)));
151 * Compute the tri->coef[] array dadx, dady, a0 values.
153 static void setup_line_coefficients( struct lp_setup_context
*setup
,
154 struct lp_rast_triangle
*tri
,
156 const float (*v1
)[4],
157 const float (*v2
)[4])
159 unsigned fragcoord_usage_mask
= TGSI_WRITEMASK_XYZ
;
162 /* setup interpolation for all the remaining attributes:
164 for (slot
= 0; slot
< setup
->fs
.nr_inputs
; slot
++) {
165 unsigned vert_attr
= setup
->fs
.input
[slot
].src_index
;
166 unsigned usage_mask
= setup
->fs
.input
[slot
].usage_mask
;
169 switch (setup
->fs
.input
[slot
].interp
) {
170 case LP_INTERP_CONSTANT
:
171 if (setup
->flatshade_first
) {
172 for (i
= 0; i
< NUM_CHANNELS
; i
++)
173 if (usage_mask
& (1 << i
))
174 constant_coef(setup
, tri
, slot
+1, v1
[vert_attr
][i
], i
);
177 for (i
= 0; i
< NUM_CHANNELS
; i
++)
178 if (usage_mask
& (1 << i
))
179 constant_coef(setup
, tri
, slot
+1, v2
[vert_attr
][i
], i
);
183 case LP_INTERP_LINEAR
:
184 for (i
= 0; i
< NUM_CHANNELS
; i
++)
185 if (usage_mask
& (1 << i
))
186 linear_coef(setup
, tri
, oneoverarea
, slot
+1, v1
, v2
, vert_attr
, i
);
189 case LP_INTERP_PERSPECTIVE
:
190 for (i
= 0; i
< NUM_CHANNELS
; i
++)
191 if (usage_mask
& (1 << i
))
192 perspective_coef(setup
, tri
, oneoverarea
, slot
+1, v1
, v2
, vert_attr
, i
);
193 fragcoord_usage_mask
|= TGSI_WRITEMASK_W
;
196 case LP_INTERP_POSITION
:
198 * The generated pixel interpolators will pick up the coeffs from
199 * slot 0, so all need to ensure that the usage mask is covers all
202 fragcoord_usage_mask
|= usage_mask
;
210 /* The internal position input is in slot zero:
212 lp_setup_fragcoord_coef(setup
, tri
, oneoverarea
, 0, v1
, v2
, v2
,
213 fragcoord_usage_mask
);
218 static INLINE
int subpixel_snap( float a
)
220 return util_iround(FIXED_ONE
* a
);
225 * Print line vertex attribs (for debug).
228 print_line(struct lp_setup_context
*setup
,
229 const float (*v1
)[4],
230 const float (*v2
)[4])
234 debug_printf("llvmpipe line\n");
235 for (i
= 0; i
< 1 + setup
->fs
.nr_inputs
; i
++) {
236 debug_printf(" v1[%d]: %f %f %f %f\n", i
,
237 v1
[i
][0], v1
[i
][1], v1
[i
][2], v1
[i
][3]);
239 for (i
= 0; i
< 1 + setup
->fs
.nr_inputs
; i
++) {
240 debug_printf(" v2[%d]: %f %f %f %f\n", i
,
241 v2
[i
][0], v2
[i
][1], v2
[i
][2], v2
[i
][3]);
247 lp_setup_line( struct lp_setup_context
*setup
,
248 const float (*v1
)[4],
249 const float (*v2
)[4])
251 struct lp_scene
*scene
= lp_setup_get_current_scene(setup
);
252 struct lp_rast_triangle
*line
;
254 float half_width
= setup
->line_width
/ 2;
255 int minx
, maxx
, miny
, maxy
;
256 int ix0
, ix1
, iy0
, iy1
;
265 print_line(setup
, v1
, v2
);
267 if (setup
->scissor_test
) {
274 line
= lp_setup_alloc_triangle(scene
,
282 line
->v
[0][0] = v1
[0][0];
283 line
->v
[1][0] = v2
[0][0];
284 line
->v
[0][1] = v1
[0][1];
285 line
->v
[1][1] = v2
[0][1];
288 /* pre-calculation(based on given vertices) to determine if line is
289 * more horizontal or more vertical
291 line
->dx
= v1
[0][0] - v2
[0][0];
292 line
->dy
= v1
[0][1] - v2
[0][1];
295 if (fabsf(line
->dx
) >= fabsf(line
->dy
)) {
297 /* if v2 is to the right of v1, swap pointers */
298 const float (*temp
)[4] = v1
;
301 line
->dx
= -line
->dx
;
302 line
->dy
= -line
->dy
;
305 /* x/y positions in fixed point */
306 x
[0] = subpixel_snap(v1
[0][0] - setup
->pixel_offset
);
307 x
[1] = subpixel_snap(v2
[0][0] - setup
->pixel_offset
);
308 x
[2] = subpixel_snap(v2
[0][0] - setup
->pixel_offset
);
309 x
[3] = subpixel_snap(v1
[0][0] - setup
->pixel_offset
);
311 y
[0] = subpixel_snap(v1
[0][1] - half_width
- setup
->pixel_offset
);
312 y
[1] = subpixel_snap(v2
[0][1] - half_width
- setup
->pixel_offset
);
313 y
[2] = subpixel_snap(v2
[0][1] + half_width
- setup
->pixel_offset
);
314 y
[3] = subpixel_snap(v1
[0][1] + half_width
- setup
->pixel_offset
);
319 /* if v2 is on top of v1, swap pointers */
320 const float (*temp
)[4] = v1
;
323 line
->dx
= -line
->dx
;
324 line
->dy
= -line
->dy
;
327 x
[0] = subpixel_snap(v1
[0][0] - half_width
- setup
->pixel_offset
);
328 x
[1] = subpixel_snap(v2
[0][0] - half_width
- setup
->pixel_offset
);
329 x
[2] = subpixel_snap(v2
[0][0] + half_width
- setup
->pixel_offset
);
330 x
[3] = subpixel_snap(v1
[0][0] + half_width
- setup
->pixel_offset
);
332 y
[0] = subpixel_snap(v1
[0][1] - setup
->pixel_offset
);
333 y
[1] = subpixel_snap(v2
[0][1] - setup
->pixel_offset
);
334 y
[2] = subpixel_snap(v2
[0][1] - setup
->pixel_offset
);
335 y
[3] = subpixel_snap(v1
[0][1] - setup
->pixel_offset
);
338 /* calculate the deltas */
339 line
->plane
[0].dcdy
= x
[0] - x
[1];
340 line
->plane
[1].dcdy
= x
[1] - x
[2];
341 line
->plane
[2].dcdy
= x
[2] - x
[3];
342 line
->plane
[3].dcdy
= x
[3] - x
[0];
344 line
->plane
[0].dcdx
= y
[0] - y
[1];
345 line
->plane
[1].dcdx
= y
[1] - y
[2];
346 line
->plane
[2].dcdx
= y
[2] - y
[3];
347 line
->plane
[3].dcdx
= y
[3] - y
[0];
353 /* Bounding rectangle (in pixels) */
355 /* Yes this is necessary to accurately calculate bounding boxes
356 * with the two fill-conventions we support. GL (normally) ends
357 * up needing a bottom-left fill convention, which requires
358 * slightly different rounding.
360 int adj
= (setup
->pixel_offset
!= 0) ? 1 : 0;
362 minx
= (MIN4(x
[0], x
[1], x
[2], x
[3]) + (FIXED_ONE
-1)) >> FIXED_ORDER
;
363 maxx
= (MAX4(x
[0], x
[1], x
[2], x
[3]) + (FIXED_ONE
-1)) >> FIXED_ORDER
;
364 miny
= (MIN4(y
[0], y
[1], y
[3], y
[3]) + (FIXED_ONE
-1) + adj
) >> FIXED_ORDER
;
365 maxy
= (MAX4(y
[0], y
[1], y
[3], y
[3]) + (FIXED_ONE
-1) + adj
) >> FIXED_ORDER
;
368 if (setup
->scissor_test
) {
369 minx
= MAX2(minx
, setup
->scissor
.current
.minx
);
370 maxx
= MIN2(maxx
, setup
->scissor
.current
.maxx
);
371 miny
= MAX2(miny
, setup
->scissor
.current
.miny
);
372 maxy
= MIN2(maxy
, setup
->scissor
.current
.maxy
);
375 minx
= MAX2(minx
, 0);
376 miny
= MAX2(miny
, 0);
377 maxx
= MIN2(maxx
, scene
->fb
.width
);
378 maxy
= MIN2(maxy
, scene
->fb
.height
);
382 if (miny
>= maxy
|| minx
>= maxx
) {
383 lp_scene_putback_data( scene
, tri_bytes
);
387 oneoverarea
= 1.0f
/ (line
->dx
* line
->dx
+ line
->dy
* line
->dy
);
389 /* Setup parameter interpolants:
391 setup_line_coefficients( setup
, line
, oneoverarea
, v1
, v2
);
393 for (i
= 0; i
< 4; i
++) {
394 struct lp_rast_plane
*plane
= &line
->plane
[i
];
396 /* half-edge constants, will be interated over the whole render
399 plane
->c
= plane
->dcdx
* x
[i
] - plane
->dcdy
* y
[i
];
402 /* correct for top-left vs. bottom-left fill convention.
404 * note that we're overloading gl_rasterization_rules to mean
405 * both (0.5,0.5) pixel centers *and* bottom-left filling
408 * GL actually has a top-left filling convention, but GL's
409 * notion of "top" differs from gallium's...
411 * Also, sometimes (in FBO cases) GL will render upside down
412 * to its usual method, in which case it will probably want
413 * to use the opposite, top-left convention.
415 if (plane
->dcdx
< 0) {
416 /* both fill conventions want this - adjust for left edges */
419 else if (plane
->dcdx
== 0) {
420 if (setup
->pixel_offset
== 0) {
421 /* correct for top-left fill convention:
423 if (plane
->dcdy
> 0) plane
->c
++;
426 /* correct for bottom-left fill convention:
428 if (plane
->dcdy
< 0) plane
->c
++;
432 plane
->dcdx
*= FIXED_ONE
;
433 plane
->dcdy
*= FIXED_ONE
;
435 /* find trivial reject offsets for each edge for a single-pixel
436 * sized block. These will be scaled up at each recursive level to
437 * match the active blocksize. Scaling in this way works best if
438 * the blocks are square.
441 if (plane
->dcdx
< 0) plane
->eo
-= plane
->dcdx
;
442 if (plane
->dcdy
> 0) plane
->eo
+= plane
->dcdy
;
444 /* Calculate trivial accept offsets from the above.
446 plane
->ei
= plane
->dcdy
- plane
->dcdx
- plane
->eo
;
448 plane
->step
= line
->step
[i
];
450 /* Fill in the inputs.step[][] arrays.
451 * We've manually unrolled some loops here.
453 #define SETUP_STEP(j, x, y) \
454 line->step[i][j] = y * plane->dcdy - x * plane->dcdx
468 SETUP_STEP(10, 0, 3);
469 SETUP_STEP(11, 1, 3);
471 SETUP_STEP(12, 2, 2);
472 SETUP_STEP(13, 3, 2);
473 SETUP_STEP(14, 2, 3);
474 SETUP_STEP(15, 3, 3);
480 * When rasterizing scissored tris, use the intersection of the
481 * triangle bounding box and the scissor rect to generate the
484 * This permits us to cut off the triangle "tails" that are present
485 * in the intermediate recursive levels caused when two of the
486 * triangles edges don't diverge quickly enough to trivially reject
487 * exterior blocks from the triangle.
489 * It's not really clear if it's worth worrying about these tails,
490 * but since we generate the planes for each scissored tri, it's
491 * free to trim them in this case.
493 * Note that otherwise, the scissor planes only vary in 'C' value,
494 * and even then only on state-changes. Could alternatively store
495 * these planes elsewhere.
497 if (nr_planes
== 8) {
498 line
->plane
[4].step
= step_scissor_maxx
;
499 line
->plane
[4].dcdx
= 1;
500 line
->plane
[4].dcdy
= 0;
501 line
->plane
[4].c
= maxx
;
502 line
->plane
[4].ei
= -1;
503 line
->plane
[4].eo
= 0;
505 line
->plane
[5].step
= step_scissor_miny
;
506 line
->plane
[5].dcdx
= 0;
507 line
->plane
[5].dcdy
= 1;
508 line
->plane
[5].c
= 1-miny
;
509 line
->plane
[5].ei
= 0;
510 line
->plane
[5].eo
= 1;
512 line
->plane
[6].step
= step_scissor_maxy
;
513 line
->plane
[6].dcdx
= 0;
514 line
->plane
[6].dcdy
= -1;
515 line
->plane
[6].c
= maxy
;
516 line
->plane
[6].ei
= -1;
517 line
->plane
[6].eo
= 0;
519 line
->plane
[7].step
= step_scissor_minx
;
520 line
->plane
[7].dcdx
= -1;
521 line
->plane
[7].dcdy
= 0;
522 line
->plane
[7].c
= 1-minx
;
523 line
->plane
[7].ei
= 0;
524 line
->plane
[7].eo
= 1;
529 * All fields of 'tri' are now set. The remaining code here is
530 * concerned with binning.
533 /* Convert to tile coordinates, and inclusive ranges:
535 ix0
= minx
/ TILE_SIZE
;
536 iy0
= miny
/ TILE_SIZE
;
537 ix1
= (maxx
-1) / TILE_SIZE
;
538 iy1
= (maxy
-1) / TILE_SIZE
;
541 * Clamp to framebuffer size
543 assert(ix0
== MAX2(ix0
, 0));
544 assert(iy0
== MAX2(iy0
, 0));
545 assert(ix1
== MIN2(ix1
, scene
->tiles_x
- 1));
546 assert(iy1
== MIN2(iy1
, scene
->tiles_y
- 1));
548 /* Determine which tile(s) intersect the triangle's bounding box
550 if (iy0
== iy1
&& ix0
== ix1
)
552 /* Triangle is contained in a single tile:
554 lp_scene_bin_command( scene
, ix0
, iy0
,
555 lp_rast_tri_tab
[nr_planes
],
556 lp_rast_arg_triangle(line
, (1<<nr_planes
)-1) );
566 int is_blit
= -1; /* undetermined */
568 for (i
= 0; i
< nr_planes
; i
++) {
569 c
[i
] = (line
->plane
[i
].c
+
570 line
->plane
[i
].dcdy
* iy0
* TILE_SIZE
-
571 line
->plane
[i
].dcdx
* ix0
* TILE_SIZE
);
573 ei
[i
] = line
->plane
[i
].ei
<< TILE_ORDER
;
574 eo
[i
] = line
->plane
[i
].eo
<< TILE_ORDER
;
575 xstep
[i
] = -(line
->plane
[i
].dcdx
<< TILE_ORDER
);
576 ystep
[i
] = line
->plane
[i
].dcdy
<< TILE_ORDER
;
581 /* Test tile-sized blocks against the triangle.
582 * Discard blocks fully outside the tri. If the block is fully
583 * contained inside the tri, bin an lp_rast_shade_tile command.
584 * Else, bin a lp_rast_triangle command.
586 for (y
= iy0
; y
<= iy1
; y
++)
588 boolean in
= FALSE
; /* are we inside the triangle? */
591 for (i
= 0; i
< nr_planes
; i
++)
594 for (x
= ix0
; x
<= ix1
; x
++)
599 for (i
= 0; i
< nr_planes
; i
++) {
600 int planeout
= cx
[i
] + eo
[i
];
601 int planepartial
= cx
[i
] + ei
[i
] - 1;
602 out
|= (planeout
>> 31);
603 partial
|= (planepartial
>> 31) & (1<<i
);
608 break; /* exiting triangle, all done with this row */
609 LP_COUNT(nr_empty_64
);
612 /* Not trivially accepted by at least one plane -
613 * rasterize/shade partial tile
615 int count
= util_bitcount(partial
);
617 lp_scene_bin_command( scene
, x
, y
,
618 lp_rast_tri_tab
[count
],
619 lp_rast_arg_triangle(line
, partial
) );
621 LP_COUNT(nr_partially_covered_64
);
624 /* triangle covers the whole tile- shade whole tile */
625 LP_COUNT(nr_fully_covered_64
);
627 /* leverages on existing code in lp_setup_tri.c */
628 do_triangle_ccw_whole_tile(setup
, scene
, line
, x
, y
,
632 /* Iterate cx values across the region:
634 for (i
= 0; i
< nr_planes
; i
++)
638 /* Iterate c values down the region:
640 for (i
= 0; i
< nr_planes
; i
++)
647 void lp_setup_choose_line( struct lp_setup_context
*setup
)
649 setup
->line
= lp_setup_line
;