1 /**************************************************************************
3 * Copyright 2007-2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * Rasterization for binned triangles within a tile
33 #include "util/u_math.h"
36 #include "lp_rast_priv.h"
37 #include "lp_tile_soa.h"
/**
 * Map an index in [0,15] to an x,y position, multiplied by 4.
 * This is used to get the position of each subtile in a 4x4
 * grid of edge step values.
 * Note: we can use some bit twiddling to compute these values instead
 * of using a look-up table, but there's no measurable performance
 * difference.
 */
48 static const int pos_table4
[16][2] = {
68 static const int pos_table16
[16][2] = {
89 * Shade all pixels in a 4x4 block.
92 block_full_4(struct lp_rasterizer_task
*task
,
93 const struct lp_rast_triangle
*tri
,
96 lp_rast_shade_quads_all(task
, &tri
->inputs
, x
, y
);
/**
 * Shade all pixels in a 16x16 block.
 *
 * Walks the block as a 4x4 grid of 4x4 sub-blocks, row by row, and hands
 * each sub-block to block_full_4().
 *
 * NOTE(review): the return type, the x/y parameter declarations and the
 * ix/iy declarations were lost in the damaged source and are reconstructed
 * from the visible uses -- confirm the exact types.
 */
static void
block_full_16(struct lp_rasterizer_task *task,
              const struct lp_rast_triangle *tri,
              int x, int y)
{
   int ix, iy;

   for (iy = 0; iy < 16; iy += 4) {
      for (ix = 0; ix < 16; ix += 4) {
         block_full_4(task, tri, x + ix, y + iy);
      }
   }
}
118 * Pass the 4x4 pixel block to the shader function.
119 * Determination of which of the 16 pixels lies inside the triangle
120 * will be done as part of the fragment shader.
123 do_block_4(struct lp_rasterizer_task
*task
,
124 const struct lp_rast_triangle
*tri
,
126 int c1
, int c2
, int c3
)
131 lp_rast_shade_quads(task
, &tri
->inputs
, x
, y
, -c1
, -c2
, -c3
);
136 * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out
137 * of the triangle's bounds.
140 do_block_16(struct lp_rasterizer_task
*task
,
141 const struct lp_rast_triangle
*tri
,
143 int c0
, int c1
, int c2
)
155 eo
[0] = tri
->eo1
* 4;
156 eo
[1] = tri
->eo2
* 4;
157 eo
[2] = tri
->eo3
* 4;
163 for (j
= 0; j
< 3; j
++) {
164 const int *step
= tri
->inputs
.step
[j
];
165 const int cx
= c
[j
] + eo
[j
];
167 /* Mask has bits set whenever we are outside any of the edges.
169 for (i
= 0; i
< 16; i
++) {
170 int out
= cx
+ step
[i
] * 4;
171 mask
|= (out
>> 31) & (1 << i
);
175 mask
= ~mask
& 0xffff;
177 int i
= ffs(mask
) - 1;
178 int px
= x
+ pos_table4
[i
][0];
179 int py
= y
+ pos_table4
[i
][1];
180 int cx1
= c0
+ tri
->inputs
.step
[0][i
] * 4;
181 int cx2
= c1
+ tri
->inputs
.step
[1][i
] * 4;
182 int cx3
= c2
+ tri
->inputs
.step
[2][i
] * 4;
186 /* Don't bother testing if the 4x4 block is entirely in/out of
187 * the triangle. It's a little faster to do it in the jit code.
189 LP_COUNT(nr_non_empty_4
);
190 do_block_4(task
, tri
, px
, py
, cx1
, cx2
, cx3
);
196 * Scan the tile in chunks and figure out which pixels to rasterize
200 lp_rast_triangle(struct lp_rasterizer_task
*task
,
201 const union lp_rast_cmd_arg arg
)
203 const struct lp_rast_triangle
*tri
= arg
.triangle
;
204 const int x
= task
->x
, y
= task
->y
;
205 int ei
[3], eo
[3], c
[3];
206 unsigned outmask
, inmask
, partial_mask
;
209 c
[0] = tri
->c1
+ tri
->dx12
* y
- tri
->dy12
* x
;
210 c
[1] = tri
->c2
+ tri
->dx23
* y
- tri
->dy23
* x
;
211 c
[2] = tri
->c3
+ tri
->dx31
* y
- tri
->dy31
* x
;
213 eo
[0] = tri
->eo1
* 16;
214 eo
[1] = tri
->eo2
* 16;
215 eo
[2] = tri
->eo3
* 16;
217 ei
[0] = tri
->ei1
* 16;
218 ei
[1] = tri
->ei2
* 16;
219 ei
[2] = tri
->ei3
* 16;
224 for (j
= 0; j
< 3; j
++) {
225 const int *step
= tri
->inputs
.step
[j
];
226 const int cox
= c
[j
] + eo
[j
];
227 const int cio
= ei
[j
]- eo
[j
];
229 /* Outmask has bits set whenever we are outside any of the
232 /* Inmask has bits set whenever we are inside all of the edges.
234 for (i
= 0; i
< 16; i
++) {
235 int out
= cox
+ step
[i
] * 16;
237 outmask
|= (out
>> 31) & (1 << i
);
238 inmask
&= ~((in
>> 31) & (1 << i
));
242 assert((outmask
& inmask
) == 0);
244 if (outmask
== 0xffff)
247 /* Invert mask, so that bits are set whenever we are at least
248 * partially inside all of the edges:
250 partial_mask
= ~inmask
& ~outmask
& 0xffff;
252 /* Iterate over partials:
254 while (partial_mask
) {
255 int i
= ffs(partial_mask
) - 1;
256 int px
= x
+ pos_table16
[i
][0];
257 int py
= y
+ pos_table16
[i
][1];
258 int cx1
= c
[0] + tri
->inputs
.step
[0][i
] * 16;
259 int cx2
= c
[1] + tri
->inputs
.step
[1][i
] * 16;
260 int cx3
= c
[2] + tri
->inputs
.step
[2][i
] * 16;
262 partial_mask
&= ~(1 << i
);
264 LP_COUNT(nr_partially_covered_16
);
265 do_block_16(task
, tri
, px
, py
, cx1
, cx2
, cx3
);
268 /* Iterate over fulls:
271 int i
= ffs(inmask
) - 1;
272 int px
= x
+ pos_table16
[i
][0];
273 int py
= y
+ pos_table16
[i
][1];
277 LP_COUNT(nr_fully_covered_16
);
278 block_full_16(task
, tri
, px
, py
);