llvmpipe: improve the in/out test a little
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast_tri.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Rasterization for binned triangles within a tile
30 */
31
32 #include <limits.h>
33 #include "util/u_math.h"
34 #include "lp_debug.h"
35 #include "lp_rast_priv.h"
36 #include "lp_tile_soa.h"
37
38
/**
 * Sub-block offsets for a 4x4 grid of 4-pixel-wide cells.
 *
 * Entry i gives the { x, y } offset (already multiplied by 4) of the
 * i-th 4x4 sub-block inside a 16x16 block.  The same index i is used to
 * look up the matching entry in the 4x4 grid of edge step values.
 *
 * The entries are laid out in Z (Morton) order: bits 0 and 2 of the
 * index select the x cell, bits 1 and 3 select the y cell.
 */
static const int pos_table4[16][2] = {
   /* x in { 0, 4 },  y in { 0, 4 } */
   {  0,  0 }, {  4,  0 }, {  0,  4 }, {  4,  4 },
   /* x in { 8, 12 }, y in { 0, 4 } */
   {  8,  0 }, { 12,  0 }, {  8,  4 }, { 12,  4 },
   /* x in { 0, 4 },  y in { 8, 12 } */
   {  0,  8 }, {  4,  8 }, {  0, 12 }, {  4, 12 },
   /* x in { 8, 12 }, y in { 8, 12 } */
   {  8,  8 }, { 12,  8 }, {  8, 12 }, { 12, 12 }
};
62
63
/**
 * Sub-block offsets for a 4x4 grid of 16-pixel-wide cells.
 *
 * Entry i gives the { x, y } offset (already multiplied by 16) of the
 * i-th 16x16 block.  The entries follow the same Z (Morton) ordering as
 * a 4x4 grid indexed with bits 0 and 2 for x and bits 1 and 3 for y.
 */
static const int pos_table16[16][2] = {
   /* x in { 0, 16 },  y in { 0, 16 } */
   {  0,  0 }, { 16,  0 }, {  0, 16 }, { 16, 16 },
   /* x in { 32, 48 }, y in { 0, 16 } */
   { 32,  0 }, { 48,  0 }, { 32, 16 }, { 48, 16 },
   /* x in { 0, 16 },  y in { 32, 48 } */
   {  0, 32 }, { 16, 32 }, {  0, 48 }, { 16, 48 },
   /* x in { 32, 48 }, y in { 32, 48 } */
   { 32, 32 }, { 48, 32 }, { 32, 48 }, { 48, 48 }
};
82
83
84 /**
85 * Shade all pixels in a 4x4 block.
86 */
87 static void
88 block_full_4( struct lp_rasterizer_task *rast_task,
89 const struct lp_rast_triangle *tri,
90 int x, int y )
91 {
92 /* Set c1,c2,c3 to large values so the in/out test always passes */
93 const int32_t c1 = INT_MIN/2, c2 = INT_MIN/2, c3 = INT_MIN/2;
94 lp_rast_shade_quads(rast_task->rast,
95 rast_task->thread_index,
96 &tri->inputs,
97 x, y,
98 c1, c2, c3);
99 }
100
101
/**
 * Shade every pixel of the 16x16 block whose corner is at (x, y).
 *
 * The block is handed to block_full_4() as sixteen 4x4 sub-blocks,
 * visited row by row.  Both x and y must be multiples of 16.
 */
static void
block_full_16( struct lp_rasterizer_task *rast_task,
               const struct lp_rast_triangle *tri,
               int x, int y )
{
   unsigned row, col;

   assert(x % 16 == 0);
   assert(y % 16 == 0);

   for (row = 0; row < 16; row += 4) {
      /* y coordinate shared by the four sub-blocks of this row */
      const int sub_y = y + row;

      for (col = 0; col < 16; col += 4) {
         block_full_4(rast_task, tri, x + col, sub_y);
      }
   }
}
117
118
119 /**
120 * Pass the 4x4 pixel block to the shader function.
121 * Determination of which of the 16 pixels lies inside the triangle
122 * will be done as part of the fragment shader.
123 */
124 static void
125 do_block_4( struct lp_rasterizer_task *rast_task,
126 const struct lp_rast_triangle *tri,
127 int x, int y,
128 int c1,
129 int c2,
130 int c3 )
131 {
132 lp_rast_shade_quads(rast_task->rast,
133 rast_task->thread_index,
134 &tri->inputs,
135 x, y,
136 -c1, -c2, -c3);
137 }
138
139
140 /**
141 * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out
142 * of the triangle's bounds.
143 */
144 static void
145 do_block_16( struct lp_rasterizer_task *rast_task,
146 const struct lp_rast_triangle *tri,
147 int x, int y,
148 int c1,
149 int c2,
150 int c3 )
151 {
152 const int ei1 = tri->ei1 * 4;
153 const int ei2 = tri->ei2 * 4;
154 const int ei3 = tri->ei3 * 4;
155
156 const int eo1 = tri->eo1 * 4;
157 const int eo2 = tri->eo2 * 4;
158 const int eo3 = tri->eo3 * 4;
159
160 int i;
161
162 assert(x % 16 == 0);
163 assert(y % 16 == 0);
164
165 for (i = 0; i < 16; i++) {
166 int cx1 = c1 + (tri->inputs.step[0][i] * 4);
167 int cx2 = c2 + (tri->inputs.step[1][i] * 4);
168 int cx3 = c3 + (tri->inputs.step[2][i] * 4);
169
170 if (cx1 + eo1 < 0 ||
171 cx2 + eo2 < 0 ||
172 cx3 + eo3 < 0) {
173 /* the block is completely outside the triangle - nop */
174 }
175 else {
176 int px = x + pos_table4[i][0];
177 int py = y + pos_table4[i][1];
178 if (cx1 + ei1 > 0 &&
179 cx2 + ei2 > 0 &&
180 cx3 + ei3 > 0) {
181 /* the block is completely inside the triangle */
182 block_full_4(rast_task, tri, px, py);
183 }
184 else {
185 /* the block is partially in/out of the triangle */
186 do_block_4(rast_task, tri, px, py, cx1, cx2, cx3);
187 }
188 }
189 }
190 }
191
192
193 /**
194 * Scan the tile in chunks and figure out which pixels to rasterize
195 * for this triangle.
196 */
197 void
198 lp_rast_triangle( struct lp_rasterizer *rast,
199 unsigned thread_index,
200 const union lp_rast_cmd_arg arg )
201 {
202 struct lp_rasterizer_task *rast_task = &rast->tasks[thread_index];
203 const struct lp_rast_triangle *tri = arg.triangle;
204
205 int x = rast_task->x;
206 int y = rast_task->y;
207 unsigned i;
208
209 int c1 = tri->c1 + tri->dx12 * y - tri->dy12 * x;
210 int c2 = tri->c2 + tri->dx23 * y - tri->dy23 * x;
211 int c3 = tri->c3 + tri->dx31 * y - tri->dy31 * x;
212
213 int ei1 = tri->ei1 * 16;
214 int ei2 = tri->ei2 * 16;
215 int ei3 = tri->ei3 * 16;
216
217 int eo1 = tri->eo1 * 16;
218 int eo2 = tri->eo2 * 16;
219 int eo3 = tri->eo3 * 16;
220
221 LP_DBG(DEBUG_RAST, "lp_rast_triangle\n");
222
223 /* Walk over the tile to build a list of 4x4 pixel blocks which will
224 * be filled/shaded. We do this at two granularities: 16x16 blocks
225 * and then 4x4 blocks.
226 */
227 for (i = 0; i < 16; i++) {
228 int cx1 = c1 + (tri->inputs.step[0][i] * 16);
229 int cx2 = c2 + (tri->inputs.step[1][i] * 16);
230 int cx3 = c3 + (tri->inputs.step[2][i] * 16);
231
232 if (cx1 + eo1 < 0 ||
233 cx2 + eo2 < 0 ||
234 cx3 + eo3 < 0) {
235 /* the block is completely outside the triangle - nop */
236 }
237 else {
238 int px = x + pos_table16[i][0];
239 int py = y + pos_table16[i][1];
240
241 if (cx1 + ei1 > 0 &&
242 cx2 + ei2 > 0 &&
243 cx3 + ei3 > 0) {
244 /* the block is completely inside the triangle */
245 block_full_16(rast_task, tri, px, py);
246 }
247 else {
248 /* the block is partially in/out of the triangle */
249 do_block_16(rast_task, tri, px, py, cx1, cx2, cx3);
250 }
251 }
252 }
253 }