llvmpipe: native rasterization for lines
[mesa.git] / src / gallium / drivers / llvmpipe / lp_setup_line.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Binning code for lines
30 */
31
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "lp_perf.h"
35 #include "lp_setup_context.h"
36 #include "lp_rast.h"
37 #include "lp_state_fs.h"
38
39 #define NUM_CHANNELS 4
40
41
/*
 * Per-pixel step tables for the four axis-aligned scissor planes.
 *
 * Entry j follows the same 4x4 pixel ordering that SETUP_STEP uses in
 * lp_setup_line(): j = 0..3 covers the top-left 2x2 quad, 4..7 the
 * top-right quad, 8..11 the bottom-left quad and 12..15 the bottom-right
 * quad.  Each entry holds (y * dcdy - x * dcdx) for the plane's fixed
 * dcdx/dcdy, i.e. +x, -x, +y or -y of the pixel within the block.
 */

/* left scissor edge (dcdx = -1, dcdy = 0): step is +x */
static const int step_scissor_minx[16] = {
   0, 1, 0, 1,   /* x = 0,1 / y = 0,1 */
   2, 3, 2, 3,   /* x = 2,3 / y = 0,1 */
   0, 1, 0, 1,   /* x = 0,1 / y = 2,3 */
   2, 3, 2, 3    /* x = 2,3 / y = 2,3 */
};

/* right scissor edge (dcdx = 1, dcdy = 0): step is -x */
static const int step_scissor_maxx[16] = {
    0, -1,  0, -1,
   -2, -3, -2, -3,
    0, -1,  0, -1,
   -2, -3, -2, -3
};

/* top scissor edge (dcdx = 0, dcdy = 1): step is +y */
static const int step_scissor_miny[16] = {
   0, 0, 1, 1,
   0, 0, 1, 1,
   2, 2, 3, 3,
   2, 2, 3, 3
};

/* bottom scissor edge (dcdx = 0, dcdy = -1): step is -y */
static const int step_scissor_maxy[16] = {
    0,  0, -1, -1,
    0,  0, -1, -1,
   -2, -2, -3, -3,
   -2, -2, -3, -3
};
69
70
71
72 /**
73 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
74 */
75 static void constant_coef( struct lp_setup_context *setup,
76 struct lp_rast_triangle *tri,
77 unsigned slot,
78 const float value,
79 unsigned i )
80 {
81 tri->inputs.a0[slot][i] = value;
82 tri->inputs.dadx[slot][i] = 0.0f;
83 tri->inputs.dady[slot][i] = 0.0f;
84 }
85
86
87 /**
88 * Compute a0, dadx and dady for a linearly interpolated coefficient,
89 * for a triangle.
90 */
91 static void linear_coef( struct lp_setup_context *setup,
92 struct lp_rast_triangle *tri,
93 float oneoverarea,
94 unsigned slot,
95 const float (*v1)[4],
96 const float (*v2)[4],
97 unsigned vert_attr,
98 unsigned i)
99 {
100 float a1 = v1[vert_attr][i];
101 float a2 = v2[vert_attr][i];
102
103 float da21 = a1 - a2;
104 float dadx = da21 * tri->dx * oneoverarea;
105 float dady = da21 * tri->dy * oneoverarea;
106
107 tri->inputs.dadx[slot][i] = dadx;
108 tri->inputs.dady[slot][i] = dady;
109
110 tri->inputs.a0[slot][i] = (a1 -
111 (dadx * (v1[0][0] - setup->pixel_offset) +
112 dady * (v1[0][1] - setup->pixel_offset)));
113 }
114
115
116 /**
117 * Compute a0, dadx and dady for a perspective-corrected interpolant,
118 * for a triangle.
119 * We basically multiply the vertex value by 1/w before computing
120 * the plane coefficients (a0, dadx, dady).
121 * Later, when we compute the value at a particular fragment position we'll
122 * divide the interpolated value by the interpolated W at that fragment.
123 */
124 static void perspective_coef( struct lp_setup_context *setup,
125 struct lp_rast_triangle *tri,
126 float oneoverarea,
127 unsigned slot,
128 const float (*v1)[4],
129 const float (*v2)[4],
130 unsigned vert_attr,
131 unsigned i)
132 {
133 /* premultiply by 1/w (v[0][3] is always 1/w):
134 */
135 float a1 = v1[vert_attr][i] * v1[0][3];
136 float a2 = v2[vert_attr][i] * v2[0][3];
137
138 float da21 = a1 - a2;
139 float dadx = da21 * tri->dx * oneoverarea;
140 float dady = da21 * tri->dy * oneoverarea;
141
142 tri->inputs.dadx[slot][i] = dadx;
143 tri->inputs.dady[slot][i] = dady;
144
145 tri->inputs.a0[slot][i] = (a1 -
146 (dadx * (v1[0][0] - setup->pixel_offset) +
147 dady * (v1[0][1] - setup->pixel_offset)));
148 }
149
150 /**
151 * Compute the tri->coef[] array dadx, dady, a0 values.
152 */
153 static void setup_line_coefficients( struct lp_setup_context *setup,
154 struct lp_rast_triangle *tri,
155 float oneoverarea,
156 const float (*v1)[4],
157 const float (*v2)[4])
158 {
159 unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
160 unsigned slot;
161
162 /* setup interpolation for all the remaining attributes:
163 */
164 for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
165 unsigned vert_attr = setup->fs.input[slot].src_index;
166 unsigned usage_mask = setup->fs.input[slot].usage_mask;
167 unsigned i;
168
169 switch (setup->fs.input[slot].interp) {
170 case LP_INTERP_CONSTANT:
171 if (setup->flatshade_first) {
172 for (i = 0; i < NUM_CHANNELS; i++)
173 if (usage_mask & (1 << i))
174 constant_coef(setup, tri, slot+1, v1[vert_attr][i], i);
175 }
176 else {
177 for (i = 0; i < NUM_CHANNELS; i++)
178 if (usage_mask & (1 << i))
179 constant_coef(setup, tri, slot+1, v2[vert_attr][i], i);
180 }
181 break;
182
183 case LP_INTERP_LINEAR:
184 for (i = 0; i < NUM_CHANNELS; i++)
185 if (usage_mask & (1 << i))
186 linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i);
187 break;
188
189 case LP_INTERP_PERSPECTIVE:
190 for (i = 0; i < NUM_CHANNELS; i++)
191 if (usage_mask & (1 << i))
192 perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i);
193 fragcoord_usage_mask |= TGSI_WRITEMASK_W;
194 break;
195
196 case LP_INTERP_POSITION:
197 /*
198 * The generated pixel interpolators will pick up the coeffs from
199 * slot 0, so all need to ensure that the usage mask is covers all
200 * usages.
201 */
202 fragcoord_usage_mask |= usage_mask;
203 break;
204
205 default:
206 assert(0);
207 }
208 }
209
210 /* The internal position input is in slot zero:
211 */
212 lp_setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v2,
213 fragcoord_usage_mask);
214 }
215
216
217
218 static INLINE int subpixel_snap( float a )
219 {
220 return util_iround(FIXED_ONE * a);
221 }
222
223
224 /**
225 * Print line vertex attribs (for debug).
226 */
227 static void
228 print_line(struct lp_setup_context *setup,
229 const float (*v1)[4],
230 const float (*v2)[4])
231 {
232 uint i;
233
234 debug_printf("llvmpipe line\n");
235 for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
236 debug_printf(" v1[%d]: %f %f %f %f\n", i,
237 v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
238 }
239 for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
240 debug_printf(" v2[%d]: %f %f %f %f\n", i,
241 v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
242 }
243 }
244
245
246 static void
247 lp_setup_line( struct lp_setup_context *setup,
248 const float (*v1)[4],
249 const float (*v2)[4])
250 {
251 struct lp_scene *scene = lp_setup_get_current_scene(setup);
252 struct lp_rast_triangle *line;
253 float oneoverarea;
254 float half_width = setup->line_width / 2;
255 int minx, maxx, miny, maxy;
256 int ix0, ix1, iy0, iy1;
257 unsigned tri_bytes;
258 int x[4];
259 int y[4];
260 int i;
261 int nr_planes = 4;
262 boolean opaque;
263
264 if (0)
265 print_line(setup, v1, v2);
266
267 if (setup->scissor_test) {
268 nr_planes = 8;
269 }
270 else {
271 nr_planes = 4;
272 }
273
274 line = lp_setup_alloc_triangle(scene,
275 setup->fs.nr_inputs,
276 nr_planes,
277 &tri_bytes);
278 if (!line)
279 return;
280
281 #ifndef DEBUG
282 line->v[0][0] = v1[0][0];
283 line->v[1][0] = v2[0][0];
284 line->v[0][1] = v1[0][1];
285 line->v[1][1] = v2[0][1];
286 #endif
287
288 /* pre-calculation(based on given vertices) to determine if line is
289 * more horizontal or more vertical
290 */
291 line->dx = v1[0][0] - v2[0][0];
292 line->dy = v1[0][1] - v2[0][1];
293
294 /* x-major line */
295 if (fabsf(line->dx) >= fabsf(line->dy)) {
296 if (line->dx < 0) {
297 /* if v2 is to the right of v1, swap pointers */
298 const float (*temp)[4] = v1;
299 v1 = v2;
300 v2 = temp;
301 line->dx = -line->dx;
302 line->dy = -line->dy;
303 }
304
305 /* x/y positions in fixed point */
306 x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset);
307 x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset);
308 x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
309 x[3] = subpixel_snap(v1[0][0] - setup->pixel_offset);
310
311 y[0] = subpixel_snap(v1[0][1] - half_width - setup->pixel_offset);
312 y[1] = subpixel_snap(v2[0][1] - half_width - setup->pixel_offset);
313 y[2] = subpixel_snap(v2[0][1] + half_width - setup->pixel_offset);
314 y[3] = subpixel_snap(v1[0][1] + half_width - setup->pixel_offset);
315 }
316 else{
317 /* y-major line */
318 if (line->dy > 0) {
319 /* if v2 is on top of v1, swap pointers */
320 const float (*temp)[4] = v1;
321 v1 = v2;
322 v2 = temp;
323 line->dx = -line->dx;
324 line->dy = -line->dy;
325 }
326
327 x[0] = subpixel_snap(v1[0][0] - half_width - setup->pixel_offset);
328 x[1] = subpixel_snap(v2[0][0] - half_width - setup->pixel_offset);
329 x[2] = subpixel_snap(v2[0][0] + half_width - setup->pixel_offset);
330 x[3] = subpixel_snap(v1[0][0] + half_width - setup->pixel_offset);
331
332 y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset);
333 y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset);
334 y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
335 y[3] = subpixel_snap(v1[0][1] - setup->pixel_offset);
336 }
337
338 /* calculate the deltas */
339 line->plane[0].dcdy = x[0] - x[1];
340 line->plane[1].dcdy = x[1] - x[2];
341 line->plane[2].dcdy = x[2] - x[3];
342 line->plane[3].dcdy = x[3] - x[0];
343
344 line->plane[0].dcdx = y[0] - y[1];
345 line->plane[1].dcdx = y[1] - y[2];
346 line->plane[2].dcdx = y[2] - y[3];
347 line->plane[3].dcdx = y[3] - y[0];
348
349
350 LP_COUNT(nr_tris);
351
352
353 /* Bounding rectangle (in pixels) */
354 {
355 /* Yes this is necessary to accurately calculate bounding boxes
356 * with the two fill-conventions we support. GL (normally) ends
357 * up needing a bottom-left fill convention, which requires
358 * slightly different rounding.
359 */
360 int adj = (setup->pixel_offset != 0) ? 1 : 0;
361
362 minx = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
363 maxx = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
364 miny = (MIN4(y[0], y[1], y[3], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
365 maxy = (MAX4(y[0], y[1], y[3], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
366 }
367
368 if (setup->scissor_test) {
369 minx = MAX2(minx, setup->scissor.current.minx);
370 maxx = MIN2(maxx, setup->scissor.current.maxx);
371 miny = MAX2(miny, setup->scissor.current.miny);
372 maxy = MIN2(maxy, setup->scissor.current.maxy);
373 }
374 else {
375 minx = MAX2(minx, 0);
376 miny = MAX2(miny, 0);
377 maxx = MIN2(maxx, scene->fb.width);
378 maxy = MIN2(maxy, scene->fb.height);
379 }
380
381
382 if (miny >= maxy || minx >= maxx) {
383 lp_scene_putback_data( scene, tri_bytes );
384 return;
385 }
386
387 oneoverarea = 1.0f / (line->dx * line->dx + line->dy * line->dy);
388
389 /* Setup parameter interpolants:
390 */
391 setup_line_coefficients( setup, line, oneoverarea, v1, v2);
392
393 for (i = 0; i < 4; i++) {
394 struct lp_rast_plane *plane = &line->plane[i];
395
396 /* half-edge constants, will be interated over the whole render
397 * target.
398 */
399 plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
400
401
402 /* correct for top-left vs. bottom-left fill convention.
403 *
404 * note that we're overloading gl_rasterization_rules to mean
405 * both (0.5,0.5) pixel centers *and* bottom-left filling
406 * convention.
407 *
408 * GL actually has a top-left filling convention, but GL's
409 * notion of "top" differs from gallium's...
410 *
411 * Also, sometimes (in FBO cases) GL will render upside down
412 * to its usual method, in which case it will probably want
413 * to use the opposite, top-left convention.
414 */
415 if (plane->dcdx < 0) {
416 /* both fill conventions want this - adjust for left edges */
417 plane->c++;
418 }
419 else if (plane->dcdx == 0) {
420 if (setup->pixel_offset == 0) {
421 /* correct for top-left fill convention:
422 */
423 if (plane->dcdy > 0) plane->c++;
424 }
425 else {
426 /* correct for bottom-left fill convention:
427 */
428 if (plane->dcdy < 0) plane->c++;
429 }
430 }
431
432 plane->dcdx *= FIXED_ONE;
433 plane->dcdy *= FIXED_ONE;
434
435 /* find trivial reject offsets for each edge for a single-pixel
436 * sized block. These will be scaled up at each recursive level to
437 * match the active blocksize. Scaling in this way works best if
438 * the blocks are square.
439 */
440 plane->eo = 0;
441 if (plane->dcdx < 0) plane->eo -= plane->dcdx;
442 if (plane->dcdy > 0) plane->eo += plane->dcdy;
443
444 /* Calculate trivial accept offsets from the above.
445 */
446 plane->ei = plane->dcdy - plane->dcdx - plane->eo;
447
448 plane->step = line->step[i];
449
450 /* Fill in the inputs.step[][] arrays.
451 * We've manually unrolled some loops here.
452 */
453 #define SETUP_STEP(j, x, y) \
454 line->step[i][j] = y * plane->dcdy - x * plane->dcdx
455
456 SETUP_STEP(0, 0, 0);
457 SETUP_STEP(1, 1, 0);
458 SETUP_STEP(2, 0, 1);
459 SETUP_STEP(3, 1, 1);
460
461 SETUP_STEP(4, 2, 0);
462 SETUP_STEP(5, 3, 0);
463 SETUP_STEP(6, 2, 1);
464 SETUP_STEP(7, 3, 1);
465
466 SETUP_STEP(8, 0, 2);
467 SETUP_STEP(9, 1, 2);
468 SETUP_STEP(10, 0, 3);
469 SETUP_STEP(11, 1, 3);
470
471 SETUP_STEP(12, 2, 2);
472 SETUP_STEP(13, 3, 2);
473 SETUP_STEP(14, 2, 3);
474 SETUP_STEP(15, 3, 3);
475 #undef STEP
476 }
477
478
479 /*
480 * When rasterizing scissored tris, use the intersection of the
481 * triangle bounding box and the scissor rect to generate the
482 * scissor planes.
483 *
484 * This permits us to cut off the triangle "tails" that are present
485 * in the intermediate recursive levels caused when two of the
486 * triangles edges don't diverge quickly enough to trivially reject
487 * exterior blocks from the triangle.
488 *
489 * It's not really clear if it's worth worrying about these tails,
490 * but since we generate the planes for each scissored tri, it's
491 * free to trim them in this case.
492 *
493 * Note that otherwise, the scissor planes only vary in 'C' value,
494 * and even then only on state-changes. Could alternatively store
495 * these planes elsewhere.
496 */
497 if (nr_planes == 8) {
498 line->plane[4].step = step_scissor_maxx;
499 line->plane[4].dcdx = 1;
500 line->plane[4].dcdy = 0;
501 line->plane[4].c = maxx;
502 line->plane[4].ei = -1;
503 line->plane[4].eo = 0;
504
505 line->plane[5].step = step_scissor_miny;
506 line->plane[5].dcdx = 0;
507 line->plane[5].dcdy = 1;
508 line->plane[5].c = 1-miny;
509 line->plane[5].ei = 0;
510 line->plane[5].eo = 1;
511
512 line->plane[6].step = step_scissor_maxy;
513 line->plane[6].dcdx = 0;
514 line->plane[6].dcdy = -1;
515 line->plane[6].c = maxy;
516 line->plane[6].ei = -1;
517 line->plane[6].eo = 0;
518
519 line->plane[7].step = step_scissor_minx;
520 line->plane[7].dcdx = -1;
521 line->plane[7].dcdy = 0;
522 line->plane[7].c = 1-minx;
523 line->plane[7].ei = 0;
524 line->plane[7].eo = 1;
525 }
526
527
528 /*
529 * All fields of 'tri' are now set. The remaining code here is
530 * concerned with binning.
531 */
532
533 /* Convert to tile coordinates, and inclusive ranges:
534 */
535 ix0 = minx / TILE_SIZE;
536 iy0 = miny / TILE_SIZE;
537 ix1 = (maxx-1) / TILE_SIZE;
538 iy1 = (maxy-1) / TILE_SIZE;
539
540 /*
541 * Clamp to framebuffer size
542 */
543 assert(ix0 == MAX2(ix0, 0));
544 assert(iy0 == MAX2(iy0, 0));
545 assert(ix1 == MIN2(ix1, scene->tiles_x - 1));
546 assert(iy1 == MIN2(iy1, scene->tiles_y - 1));
547
548 /* Determine which tile(s) intersect the triangle's bounding box
549 */
550 if (iy0 == iy1 && ix0 == ix1)
551 {
552 /* Triangle is contained in a single tile:
553 */
554 lp_scene_bin_command( scene, ix0, iy0,
555 lp_rast_tri_tab[nr_planes],
556 lp_rast_arg_triangle(line, (1<<nr_planes)-1) );
557 }
558 else
559 {
560 int c[8];
561 int ei[8];
562 int eo[8];
563 int xstep[8];
564 int ystep[8];
565 int x, y;
566 int is_blit = -1; /* undetermined */
567
568 for (i = 0; i < nr_planes; i++) {
569 c[i] = (line->plane[i].c +
570 line->plane[i].dcdy * iy0 * TILE_SIZE -
571 line->plane[i].dcdx * ix0 * TILE_SIZE);
572
573 ei[i] = line->plane[i].ei << TILE_ORDER;
574 eo[i] = line->plane[i].eo << TILE_ORDER;
575 xstep[i] = -(line->plane[i].dcdx << TILE_ORDER);
576 ystep[i] = line->plane[i].dcdy << TILE_ORDER;
577 }
578
579
580
581 /* Test tile-sized blocks against the triangle.
582 * Discard blocks fully outside the tri. If the block is fully
583 * contained inside the tri, bin an lp_rast_shade_tile command.
584 * Else, bin a lp_rast_triangle command.
585 */
586 for (y = iy0; y <= iy1; y++)
587 {
588 boolean in = FALSE; /* are we inside the triangle? */
589 int cx[8];
590
591 for (i = 0; i < nr_planes; i++)
592 cx[i] = c[i];
593
594 for (x = ix0; x <= ix1; x++)
595 {
596 int out = 0;
597 int partial = 0;
598
599 for (i = 0; i < nr_planes; i++) {
600 int planeout = cx[i] + eo[i];
601 int planepartial = cx[i] + ei[i] - 1;
602 out |= (planeout >> 31);
603 partial |= (planepartial >> 31) & (1<<i);
604 }
605 if (out) {
606 /* do nothing */
607 if (in)
608 break; /* exiting triangle, all done with this row */
609 LP_COUNT(nr_empty_64);
610 }
611 else if (partial) {
612 /* Not trivially accepted by at least one plane -
613 * rasterize/shade partial tile
614 */
615 int count = util_bitcount(partial);
616 in = TRUE;
617 lp_scene_bin_command( scene, x, y,
618 lp_rast_tri_tab[count],
619 lp_rast_arg_triangle(line, partial) );
620
621 LP_COUNT(nr_partially_covered_64);
622 }
623 else {
624 /* triangle covers the whole tile- shade whole tile */
625 LP_COUNT(nr_fully_covered_64);
626 in = TRUE;
627 /* leverages on existing code in lp_setup_tri.c */
628 do_triangle_ccw_whole_tile(setup, scene, line, x, y,
629 opaque, &is_blit);
630 }
631
632 /* Iterate cx values across the region:
633 */
634 for (i = 0; i < nr_planes; i++)
635 cx[i] += xstep[i];
636 }
637
638 /* Iterate c values down the region:
639 */
640 for (i = 0; i < nr_planes; i++)
641 c[i] += ystep[i];
642 }
643 }
644 }
645
646
647 void lp_setup_choose_line( struct lp_setup_context *setup )
648 {
649 setup->line = lp_setup_line;
650 }
651
652