Merge branch 'mesa_7_7_branch'
[mesa.git] / src / gallium / drivers / softpipe / sp_setup.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * \brief Primitive rasterization/rendering (points, lines, triangles)
30 *
31 * \author Keith Whitwell <keith@tungstengraphics.com>
32 * \author Brian Paul
33 */
34
35 #include "sp_context.h"
36 #include "sp_quad.h"
37 #include "sp_quad_pipe.h"
38 #include "sp_setup.h"
39 #include "sp_state.h"
40 #include "draw/draw_context.h"
41 #include "draw/draw_private.h"
42 #include "draw/draw_vertex.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "util/u_math.h"
45 #include "util/u_memory.h"
46
47
48 #define DEBUG_VERTS 0
49 #define DEBUG_FRAGS 0
50
/**
 * Per-edge state used while rasterizing a triangle.
 */
struct edge {
   float dx;     /**< X(v1) - X(v0); only needed during setup */
   float dy;     /**< Y(v1) - Y(v0); only needed during setup */
   float dxdy;   /**< slope expressed as dx/dy */
   float sx;     /**< X coordinate of the first sample point */
   float sy;     /**< Y coordinate of the first sample point */
   int lines;    /**< number of scanlines covered by this edge */
};
61
62
63 #define MAX_QUADS 16
64
65
66 /**
67 * Triangle setup info (derived from draw_stage).
68 * Also used for line drawing (taking some liberties).
69 */
70 struct setup_context {
71 struct softpipe_context *softpipe;
72
73 /* Vertices are just an array of floats making up each attribute in
74 * turn. Currently fixed at 4 floats, but should change in time.
75 * Codegen will help cope with this.
76 */
77 const float (*vmax)[4];
78 const float (*vmid)[4];
79 const float (*vmin)[4];
80 const float (*vprovoke)[4];
81
82 struct edge ebot;
83 struct edge etop;
84 struct edge emaj;
85
86 float oneoverarea;
87 int facing;
88
89 float pixel_offset;
90
91 struct quad_header quad[MAX_QUADS];
92 struct quad_header *quad_ptrs[MAX_QUADS];
93 unsigned count;
94
95 struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
96 struct tgsi_interp_coef posCoef; /* For Z, W */
97
98 struct {
99 int left[2]; /**< [0] = row0, [1] = row1 */
100 int right[2];
101 int y;
102 } span;
103
104 #if DEBUG_FRAGS
105 uint numFragsEmitted; /**< per primitive */
106 uint numFragsWritten; /**< per primitive */
107 #endif
108
109 unsigned winding; /* which winding to cull */
110 unsigned nr_vertex_attrs;
111 };
112
113
114
115
116 /**
117 * Do triangle cull test using tri determinant (sign indicates orientation)
118 * \return true if triangle is to be culled.
119 */
120 static INLINE boolean
121 cull_tri(const struct setup_context *setup, float det)
122 {
123 if (det != 0) {
124 /* if (det < 0 then Z points toward camera and triangle is
125 * counter-clockwise winding.
126 */
127 unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
128
129 if ((winding & setup->winding) == 0)
130 return FALSE;
131 }
132
133 /* Culled:
134 */
135 return TRUE;
136 }
137
138
139
140 /**
141 * Clip setup->quad against the scissor/surface bounds.
142 */
143 static INLINE void
144 quad_clip( struct setup_context *setup, struct quad_header *quad )
145 {
146 const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
147 const int minx = (int) cliprect->minx;
148 const int maxx = (int) cliprect->maxx;
149 const int miny = (int) cliprect->miny;
150 const int maxy = (int) cliprect->maxy;
151
152 if (quad->input.x0 >= maxx ||
153 quad->input.y0 >= maxy ||
154 quad->input.x0 + 1 < minx ||
155 quad->input.y0 + 1 < miny) {
156 /* totally clipped */
157 quad->inout.mask = 0x0;
158 return;
159 }
160 if (quad->input.x0 < minx)
161 quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
162 if (quad->input.y0 < miny)
163 quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
164 if (quad->input.x0 == maxx - 1)
165 quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
166 if (quad->input.y0 == maxy - 1)
167 quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
168 }
169
170
171 /**
172 * Emit a quad (pass to next stage) with clipping.
173 */
174 static INLINE void
175 clip_emit_quad( struct setup_context *setup, struct quad_header *quad )
176 {
177 quad_clip( setup, quad );
178
179 if (quad->inout.mask) {
180 struct softpipe_context *sp = setup->softpipe;
181
182 sp->quad.first->run( sp->quad.first, &quad, 1 );
183 }
184 }
185
186
187
/**
 * Map an X or Y coordinate to the coordinate of the 2x2 quad that
 * contains it, i.e. clear the lowest bit.
 */
static INLINE int block( int x )
{
   const int quad_dim = 2;

   return x & ~(quad_dim - 1);
}
196
/**
 * Round an X coordinate down to a multiple of 16, the chunk width used
 * when walking spans.
 */
static INLINE int block_x( int x )
{
   const int chunk_width = 16;

   return x & ~(chunk_width - 1);
}
201
202
203 /**
204 * Render a horizontal span of quads
205 */
206 static void flush_spans( struct setup_context *setup )
207 {
208 const int step = 16;
209 const int xleft0 = setup->span.left[0];
210 const int xleft1 = setup->span.left[1];
211 const int xright0 = setup->span.right[0];
212 const int xright1 = setup->span.right[1];
213 struct quad_stage *pipe = setup->softpipe->quad.first;
214
215
216 int minleft = block_x(MIN2(xleft0, xleft1));
217 int maxright = MAX2(xright0, xright1);
218 int x;
219
220 for (x = minleft; x < maxright; x += step) {
221 unsigned skip_left0 = CLAMP(xleft0 - x, 0, step);
222 unsigned skip_left1 = CLAMP(xleft1 - x, 0, step);
223 unsigned skip_right0 = CLAMP(x + step - xright0, 0, step);
224 unsigned skip_right1 = CLAMP(x + step - xright1, 0, step);
225 unsigned lx = x;
226 unsigned q = 0;
227
228 unsigned skipmask_left0 = (1U << skip_left0) - 1U;
229 unsigned skipmask_left1 = (1U << skip_left1) - 1U;
230
231 /* These calculations fail when step == 32 and skip_right == 0.
232 */
233 unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0);
234 unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1);
235
236 unsigned mask0 = ~skipmask_left0 & ~skipmask_right0;
237 unsigned mask1 = ~skipmask_left1 & ~skipmask_right1;
238
239 if (mask0 | mask1) {
240 do {
241 unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2);
242 if (quadmask) {
243 setup->quad[q].input.x0 = lx;
244 setup->quad[q].input.y0 = setup->span.y;
245 setup->quad[q].input.facing = setup->facing;
246 setup->quad[q].inout.mask = quadmask;
247 setup->quad_ptrs[q] = &setup->quad[q];
248 q++;
249 }
250 mask0 >>= 2;
251 mask1 >>= 2;
252 lx += 2;
253 } while (mask0 | mask1);
254
255 pipe->run( pipe, setup->quad_ptrs, q );
256 }
257 }
258
259
260 setup->span.y = 0;
261 setup->span.right[0] = 0;
262 setup->span.right[1] = 0;
263 setup->span.left[0] = 1000000; /* greater than right[0] */
264 setup->span.left[1] = 1000000; /* greater than right[1] */
265 }
266
267
#if DEBUG_VERTS
/**
 * Debug helper: dump every attribute of a vertex and flag attributes
 * whose first component is inf or NaN.
 */
static void print_vertex(const struct setup_context *setup,
                         const float (*v)[4])
{
   int attr;

   debug_printf(" Vertex: (%p)\n", (void *) v);

   for (attr = 0; attr < setup->nr_vertex_attrs; attr++) {
      const float *a = v[attr];

      debug_printf(" %d: %f %f %f %f\n", attr, a[0], a[1], a[2], a[3]);

      if (util_is_inf_or_nan(a[0])) {
         debug_printf(" NaN!\n");
      }
   }
}
#endif
283
284 /**
285 * Sort the vertices from top to bottom order, setting up the triangle
286 * edge fields (ebot, emaj, etop).
287 * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise
288 */
289 static boolean setup_sort_vertices( struct setup_context *setup,
290 float det,
291 const float (*v0)[4],
292 const float (*v1)[4],
293 const float (*v2)[4] )
294 {
295 setup->vprovoke = v2;
296
297 /* determine bottom to top order of vertices */
298 {
299 float y0 = v0[0][1];
300 float y1 = v1[0][1];
301 float y2 = v2[0][1];
302 if (y0 <= y1) {
303 if (y1 <= y2) {
304 /* y0<=y1<=y2 */
305 setup->vmin = v0;
306 setup->vmid = v1;
307 setup->vmax = v2;
308 }
309 else if (y2 <= y0) {
310 /* y2<=y0<=y1 */
311 setup->vmin = v2;
312 setup->vmid = v0;
313 setup->vmax = v1;
314 }
315 else {
316 /* y0<=y2<=y1 */
317 setup->vmin = v0;
318 setup->vmid = v2;
319 setup->vmax = v1;
320 }
321 }
322 else {
323 if (y0 <= y2) {
324 /* y1<=y0<=y2 */
325 setup->vmin = v1;
326 setup->vmid = v0;
327 setup->vmax = v2;
328 }
329 else if (y2 <= y1) {
330 /* y2<=y1<=y0 */
331 setup->vmin = v2;
332 setup->vmid = v1;
333 setup->vmax = v0;
334 }
335 else {
336 /* y1<=y2<=y0 */
337 setup->vmin = v1;
338 setup->vmid = v2;
339 setup->vmax = v0;
340 }
341 }
342 }
343
344 setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0];
345 setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1];
346 setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
347 setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
348 setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0];
349 setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1];
350
351 /*
352 * Compute triangle's area. Use 1/area to compute partial
353 * derivatives of attributes later.
354 *
355 * The area will be the same as prim->det, but the sign may be
356 * different depending on how the vertices get sorted above.
357 *
358 * To determine whether the primitive is front or back facing we
359 * use the prim->det value because its sign is correct.
360 */
361 {
362 const float area = (setup->emaj.dx * setup->ebot.dy -
363 setup->ebot.dx * setup->emaj.dy);
364
365 setup->oneoverarea = 1.0f / area;
366
367 /*
368 debug_printf("%s one-over-area %f area %f det %f\n",
369 __FUNCTION__, setup->oneoverarea, area, det );
370 */
371 if (util_is_inf_or_nan(setup->oneoverarea))
372 return FALSE;
373 }
374
375 /* We need to know if this is a front or back-facing triangle for:
376 * - the GLSL gl_FrontFacing fragment attribute (bool)
377 * - two-sided stencil test
378 */
379 setup->facing =
380 ((det > 0.0) ^
381 (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW));
382
383 /* Prepare pixel offset for rasterisation:
384 * - pixel center (0.5, 0.5) for GL, or
385 * - assume (0.0, 0.0) for other APIs.
386 */
387 if (setup->softpipe->rasterizer->gl_rasterization_rules) {
388 setup->pixel_offset = 0.5f;
389 } else {
390 setup->pixel_offset = 0.0f;
391 }
392
393 return TRUE;
394 }
395
396
397 /**
398 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
399 * The value value comes from vertex[slot][i].
400 * The result will be put into setup->coef[slot].a0[i].
401 * \param slot which attribute slot
402 * \param i which component of the slot (0..3)
403 */
404 static void const_coeff( struct setup_context *setup,
405 struct tgsi_interp_coef *coef,
406 uint vertSlot, uint i)
407 {
408 assert(i <= 3);
409
410 coef->dadx[i] = 0;
411 coef->dady[i] = 0;
412
413 /* need provoking vertex info!
414 */
415 coef->a0[i] = setup->vprovoke[vertSlot][i];
416 }
417
418
419 /**
420 * Compute a0, dadx and dady for a linearly interpolated coefficient,
421 * for a triangle.
422 */
423 static void tri_linear_coeff( struct setup_context *setup,
424 struct tgsi_interp_coef *coef,
425 uint vertSlot, uint i)
426 {
427 float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
428 float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
429 float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
430 float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
431 float dadx = a * setup->oneoverarea;
432 float dady = b * setup->oneoverarea;
433
434 assert(i <= 3);
435
436 coef->dadx[i] = dadx;
437 coef->dady[i] = dady;
438
439 /* calculate a0 as the value which would be sampled for the
440 * fragment at (0,0), taking into account that we want to sample at
441 * pixel centers, in other words (pixel_offset, pixel_offset).
442 *
443 * this is neat but unfortunately not a good way to do things for
444 * triangles with very large values of dadx or dady as it will
445 * result in the subtraction and re-addition from a0 of a very
446 * large number, which means we'll end up loosing a lot of the
447 * fractional bits and precision from a0. the way to fix this is
448 * to define a0 as the sample at a pixel center somewhere near vmin
449 * instead - i'll switch to this later.
450 */
451 coef->a0[i] = (setup->vmin[vertSlot][i] -
452 (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
453 dady * (setup->vmin[0][1] - setup->pixel_offset)));
454
455 /*
456 debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
457 slot, "xyzw"[i],
458 setup->coef[slot].a0[i],
459 setup->coef[slot].dadx[i],
460 setup->coef[slot].dady[i]);
461 */
462 }
463
464
465 /**
466 * Compute a0, dadx and dady for a perspective-corrected interpolant,
467 * for a triangle.
468 * We basically multiply the vertex value by 1/w before computing
469 * the plane coefficients (a0, dadx, dady).
470 * Later, when we compute the value at a particular fragment position we'll
471 * divide the interpolated value by the interpolated W at that fragment.
472 */
473 static void tri_persp_coeff( struct setup_context *setup,
474 struct tgsi_interp_coef *coef,
475 uint vertSlot, uint i)
476 {
477 /* premultiply by 1/w (v[0][3] is always W):
478 */
479 float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3];
480 float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3];
481 float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3];
482 float botda = mida - mina;
483 float majda = maxa - mina;
484 float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
485 float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
486 float dadx = a * setup->oneoverarea;
487 float dady = b * setup->oneoverarea;
488
489 /*
490 debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
491 setup->vmin[vertSlot][i],
492 setup->vmid[vertSlot][i],
493 setup->vmax[vertSlot][i]
494 );
495 */
496 assert(i <= 3);
497
498 coef->dadx[i] = dadx;
499 coef->dady[i] = dady;
500 coef->a0[i] = (mina -
501 (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
502 dady * (setup->vmin[0][1] - setup->pixel_offset)));
503 }
504
505
506 /**
507 * Special coefficient setup for gl_FragCoord.
508 * X and Y are trivial, though Y has to be inverted for OpenGL.
509 * Z and W are copied from posCoef which should have already been computed.
510 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
511 */
512 static void
513 setup_fragcoord_coeff(struct setup_context *setup, uint slot)
514 {
515 /*X*/
516 setup->coef[slot].a0[0] = 0;
517 setup->coef[slot].dadx[0] = 1.0;
518 setup->coef[slot].dady[0] = 0.0;
519 /*Y*/
520 setup->coef[slot].a0[1] = 0.0;
521 setup->coef[slot].dadx[1] = 0.0;
522 setup->coef[slot].dady[1] = 1.0;
523 /*Z*/
524 setup->coef[slot].a0[2] = setup->posCoef.a0[2];
525 setup->coef[slot].dadx[2] = setup->posCoef.dadx[2];
526 setup->coef[slot].dady[2] = setup->posCoef.dady[2];
527 /*W*/
528 setup->coef[slot].a0[3] = setup->posCoef.a0[3];
529 setup->coef[slot].dadx[3] = setup->posCoef.dadx[3];
530 setup->coef[slot].dady[3] = setup->posCoef.dady[3];
531 }
532
533
534
535 /**
536 * Compute the setup->coef[] array dadx, dady, a0 values.
537 * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
538 */
539 static void setup_tri_coefficients( struct setup_context *setup )
540 {
541 struct softpipe_context *softpipe = setup->softpipe;
542 const struct sp_fragment_shader *spfs = softpipe->fs;
543 const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
544 uint fragSlot;
545
546 /* z and w are done by linear interpolation:
547 */
548 tri_linear_coeff(setup, &setup->posCoef, 0, 2);
549 tri_linear_coeff(setup, &setup->posCoef, 0, 3);
550
551 /* setup interpolation for all the remaining attributes:
552 */
553 for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
554 const uint vertSlot = vinfo->attrib[fragSlot].src_index;
555 uint j;
556
557 switch (vinfo->attrib[fragSlot].interp_mode) {
558 case INTERP_CONSTANT:
559 for (j = 0; j < NUM_CHANNELS; j++)
560 const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
561 break;
562 case INTERP_LINEAR:
563 for (j = 0; j < NUM_CHANNELS; j++)
564 tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
565 break;
566 case INTERP_PERSPECTIVE:
567 for (j = 0; j < NUM_CHANNELS; j++)
568 tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
569 break;
570 case INTERP_POS:
571 setup_fragcoord_coeff(setup, fragSlot);
572 break;
573 default:
574 assert(0);
575 }
576
577 if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
578 setup->coef[fragSlot].a0[0] = 1.0f - setup->facing;
579 setup->coef[fragSlot].dadx[0] = 0.0;
580 setup->coef[fragSlot].dady[0] = 0.0;
581 }
582 }
583 }
584
585
586
587 static void setup_tri_edges( struct setup_context *setup )
588 {
589 float vmin_x = setup->vmin[0][0] + setup->pixel_offset;
590 float vmid_x = setup->vmid[0][0] + setup->pixel_offset;
591
592 float vmin_y = setup->vmin[0][1] - setup->pixel_offset;
593 float vmid_y = setup->vmid[0][1] - setup->pixel_offset;
594 float vmax_y = setup->vmax[0][1] - setup->pixel_offset;
595
596 setup->emaj.sy = ceilf(vmin_y);
597 setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
598 setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
599 setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
600
601 setup->etop.sy = ceilf(vmid_y);
602 setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy);
603 setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
604 setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
605
606 setup->ebot.sy = ceilf(vmin_y);
607 setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy);
608 setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
609 setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
610 }
611
612
613 /**
614 * Render the upper or lower half of a triangle.
615 * Scissoring/cliprect is applied here too.
616 */
617 static void subtriangle( struct setup_context *setup,
618 struct edge *eleft,
619 struct edge *eright,
620 unsigned lines )
621 {
622 const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
623 const int minx = (int) cliprect->minx;
624 const int maxx = (int) cliprect->maxx;
625 const int miny = (int) cliprect->miny;
626 const int maxy = (int) cliprect->maxy;
627 int y, start_y, finish_y;
628 int sy = (int)eleft->sy;
629
630 assert((int)eleft->sy == (int) eright->sy);
631
632 /* clip top/bottom */
633 start_y = sy;
634 if (start_y < miny)
635 start_y = miny;
636
637 finish_y = sy + lines;
638 if (finish_y > maxy)
639 finish_y = maxy;
640
641 start_y -= sy;
642 finish_y -= sy;
643
644 /*
645 debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
646 */
647
648 for (y = start_y; y < finish_y; y++) {
649
650 /* avoid accumulating adds as floats don't have the precision to
651 * accurately iterate large triangle edges that way. luckily we
652 * can just multiply these days.
653 *
654 * this is all drowned out by the attribute interpolation anyway.
655 */
656 int left = (int)(eleft->sx + y * eleft->dxdy);
657 int right = (int)(eright->sx + y * eright->dxdy);
658
659 /* clip left/right */
660 if (left < minx)
661 left = minx;
662 if (right > maxx)
663 right = maxx;
664
665 if (left < right) {
666 int _y = sy + y;
667 if (block(_y) != setup->span.y) {
668 flush_spans(setup);
669 setup->span.y = block(_y);
670 }
671
672 setup->span.left[_y&1] = left;
673 setup->span.right[_y&1] = right;
674 }
675 }
676
677
678 /* save the values so that emaj can be restarted:
679 */
680 eleft->sx += lines * eleft->dxdy;
681 eright->sx += lines * eright->dxdy;
682 eleft->sy += lines;
683 eright->sy += lines;
684 }
685
686
/**
 * Recompute the primitive's determinant from the vertex positions.
 * This information does not arrive through the vbuf_render interface,
 * so it has to be calculated here.
 *
 * \return Z component of cross(v0 - v2, v1 - v2), using X/Y of slot 0
 */
static float
calc_det( const float (*v0)[4],
          const float (*v1)[4],
          const float (*v2)[4] )
{
   const float *p0 = v0[0];
   const float *p1 = v1[0];
   const float *p2 = v2[0];

   /* edge vectors e = v0 - v2, f = v1 - v2 */
   const float ex = p0[0] - p2[0];
   const float ey = p0[1] - p2[1];
   const float fx = p1[0] - p2[0];
   const float fy = p1[1] - p2[1];

   /* det = cross(e,f).z */
   return ex * fy - ey * fx;
}
706
707
708 /**
709 * Do setup for triangle rasterization, then render the triangle.
710 */
711 void sp_setup_tri( struct setup_context *setup,
712 const float (*v0)[4],
713 const float (*v1)[4],
714 const float (*v2)[4] )
715 {
716 float det;
717
718 #if DEBUG_VERTS
719 debug_printf("Setup triangle:\n");
720 print_vertex(setup, v0);
721 print_vertex(setup, v1);
722 print_vertex(setup, v2);
723 #endif
724
725 if (setup->softpipe->no_rast)
726 return;
727
728 det = calc_det(v0, v1, v2);
729 /*
730 debug_printf("%s\n", __FUNCTION__ );
731 */
732
733 #if DEBUG_FRAGS
734 setup->numFragsEmitted = 0;
735 setup->numFragsWritten = 0;
736 #endif
737
738 if (cull_tri( setup, det ))
739 return;
740
741 if (!setup_sort_vertices( setup, det, v0, v1, v2 ))
742 return;
743 setup_tri_coefficients( setup );
744 setup_tri_edges( setup );
745
746 assert(setup->softpipe->reduced_prim == PIPE_PRIM_TRIANGLES);
747
748 setup->span.y = 0;
749 setup->span.right[0] = 0;
750 setup->span.right[1] = 0;
751 /* setup->span.z_mode = tri_z_mode( setup->ctx ); */
752
753 /* init_constant_attribs( setup ); */
754
755 if (setup->oneoverarea < 0.0) {
756 /* emaj on left:
757 */
758 subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
759 subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
760 }
761 else {
762 /* emaj on right:
763 */
764 subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
765 subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
766 }
767
768 flush_spans( setup );
769
770 #if DEBUG_FRAGS
771 printf("Tri: %u frags emitted, %u written\n",
772 setup->numFragsEmitted,
773 setup->numFragsWritten);
774 #endif
775 }
776
777
778
779 /**
780 * Compute a0, dadx and dady for a linearly interpolated coefficient,
781 * for a line.
782 */
783 static void
784 line_linear_coeff(const struct setup_context *setup,
785 struct tgsi_interp_coef *coef,
786 uint vertSlot, uint i)
787 {
788 const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
789 const float dadx = da * setup->emaj.dx * setup->oneoverarea;
790 const float dady = da * setup->emaj.dy * setup->oneoverarea;
791 coef->dadx[i] = dadx;
792 coef->dady[i] = dady;
793 coef->a0[i] = (setup->vmin[vertSlot][i] -
794 (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
795 dady * (setup->vmin[0][1] - setup->pixel_offset)));
796 }
797
798
799 /**
800 * Compute a0, dadx and dady for a perspective-corrected interpolant,
801 * for a line.
802 */
803 static void
804 line_persp_coeff(const struct setup_context *setup,
805 struct tgsi_interp_coef *coef,
806 uint vertSlot, uint i)
807 {
808 /* XXX double-check/verify this arithmetic */
809 const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3];
810 const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3];
811 const float da = a1 - a0;
812 const float dadx = da * setup->emaj.dx * setup->oneoverarea;
813 const float dady = da * setup->emaj.dy * setup->oneoverarea;
814 coef->dadx[i] = dadx;
815 coef->dady[i] = dady;
816 coef->a0[i] = (setup->vmin[vertSlot][i] -
817 (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
818 dady * (setup->vmin[0][1] - setup->pixel_offset)));
819 }
820
821
822 /**
823 * Compute the setup->coef[] array dadx, dady, a0 values.
824 * Must be called after setup->vmin,vmax are initialized.
825 */
826 static INLINE boolean
827 setup_line_coefficients(struct setup_context *setup,
828 const float (*v0)[4],
829 const float (*v1)[4])
830 {
831 struct softpipe_context *softpipe = setup->softpipe;
832 const struct sp_fragment_shader *spfs = softpipe->fs;
833 const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
834 uint fragSlot;
835 float area;
836
837 /* use setup->vmin, vmax to point to vertices */
838 if (softpipe->rasterizer->flatshade_first)
839 setup->vprovoke = v0;
840 else
841 setup->vprovoke = v1;
842 setup->vmin = v0;
843 setup->vmax = v1;
844
845 setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
846 setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
847
848 /* NOTE: this is not really area but something proportional to it */
849 area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy;
850 if (area == 0.0f || util_is_inf_or_nan(area))
851 return FALSE;
852 setup->oneoverarea = 1.0f / area;
853
854 /* z and w are done by linear interpolation:
855 */
856 line_linear_coeff(setup, &setup->posCoef, 0, 2);
857 line_linear_coeff(setup, &setup->posCoef, 0, 3);
858
859 /* setup interpolation for all the remaining attributes:
860 */
861 for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
862 const uint vertSlot = vinfo->attrib[fragSlot].src_index;
863 uint j;
864
865 switch (vinfo->attrib[fragSlot].interp_mode) {
866 case INTERP_CONSTANT:
867 for (j = 0; j < NUM_CHANNELS; j++)
868 const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
869 break;
870 case INTERP_LINEAR:
871 for (j = 0; j < NUM_CHANNELS; j++)
872 line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
873 break;
874 case INTERP_PERSPECTIVE:
875 for (j = 0; j < NUM_CHANNELS; j++)
876 line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
877 break;
878 case INTERP_POS:
879 setup_fragcoord_coeff(setup, fragSlot);
880 break;
881 default:
882 assert(0);
883 }
884
885 if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
886 setup->coef[fragSlot].a0[0] = 1.0f - setup->facing;
887 setup->coef[fragSlot].dadx[0] = 0.0;
888 setup->coef[fragSlot].dady[0] = 0.0;
889 }
890 }
891 return TRUE;
892 }
893
894
895 /**
896 * Plot a pixel in a line segment.
897 */
898 static INLINE void
899 plot(struct setup_context *setup, int x, int y)
900 {
901 const int iy = y & 1;
902 const int ix = x & 1;
903 const int quadX = x - ix;
904 const int quadY = y - iy;
905 const int mask = (1 << ix) << (2 * iy);
906
907 if (quadX != setup->quad[0].input.x0 ||
908 quadY != setup->quad[0].input.y0)
909 {
910 /* flush prev quad, start new quad */
911
912 if (setup->quad[0].input.x0 != -1)
913 clip_emit_quad( setup, &setup->quad[0] );
914
915 setup->quad[0].input.x0 = quadX;
916 setup->quad[0].input.y0 = quadY;
917 setup->quad[0].inout.mask = 0x0;
918 }
919
920 setup->quad[0].inout.mask |= mask;
921 }
922
923
924 /**
925 * Do setup for line rasterization, then render the line.
926 * Single-pixel width, no stipple, etc. We rely on the 'draw' module
927 * to handle stippling and wide lines.
928 */
929 void
930 sp_setup_line(struct setup_context *setup,
931 const float (*v0)[4],
932 const float (*v1)[4])
933 {
934 int x0 = (int) v0[0][0];
935 int x1 = (int) v1[0][0];
936 int y0 = (int) v0[0][1];
937 int y1 = (int) v1[0][1];
938 int dx = x1 - x0;
939 int dy = y1 - y0;
940 int xstep, ystep;
941
942 #if DEBUG_VERTS
943 debug_printf("Setup line:\n");
944 print_vertex(setup, v0);
945 print_vertex(setup, v1);
946 #endif
947
948 if (setup->softpipe->no_rast)
949 return;
950
951 if (dx == 0 && dy == 0)
952 return;
953
954 if (!setup_line_coefficients(setup, v0, v1))
955 return;
956
957 assert(v0[0][0] < 1.0e9);
958 assert(v0[0][1] < 1.0e9);
959 assert(v1[0][0] < 1.0e9);
960 assert(v1[0][1] < 1.0e9);
961
962 if (dx < 0) {
963 dx = -dx; /* make positive */
964 xstep = -1;
965 }
966 else {
967 xstep = 1;
968 }
969
970 if (dy < 0) {
971 dy = -dy; /* make positive */
972 ystep = -1;
973 }
974 else {
975 ystep = 1;
976 }
977
978 assert(dx >= 0);
979 assert(dy >= 0);
980 assert(setup->softpipe->reduced_prim == PIPE_PRIM_LINES);
981
982 setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1;
983 setup->quad[0].inout.mask = 0x0;
984
985 /* XXX temporary: set coverage to 1.0 so the line appears
986 * if AA mode happens to be enabled.
987 */
988 setup->quad[0].input.coverage[0] =
989 setup->quad[0].input.coverage[1] =
990 setup->quad[0].input.coverage[2] =
991 setup->quad[0].input.coverage[3] = 1.0;
992
993 if (dx > dy) {
994 /*** X-major line ***/
995 int i;
996 const int errorInc = dy + dy;
997 int error = errorInc - dx;
998 const int errorDec = error - dx;
999
1000 for (i = 0; i < dx; i++) {
1001 plot(setup, x0, y0);
1002
1003 x0 += xstep;
1004 if (error < 0) {
1005 error += errorInc;
1006 }
1007 else {
1008 error += errorDec;
1009 y0 += ystep;
1010 }
1011 }
1012 }
1013 else {
1014 /*** Y-major line ***/
1015 int i;
1016 const int errorInc = dx + dx;
1017 int error = errorInc - dy;
1018 const int errorDec = error - dy;
1019
1020 for (i = 0; i < dy; i++) {
1021 plot(setup, x0, y0);
1022
1023 y0 += ystep;
1024 if (error < 0) {
1025 error += errorInc;
1026 }
1027 else {
1028 error += errorDec;
1029 x0 += xstep;
1030 }
1031 }
1032 }
1033
1034 /* draw final quad */
1035 if (setup->quad[0].inout.mask) {
1036 clip_emit_quad( setup, &setup->quad[0] );
1037 }
1038 }
1039
1040
1041 static void
1042 point_persp_coeff(const struct setup_context *setup,
1043 const float (*vert)[4],
1044 struct tgsi_interp_coef *coef,
1045 uint vertSlot, uint i)
1046 {
1047 assert(i <= 3);
1048 coef->dadx[i] = 0.0F;
1049 coef->dady[i] = 0.0F;
1050 coef->a0[i] = vert[vertSlot][i] * vert[0][3];
1051 }
1052
1053
1054 /**
1055 * Do setup for point rasterization, then render the point.
1056 * Round or square points...
1057 * XXX could optimize a lot for 1-pixel points.
1058 */
1059 void
1060 sp_setup_point( struct setup_context *setup,
1061 const float (*v0)[4] )
1062 {
1063 struct softpipe_context *softpipe = setup->softpipe;
1064 const struct sp_fragment_shader *spfs = softpipe->fs;
1065 const int sizeAttr = setup->softpipe->psize_slot;
1066 const float size
1067 = sizeAttr > 0 ? v0[sizeAttr][0]
1068 : setup->softpipe->rasterizer->point_size;
1069 const float halfSize = 0.5F * size;
1070 const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth;
1071 const float x = v0[0][0]; /* Note: data[0] is always position */
1072 const float y = v0[0][1];
1073 const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
1074 uint fragSlot;
1075
1076 #if DEBUG_VERTS
1077 debug_printf("Setup point:\n");
1078 print_vertex(setup, v0);
1079 #endif
1080
1081 if (softpipe->no_rast)
1082 return;
1083
1084 assert(setup->softpipe->reduced_prim == PIPE_PRIM_POINTS);
1085
1086 /* For points, all interpolants are constant-valued.
1087 * However, for point sprites, we'll need to setup texcoords appropriately.
1088 * XXX: which coefficients are the texcoords???
1089 * We may do point sprites as textured quads...
1090 *
1091 * KW: We don't know which coefficients are texcoords - ultimately
1092 * the choice of what interpolation mode to use for each attribute
1093 * should be determined by the fragment program, using
1094 * per-attribute declaration statements that include interpolation
1095 * mode as a parameter. So either the fragment program will have
1096 * to be adjusted for pointsprite vs normal point behaviour, or
1097 * otherwise a special interpolation mode will have to be defined
1098 * which matches the required behaviour for point sprites. But -
1099 * the latter is not a feature of normal hardware, and as such
1100 * probably should be ruled out on that basis.
1101 */
1102 setup->vprovoke = v0;
1103
1104 /* setup Z, W */
1105 const_coeff(setup, &setup->posCoef, 0, 2);
1106 const_coeff(setup, &setup->posCoef, 0, 3);
1107
1108 for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
1109 const uint vertSlot = vinfo->attrib[fragSlot].src_index;
1110 uint j;
1111
1112 switch (vinfo->attrib[fragSlot].interp_mode) {
1113 case INTERP_CONSTANT:
1114 /* fall-through */
1115 case INTERP_LINEAR:
1116 for (j = 0; j < NUM_CHANNELS; j++)
1117 const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1118 break;
1119 case INTERP_PERSPECTIVE:
1120 for (j = 0; j < NUM_CHANNELS; j++)
1121 point_persp_coeff(setup, setup->vprovoke,
1122 &setup->coef[fragSlot], vertSlot, j);
1123 break;
1124 case INTERP_POS:
1125 setup_fragcoord_coeff(setup, fragSlot);
1126 break;
1127 default:
1128 assert(0);
1129 }
1130
1131 if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
1132 setup->coef[fragSlot].a0[0] = 1.0f - setup->facing;
1133 setup->coef[fragSlot].dadx[0] = 0.0;
1134 setup->coef[fragSlot].dady[0] = 0.0;
1135 }
1136 }
1137
1138
1139 if (halfSize <= 0.5 && !round) {
1140 /* special case for 1-pixel points */
1141 const int ix = ((int) x) & 1;
1142 const int iy = ((int) y) & 1;
1143 setup->quad[0].input.x0 = (int) x - ix;
1144 setup->quad[0].input.y0 = (int) y - iy;
1145 setup->quad[0].inout.mask = (1 << ix) << (2 * iy);
1146 clip_emit_quad( setup, &setup->quad[0] );
1147 }
1148 else {
1149 if (round) {
1150 /* rounded points */
1151 const int ixmin = block((int) (x - halfSize));
1152 const int ixmax = block((int) (x + halfSize));
1153 const int iymin = block((int) (y - halfSize));
1154 const int iymax = block((int) (y + halfSize));
1155 const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */
1156 const float rmax = halfSize + 0.7071F;
1157 const float rmin2 = MAX2(0.0F, rmin * rmin);
1158 const float rmax2 = rmax * rmax;
1159 const float cscale = 1.0F / (rmax2 - rmin2);
1160 int ix, iy;
1161
1162 for (iy = iymin; iy <= iymax; iy += 2) {
1163 for (ix = ixmin; ix <= ixmax; ix += 2) {
1164 float dx, dy, dist2, cover;
1165
1166 setup->quad[0].inout.mask = 0x0;
1167
1168 dx = (ix + 0.5f) - x;
1169 dy = (iy + 0.5f) - y;
1170 dist2 = dx * dx + dy * dy;
1171 if (dist2 <= rmax2) {
1172 cover = 1.0F - (dist2 - rmin2) * cscale;
1173 setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
1174 setup->quad[0].inout.mask |= MASK_TOP_LEFT;
1175 }
1176
1177 dx = (ix + 1.5f) - x;
1178 dy = (iy + 0.5f) - y;
1179 dist2 = dx * dx + dy * dy;
1180 if (dist2 <= rmax2) {
1181 cover = 1.0F - (dist2 - rmin2) * cscale;
1182 setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
1183 setup->quad[0].inout.mask |= MASK_TOP_RIGHT;
1184 }
1185
1186 dx = (ix + 0.5f) - x;
1187 dy = (iy + 1.5f) - y;
1188 dist2 = dx * dx + dy * dy;
1189 if (dist2 <= rmax2) {
1190 cover = 1.0F - (dist2 - rmin2) * cscale;
1191 setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
1192 setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT;
1193 }
1194
1195 dx = (ix + 1.5f) - x;
1196 dy = (iy + 1.5f) - y;
1197 dist2 = dx * dx + dy * dy;
1198 if (dist2 <= rmax2) {
1199 cover = 1.0F - (dist2 - rmin2) * cscale;
1200 setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
1201 setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT;
1202 }
1203
1204 if (setup->quad[0].inout.mask) {
1205 setup->quad[0].input.x0 = ix;
1206 setup->quad[0].input.y0 = iy;
1207 clip_emit_quad( setup, &setup->quad[0] );
1208 }
1209 }
1210 }
1211 }
1212 else {
1213 /* square points */
1214 const int xmin = (int) (x + 0.75 - halfSize);
1215 const int ymin = (int) (y + 0.25 - halfSize);
1216 const int xmax = xmin + (int) size;
1217 const int ymax = ymin + (int) size;
1218 /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
1219 const int ixmin = block(xmin);
1220 const int ixmax = block(xmax - 1);
1221 const int iymin = block(ymin);
1222 const int iymax = block(ymax - 1);
1223 int ix, iy;
1224
1225 /*
1226 debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
1227 */
1228 for (iy = iymin; iy <= iymax; iy += 2) {
1229 uint rowMask = 0xf;
1230 if (iy < ymin) {
1231 /* above the top edge */
1232 rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
1233 }
1234 if (iy + 1 >= ymax) {
1235 /* below the bottom edge */
1236 rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
1237 }
1238
1239 for (ix = ixmin; ix <= ixmax; ix += 2) {
1240 uint mask = rowMask;
1241
1242 if (ix < xmin) {
1243 /* fragment is past left edge of point, turn off left bits */
1244 mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
1245 }
1246 if (ix + 1 >= xmax) {
1247 /* past the right edge */
1248 mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
1249 }
1250
1251 setup->quad[0].inout.mask = mask;
1252 setup->quad[0].input.x0 = ix;
1253 setup->quad[0].input.y0 = iy;
1254 clip_emit_quad( setup, &setup->quad[0] );
1255 }
1256 }
1257 }
1258 }
1259 }
1260
1261 void sp_setup_prepare( struct setup_context *setup )
1262 {
1263 struct softpipe_context *sp = setup->softpipe;
1264
1265 if (sp->dirty) {
1266 softpipe_update_derived(sp);
1267 }
1268
1269 /* Note: nr_attrs is only used for debugging (vertex printing) */
1270 setup->nr_vertex_attrs = draw_num_shader_outputs(sp->draw);
1271
1272 sp->quad.first->begin( sp->quad.first );
1273
1274 if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
1275 sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL &&
1276 sp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) {
1277 /* we'll do culling */
1278 setup->winding = sp->rasterizer->cull_mode;
1279 }
1280 else {
1281 /* 'draw' will do culling */
1282 setup->winding = PIPE_WINDING_NONE;
1283 }
1284 }
1285
1286
1287
/**
 * Destroy a setup context by passing it to FREE().
 *
 * \param setup  the setup context to release
 */
void
sp_setup_destroy_context(struct setup_context *setup)
{
   FREE(setup);
}
1292
1293
1294 /**
1295 * Create a new primitive setup/render stage.
1296 */
1297 struct setup_context *sp_setup_create_context( struct softpipe_context *softpipe )
1298 {
1299 struct setup_context *setup = CALLOC_STRUCT(setup_context);
1300 unsigned i;
1301
1302 setup->softpipe = softpipe;
1303
1304 for (i = 0; i < MAX_QUADS; i++) {
1305 setup->quad[i].coef = setup->coef;
1306 setup->quad[i].posCoef = &setup->posCoef;
1307 }
1308
1309 setup->span.left[0] = 1000000; /* greater than right[0] */
1310 setup->span.left[1] = 1000000; /* greater than right[1] */
1311
1312 return setup;
1313 }
1314