Merge commit 'origin/gallium-master-merge'
[mesa.git] / src / gallium / drivers / softpipe / sp_setup.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * \brief Primitive rasterization/rendering (points, lines, triangles)
30 *
31 * \author Keith Whitwell <keith@tungstengraphics.com>
32 * \author Brian Paul
33 */
34
35 #include "sp_setup.h"
36
37 #include "sp_context.h"
38 #include "sp_headers.h"
39 #include "sp_quad.h"
40 #include "sp_state.h"
41 #include "sp_prim_setup.h"
42 #include "draw/draw_context.h"
43 #include "draw/draw_private.h"
44 #include "draw/draw_vertex.h"
45 #include "pipe/p_shader_tokens.h"
46 #include "pipe/p_thread.h"
47 #include "util/u_math.h"
48 #include "util/u_memory.h"
49
50
/* Compile-time debug switches; set to 1 to enable. */
#define DEBUG_VERTS 0   /* print the vertices of each primitive being set up */
#define DEBUG_FRAGS 0   /* count fragments emitted/written per primitive */
53
/**
 * Per-edge data used while scan-converting a triangle.
 */
struct edge {
   float dx;     /**< X(v1) - X(v0), used only during setup */
   float dy;     /**< Y(v1) - Y(v0), used only during setup */
   float dxdy;   /**< dx/dy */
   float sx;     /**< X coord of the first sample point */
   float sy;     /**< Y coord of the first sample point */
   int lines;    /**< number of lines on this edge */
};
64
65 #if SP_NUM_QUAD_THREADS > 1
66
/* When set to 1, worker threads are woken as soon as a job is added to
 * an empty que.  When 0, they are only woken when the que fills up or
 * when the producer waits for completion.
 */
#define INSTANT_NOTEMPTY_NOTIFY 0
71
72 struct thread_info
73 {
74 struct setup_context *setup;
75 uint id;
76 pipe_thread handle;
77 };
78
79 struct quad_job;
80
81 typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, struct quad_job *job );
82
83 struct quad_job
84 {
85 struct quad_header_input input;
86 struct quad_header_inout inout;
87 quad_job_routine routine;
88 };
89
90 #define NUM_QUAD_JOBS 64
91
92 struct quad_job_que
93 {
94 struct quad_job jobs[NUM_QUAD_JOBS];
95 uint first;
96 uint last;
97 pipe_mutex que_mutex;
98 pipe_condvar que_notfull_condvar;
99 pipe_condvar que_notempty_condvar;
100 uint jobs_added;
101 uint jobs_done;
102 pipe_condvar que_done_condvar;
103 };
104
105 static void
106 add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routine routine )
107 {
108 #if INSTANT_NOTEMPTY_NOTIFY
109 boolean empty;
110 #endif
111
112 /* Wait for empty slot, see if the que is empty.
113 */
114 pipe_mutex_lock( que->que_mutex );
115 while ((que->last + 1) % NUM_QUAD_JOBS == que->first) {
116 #if !INSTANT_NOTEMPTY_NOTIFY
117 pipe_condvar_broadcast( que->que_notempty_condvar );
118 #endif
119 pipe_condvar_wait( que->que_notfull_condvar, que->que_mutex );
120 }
121 #if INSTANT_NOTEMPTY_NOTIFY
122 empty = que->last == que->first;
123 #endif
124 que->jobs_added++;
125 pipe_mutex_unlock( que->que_mutex );
126
127 /* Submit new job.
128 */
129 que->jobs[que->last].input = quad->input;
130 que->jobs[que->last].inout = quad->inout;
131 que->jobs[que->last].routine = routine;
132 que->last = (que->last + 1) % NUM_QUAD_JOBS;
133
134 #if INSTANT_NOTEMPTY_NOTIFY
135 /* If the que was empty, notify consumers there's a job to be done.
136 */
137 if (empty) {
138 pipe_mutex_lock( que->que_mutex );
139 pipe_condvar_broadcast( que->que_notempty_condvar );
140 pipe_mutex_unlock( que->que_mutex );
141 }
142 #endif
143 }
144
145 #endif
146
147 /**
148 * Triangle setup info (derived from draw_stage).
149 * Also used for line drawing (taking some liberties).
150 */
151 struct setup_context {
152 struct softpipe_context *softpipe;
153
154 /* Vertices are just an array of floats making up each attribute in
155 * turn. Currently fixed at 4 floats, but should change in time.
156 * Codegen will help cope with this.
157 */
158 const float (*vmax)[4];
159 const float (*vmid)[4];
160 const float (*vmin)[4];
161 const float (*vprovoke)[4];
162
163 struct edge ebot;
164 struct edge etop;
165 struct edge emaj;
166
167 float oneoverarea;
168
169 struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
170 struct tgsi_interp_coef posCoef; /* For Z, W */
171 struct quad_header quad;
172
173 #if SP_NUM_QUAD_THREADS > 1
174 struct quad_job_que que;
175 struct thread_info threads[SP_NUM_QUAD_THREADS];
176 #endif
177
178 struct {
179 int left[2]; /**< [0] = row0, [1] = row1 */
180 int right[2];
181 int y;
182 unsigned y_flags;
183 unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
184 } span;
185
186 #if DEBUG_FRAGS
187 uint numFragsEmitted; /**< per primitive */
188 uint numFragsWritten; /**< per primitive */
189 #endif
190
191 unsigned winding; /* which winding to cull */
192 };
193
194 #if SP_NUM_QUAD_THREADS > 1
195
196 static PIPE_THREAD_ROUTINE( quad_thread, param )
197 {
198 struct thread_info *info = (struct thread_info *) param;
199 struct quad_job_que *que = &info->setup->que;
200
201 for (;;) {
202 struct quad_job job;
203 boolean full;
204
205 /* Wait for an available job.
206 */
207 pipe_mutex_lock( que->que_mutex );
208 while (que->last == que->first)
209 pipe_condvar_wait( que->que_notempty_condvar, que->que_mutex );
210
211 /* See if the que is full.
212 */
213 full = (que->last + 1) % NUM_QUAD_JOBS == que->first;
214
215 /* Take a job and remove it from que.
216 */
217 job = que->jobs[que->first];
218 que->first = (que->first + 1) % NUM_QUAD_JOBS;
219
220 /* Notify the producer if the que is not full.
221 */
222 if (full)
223 pipe_condvar_signal( que->que_notfull_condvar );
224 pipe_mutex_unlock( que->que_mutex );
225
226 job.routine( info->setup, info->id, &job );
227
228 /* Notify the producer if that's the last finished job.
229 */
230 pipe_mutex_lock( que->que_mutex );
231 que->jobs_done++;
232 if (que->jobs_added == que->jobs_done)
233 pipe_condvar_signal( que->que_done_condvar );
234 pipe_mutex_unlock( que->que_mutex );
235 }
236
237 return NULL;
238 }
239
/* Block until every job added to setup's que has been finished by the
 * worker threads.  When jobs are not announced instantly, the workers
 * are woken first so that pending jobs get picked up.
 * The argument is parenthesized so that any pointer expression may be
 * passed; note that it is evaluated more than once.
 */
#define WAIT_FOR_COMPLETION(setup) \
   do {\
      pipe_mutex_lock( (setup)->que.que_mutex );\
      if (!INSTANT_NOTEMPTY_NOTIFY)\
         pipe_condvar_broadcast( (setup)->que.que_notempty_condvar );\
      while ((setup)->que.jobs_added != (setup)->que.jobs_done)\
         pipe_condvar_wait( (setup)->que.que_done_condvar, (setup)->que.que_mutex );\
      pipe_mutex_unlock( (setup)->que.que_mutex );\
   } while (0)
249
250 #else
251
/* Without quad threads quads are run synchronously: nothing to wait for. */
#define WAIT_FOR_COMPLETION(setup) ((void) 0)
253
254 #endif
255
256 /**
257 * Test if x is NaN or +/- infinity.
258 */
259 static INLINE boolean
260 is_inf_or_nan(float x)
261 {
262 union fi tmp;
263 tmp.f = x;
264 return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31);
265 }
266
267
268 static boolean cull_tri( struct setup_context *setup,
269 float det )
270 {
271 if (det != 0)
272 {
273 /* if (det < 0 then Z points toward camera and triangle is
274 * counter-clockwise winding.
275 */
276 unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
277
278 if ((winding & setup->winding) == 0)
279 return FALSE;
280 }
281
282 /* Culled:
283 */
284 return TRUE;
285 }
286
287
288
289 /**
290 * Clip setup->quad against the scissor/surface bounds.
291 */
292 static INLINE void
293 quad_clip( struct setup_context *setup, struct quad_header *quad )
294 {
295 const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
296 const int minx = (int) cliprect->minx;
297 const int maxx = (int) cliprect->maxx;
298 const int miny = (int) cliprect->miny;
299 const int maxy = (int) cliprect->maxy;
300
301 if (quad->input.x0 >= maxx ||
302 quad->input.y0 >= maxy ||
303 quad->input.x0 + 1 < minx ||
304 quad->input.y0 + 1 < miny) {
305 /* totally clipped */
306 quad->inout.mask = 0x0;
307 return;
308 }
309 if (quad->input.x0 < minx)
310 quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
311 if (quad->input.y0 < miny)
312 quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
313 if (quad->input.x0 == maxx - 1)
314 quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
315 if (quad->input.y0 == maxy - 1)
316 quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
317 }
318
319
320 /**
321 * Emit a quad (pass to next stage) with clipping.
322 */
323 static INLINE void
324 clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread )
325 {
326 quad_clip( setup, quad );
327 if (quad->inout.mask) {
328 struct softpipe_context *sp = setup->softpipe;
329
330 sp->quad[thread].first->run( sp->quad[thread].first, quad );
331 }
332 }
333
334 #if SP_NUM_QUAD_THREADS > 1
335
336 static void
337 clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job )
338 {
339 struct quad_header quad;
340
341 quad.input = job->input;
342 quad.inout = job->inout;
343 quad.coef = setup->quad.coef;
344 quad.posCoef = setup->quad.posCoef;
345 quad.nr_attrs = setup->quad.nr_attrs;
346 clip_emit_quad( setup, &quad, thread );
347 }
348
/* Queue setup->quad for clipping and emission by a quad thread.
 * The argument is parenthesized so any pointer expression may be passed.
 */
#define CLIP_EMIT_QUAD(setup) add_quad_job( &(setup)->que, &(setup)->quad, clip_emit_quad_job )
350
351 #else
352
/* Clip and emit setup->quad directly on pipeline 0.
 * The argument is parenthesized so any pointer expression may be passed.
 */
#define CLIP_EMIT_QUAD(setup) clip_emit_quad( (setup), &(setup)->quad, 0 )
354
355 #endif
356
357 /**
358 * Emit a quad (pass to next stage). No clipping is done.
359 */
360 static INLINE void
361 emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread )
362 {
363 struct softpipe_context *sp = setup->softpipe;
364 #if DEBUG_FRAGS
365 uint mask = quad->inout.mask;
366 #endif
367
368 #if DEBUG_FRAGS
369 if (mask & 1) setup->numFragsEmitted++;
370 if (mask & 2) setup->numFragsEmitted++;
371 if (mask & 4) setup->numFragsEmitted++;
372 if (mask & 8) setup->numFragsEmitted++;
373 #endif
374 sp->quad[thread].first->run( sp->quad[thread].first, quad );
375 #if DEBUG_FRAGS
376 mask = quad->inout.mask;
377 if (mask & 1) setup->numFragsWritten++;
378 if (mask & 2) setup->numFragsWritten++;
379 if (mask & 4) setup->numFragsWritten++;
380 if (mask & 8) setup->numFragsWritten++;
381 #endif
382 }
383
384 #if SP_NUM_QUAD_THREADS > 1
385
386 static void
387 emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job )
388 {
389 struct quad_header quad;
390
391 quad.input = job->input;
392 quad.inout = job->inout;
393 quad.coef = setup->quad.coef;
394 quad.posCoef = setup->quad.posCoef;
395 quad.nr_attrs = setup->quad.nr_attrs;
396 emit_quad( setup, &quad, thread );
397 }
398
/* Store position and coverage mask in setup->quad and queue it for
 * emission by a quad thread.
 *
 * The parameter names carry a trailing underscore on purpose: a
 * parameter called 'mask' would also replace the member name in
 * 'quad.inout.mask', so the macro would only compile when the caller's
 * argument happened to be spelled 'mask'.  Arguments are parenthesized;
 * setup_ is evaluated more than once.
 */
#define EMIT_QUAD(setup_,x_,y_,mask_) do {\
      (setup_)->quad.input.x0 = (x_);\
      (setup_)->quad.input.y0 = (y_);\
      (setup_)->quad.inout.mask = (mask_);\
      add_quad_job( &(setup_)->que, &(setup_)->quad, emit_quad_job );\
   } while (0)
405
406 #else
407
/* Store position and coverage mask in setup->quad and emit it directly
 * on pipeline 0.
 *
 * The parameter names carry a trailing underscore on purpose: a
 * parameter called 'mask' would also replace the member name in
 * 'quad.inout.mask', so the macro would only compile when the caller's
 * argument happened to be spelled 'mask'.  Arguments are parenthesized;
 * setup_ is evaluated more than once.
 */
#define EMIT_QUAD(setup_,x_,y_,mask_) do {\
      (setup_)->quad.input.x0 = (x_);\
      (setup_)->quad.input.y0 = (y_);\
      (setup_)->quad.inout.mask = (mask_);\
      emit_quad( (setup_), &(setup_)->quad, 0 );\
   } while (0)
414
415 #endif
416
417 /**
418 * Given an X or Y coordinate, return the block/quad coordinate that it
419 * belongs to.
420 */
421 static INLINE int block( int x )
422 {
423 return x & ~1;
424 }
425
426
427 /**
428 * Render a horizontal span of quads
429 */
430 static void flush_spans( struct setup_context *setup )
431 {
432 const int xleft0 = setup->span.left[0];
433 const int xleft1 = setup->span.left[1];
434 const int xright0 = setup->span.right[0];
435 const int xright1 = setup->span.right[1];
436 int minleft, maxright;
437 int x;
438
439 switch (setup->span.y_flags) {
440 case 0x3:
441 /* both odd and even lines written (both quad rows) */
442 minleft = block(MIN2(xleft0, xleft1));
443 maxright = block(MAX2(xright0, xright1));
444 for (x = minleft; x <= maxright; x += 2) {
445 /* determine which of the four pixels is inside the span bounds */
446 uint mask = 0x0;
447 if (x >= xleft0 && x < xright0)
448 mask |= MASK_TOP_LEFT;
449 if (x >= xleft1 && x < xright1)
450 mask |= MASK_BOTTOM_LEFT;
451 if (x+1 >= xleft0 && x+1 < xright0)
452 mask |= MASK_TOP_RIGHT;
453 if (x+1 >= xleft1 && x+1 < xright1)
454 mask |= MASK_BOTTOM_RIGHT;
455 EMIT_QUAD( setup, x, setup->span.y, mask );
456 }
457 break;
458
459 case 0x1:
460 /* only even line written (quad top row) */
461 minleft = block(xleft0);
462 maxright = block(xright0);
463 for (x = minleft; x <= maxright; x += 2) {
464 uint mask = 0x0;
465 if (x >= xleft0 && x < xright0)
466 mask |= MASK_TOP_LEFT;
467 if (x+1 >= xleft0 && x+1 < xright0)
468 mask |= MASK_TOP_RIGHT;
469 EMIT_QUAD( setup, x, setup->span.y, mask );
470 }
471 break;
472
473 case 0x2:
474 /* only odd line written (quad bottom row) */
475 minleft = block(xleft1);
476 maxright = block(xright1);
477 for (x = minleft; x <= maxright; x += 2) {
478 uint mask = 0x0;
479 if (x >= xleft1 && x < xright1)
480 mask |= MASK_BOTTOM_LEFT;
481 if (x+1 >= xleft1 && x+1 < xright1)
482 mask |= MASK_BOTTOM_RIGHT;
483 EMIT_QUAD( setup, x, setup->span.y, mask );
484 }
485 break;
486
487 default:
488 return;
489 }
490
491 setup->span.y = 0;
492 setup->span.y_flags = 0;
493 setup->span.right[0] = 0;
494 setup->span.right[1] = 0;
495 }
496
497
498 #if DEBUG_VERTS
499 static void print_vertex(const struct setup_context *setup,
500 const float (*v)[4])
501 {
502 int i;
503 debug_printf(" Vertex: (%p)\n", v);
504 for (i = 0; i < setup->quad.nr_attrs; i++) {
505 debug_printf(" %d: %f %f %f %f\n", i,
506 v[i][0], v[i][1], v[i][2], v[i][3]);
507 }
508 }
509 #endif
510
511 /**
512 * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise
513 */
514 static boolean setup_sort_vertices( struct setup_context *setup,
515 float det,
516 const float (*v0)[4],
517 const float (*v1)[4],
518 const float (*v2)[4] )
519 {
520 setup->vprovoke = v2;
521
522 /* determine bottom to top order of vertices */
523 {
524 float y0 = v0[0][1];
525 float y1 = v1[0][1];
526 float y2 = v2[0][1];
527 if (y0 <= y1) {
528 if (y1 <= y2) {
529 /* y0<=y1<=y2 */
530 setup->vmin = v0;
531 setup->vmid = v1;
532 setup->vmax = v2;
533 }
534 else if (y2 <= y0) {
535 /* y2<=y0<=y1 */
536 setup->vmin = v2;
537 setup->vmid = v0;
538 setup->vmax = v1;
539 }
540 else {
541 /* y0<=y2<=y1 */
542 setup->vmin = v0;
543 setup->vmid = v2;
544 setup->vmax = v1;
545 }
546 }
547 else {
548 if (y0 <= y2) {
549 /* y1<=y0<=y2 */
550 setup->vmin = v1;
551 setup->vmid = v0;
552 setup->vmax = v2;
553 }
554 else if (y2 <= y1) {
555 /* y2<=y1<=y0 */
556 setup->vmin = v2;
557 setup->vmid = v1;
558 setup->vmax = v0;
559 }
560 else {
561 /* y1<=y2<=y0 */
562 setup->vmin = v1;
563 setup->vmid = v2;
564 setup->vmax = v0;
565 }
566 }
567 }
568
569 setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0];
570 setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1];
571 setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
572 setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
573 setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0];
574 setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1];
575
576 /*
577 * Compute triangle's area. Use 1/area to compute partial
578 * derivatives of attributes later.
579 *
580 * The area will be the same as prim->det, but the sign may be
581 * different depending on how the vertices get sorted above.
582 *
583 * To determine whether the primitive is front or back facing we
584 * use the prim->det value because its sign is correct.
585 */
586 {
587 const float area = (setup->emaj.dx * setup->ebot.dy -
588 setup->ebot.dx * setup->emaj.dy);
589
590 setup->oneoverarea = 1.0f / area;
591
592 /*
593 debug_printf("%s one-over-area %f area %f det %f\n",
594 __FUNCTION__, setup->oneoverarea, area, det );
595 */
596 if (is_inf_or_nan(setup->oneoverarea))
597 return FALSE;
598 }
599
600 /* We need to know if this is a front or back-facing triangle for:
601 * - the GLSL gl_FrontFacing fragment attribute (bool)
602 * - two-sided stencil test
603 */
604 setup->quad.input.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
605
606 return TRUE;
607 }
608
609
610 /**
611 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
612 * The value value comes from vertex[slot][i].
613 * The result will be put into setup->coef[slot].a0[i].
614 * \param slot which attribute slot
615 * \param i which component of the slot (0..3)
616 */
617 static void const_coeff( struct setup_context *setup,
618 struct tgsi_interp_coef *coef,
619 uint vertSlot, uint i)
620 {
621 assert(i <= 3);
622
623 coef->dadx[i] = 0;
624 coef->dady[i] = 0;
625
626 /* need provoking vertex info!
627 */
628 coef->a0[i] = setup->vprovoke[vertSlot][i];
629 }
630
631
632 /**
633 * Compute a0, dadx and dady for a linearly interpolated coefficient,
634 * for a triangle.
635 */
636 static void tri_linear_coeff( struct setup_context *setup,
637 struct tgsi_interp_coef *coef,
638 uint vertSlot, uint i)
639 {
640 float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
641 float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
642 float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
643 float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
644 float dadx = a * setup->oneoverarea;
645 float dady = b * setup->oneoverarea;
646
647 assert(i <= 3);
648
649 coef->dadx[i] = dadx;
650 coef->dady[i] = dady;
651
652 /* calculate a0 as the value which would be sampled for the
653 * fragment at (0,0), taking into account that we want to sample at
654 * pixel centers, in other words (0.5, 0.5).
655 *
656 * this is neat but unfortunately not a good way to do things for
657 * triangles with very large values of dadx or dady as it will
658 * result in the subtraction and re-addition from a0 of a very
659 * large number, which means we'll end up loosing a lot of the
660 * fractional bits and precision from a0. the way to fix this is
661 * to define a0 as the sample at a pixel center somewhere near vmin
662 * instead - i'll switch to this later.
663 */
664 coef->a0[i] = (setup->vmin[vertSlot][i] -
665 (dadx * (setup->vmin[0][0] - 0.5f) +
666 dady * (setup->vmin[0][1] - 0.5f)));
667
668 /*
669 debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
670 slot, "xyzw"[i],
671 setup->coef[slot].a0[i],
672 setup->coef[slot].dadx[i],
673 setup->coef[slot].dady[i]);
674 */
675 }
676
677
678 /**
679 * Compute a0, dadx and dady for a perspective-corrected interpolant,
680 * for a triangle.
681 * We basically multiply the vertex value by 1/w before computing
682 * the plane coefficients (a0, dadx, dady).
683 * Later, when we compute the value at a particular fragment position we'll
684 * divide the interpolated value by the interpolated W at that fragment.
685 */
686 static void tri_persp_coeff( struct setup_context *setup,
687 struct tgsi_interp_coef *coef,
688 uint vertSlot, uint i)
689 {
690 /* premultiply by 1/w (v[0][3] is always W):
691 */
692 float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3];
693 float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3];
694 float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3];
695 float botda = mida - mina;
696 float majda = maxa - mina;
697 float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
698 float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
699 float dadx = a * setup->oneoverarea;
700 float dady = b * setup->oneoverarea;
701
702 /*
703 debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
704 setup->vmin[vertSlot][i],
705 setup->vmid[vertSlot][i],
706 setup->vmax[vertSlot][i]
707 );
708 */
709 assert(i <= 3);
710
711 coef->dadx[i] = dadx;
712 coef->dady[i] = dady;
713 coef->a0[i] = (mina -
714 (dadx * (setup->vmin[0][0] - 0.5f) +
715 dady * (setup->vmin[0][1] - 0.5f)));
716 }
717
718
719 /**
720 * Special coefficient setup for gl_FragCoord.
721 * X and Y are trivial, though Y has to be inverted for OpenGL.
722 * Z and W are copied from posCoef which should have already been computed.
723 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
724 */
725 static void
726 setup_fragcoord_coeff(struct setup_context *setup, uint slot)
727 {
728 /*X*/
729 setup->coef[slot].a0[0] = 0;
730 setup->coef[slot].dadx[0] = 1.0;
731 setup->coef[slot].dady[0] = 0.0;
732 /*Y*/
733 if (setup->softpipe->rasterizer->origin_lower_left) {
734 /* y=0=bottom */
735 const int winHeight = setup->softpipe->framebuffer.height;
736 setup->coef[slot].a0[1] = (float) (winHeight - 1);
737 setup->coef[slot].dady[1] = -1.0;
738 }
739 else {
740 /* y=0=top */
741 setup->coef[slot].a0[1] = 0.0;
742 setup->coef[slot].dady[1] = 1.0;
743 }
744 setup->coef[slot].dadx[1] = 0.0;
745 /*Z*/
746 setup->coef[slot].a0[2] = setup->posCoef.a0[2];
747 setup->coef[slot].dadx[2] = setup->posCoef.dadx[2];
748 setup->coef[slot].dady[2] = setup->posCoef.dady[2];
749 /*W*/
750 setup->coef[slot].a0[3] = setup->posCoef.a0[3];
751 setup->coef[slot].dadx[3] = setup->posCoef.dadx[3];
752 setup->coef[slot].dady[3] = setup->posCoef.dady[3];
753 }
754
755
756
757 /**
758 * Compute the setup->coef[] array dadx, dady, a0 values.
759 * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
760 */
761 static void setup_tri_coefficients( struct setup_context *setup )
762 {
763 struct softpipe_context *softpipe = setup->softpipe;
764 const struct sp_fragment_shader *spfs = softpipe->fs;
765 const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
766 uint fragSlot;
767
768 /* z and w are done by linear interpolation:
769 */
770 tri_linear_coeff(setup, &setup->posCoef, 0, 2);
771 tri_linear_coeff(setup, &setup->posCoef, 0, 3);
772
773 /* setup interpolation for all the remaining attributes:
774 */
775 for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
776 const uint vertSlot = vinfo->attrib[fragSlot].src_index;
777 uint j;
778
779 switch (vinfo->attrib[fragSlot].interp_mode) {
780 case INTERP_CONSTANT:
781 for (j = 0; j < NUM_CHANNELS; j++)
782 const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
783 break;
784 case INTERP_LINEAR:
785 for (j = 0; j < NUM_CHANNELS; j++)
786 tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
787 break;
788 case INTERP_PERSPECTIVE:
789 for (j = 0; j < NUM_CHANNELS; j++)
790 tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
791 break;
792 case INTERP_POS:
793 setup_fragcoord_coeff(setup, fragSlot);
794 break;
795 default:
796 assert(0);
797 }
798
799 if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
800 /* FOG.y = front/back facing XXX fix this */
801 setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing;
802 setup->coef[fragSlot].dadx[1] = 0.0;
803 setup->coef[fragSlot].dady[1] = 0.0;
804 }
805 }
806 }
807
808
809
810 static void setup_tri_edges( struct setup_context *setup )
811 {
812 float vmin_x = setup->vmin[0][0] + 0.5f;
813 float vmid_x = setup->vmid[0][0] + 0.5f;
814
815 float vmin_y = setup->vmin[0][1] - 0.5f;
816 float vmid_y = setup->vmid[0][1] - 0.5f;
817 float vmax_y = setup->vmax[0][1] - 0.5f;
818
819 setup->emaj.sy = ceilf(vmin_y);
820 setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
821 setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
822 setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
823
824 setup->etop.sy = ceilf(vmid_y);
825 setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy);
826 setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
827 setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
828
829 setup->ebot.sy = ceilf(vmin_y);
830 setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy);
831 setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
832 setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
833 }
834
835
836 /**
837 * Render the upper or lower half of a triangle.
838 * Scissoring/cliprect is applied here too.
839 */
840 static void subtriangle( struct setup_context *setup,
841 struct edge *eleft,
842 struct edge *eright,
843 unsigned lines )
844 {
845 const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
846 const int minx = (int) cliprect->minx;
847 const int maxx = (int) cliprect->maxx;
848 const int miny = (int) cliprect->miny;
849 const int maxy = (int) cliprect->maxy;
850 int y, start_y, finish_y;
851 int sy = (int)eleft->sy;
852
853 assert((int)eleft->sy == (int) eright->sy);
854
855 /* clip top/bottom */
856 start_y = sy;
857 finish_y = sy + lines;
858
859 if (start_y < miny)
860 start_y = miny;
861
862 if (finish_y > maxy)
863 finish_y = maxy;
864
865 start_y -= sy;
866 finish_y -= sy;
867
868 /*
869 debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
870 */
871
872 for (y = start_y; y < finish_y; y++) {
873
874 /* avoid accumulating adds as floats don't have the precision to
875 * accurately iterate large triangle edges that way. luckily we
876 * can just multiply these days.
877 *
878 * this is all drowned out by the attribute interpolation anyway.
879 */
880 int left = (int)(eleft->sx + y * eleft->dxdy);
881 int right = (int)(eright->sx + y * eright->dxdy);
882
883 /* clip left/right */
884 if (left < minx)
885 left = minx;
886 if (right > maxx)
887 right = maxx;
888
889 if (left < right) {
890 int _y = sy + y;
891 if (block(_y) != setup->span.y) {
892 flush_spans(setup);
893 setup->span.y = block(_y);
894 }
895
896 setup->span.left[_y&1] = left;
897 setup->span.right[_y&1] = right;
898 setup->span.y_flags |= 1<<(_y&1);
899 }
900 }
901
902
903 /* save the values so that emaj can be restarted:
904 */
905 eleft->sx += lines * eleft->dxdy;
906 eright->sx += lines * eright->dxdy;
907 eleft->sy += lines;
908 eright->sy += lines;
909 }
910
911
/**
 * Recalculate the primitive's determinant.  This is needed because we
 * don't get this information through the vbuf_render interface, so we
 * must calculate it here.
 *
 * Only the X and Y of attribute 0 (the position) of each vertex are
 * used.
 *
 * \return the Z component of the cross product (v0 - v2) x (v1 - v2)
 */
static float
calc_det( const float (*v0)[4],
          const float (*v1)[4],
          const float (*v2)[4] )
{
   const float *p0 = v0[0];
   const float *p1 = v1[0];
   const float *p2 = v2[0];

   /* edge vectors e = v0 - v2, f = v1 - v2 */
   const float ex = p0[0] - p2[0];
   const float ey = p0[1] - p2[1];
   const float fx = p1[0] - p2[0];
   const float fy = p1[1] - p2[1];

   /* det = cross(e,f).z */
   return ex * fy - ey * fx;
}
931
932
933 /**
934 * Do setup for triangle rasterization, then render the triangle.
935 */
936 void setup_tri( struct setup_context *setup,
937 const float (*v0)[4],
938 const float (*v1)[4],
939 const float (*v2)[4] )
940 {
941 float det;
942
943 #if DEBUG_VERTS
944 debug_printf("Setup triangle:\n");
945 print_vertex(setup, v0);
946 print_vertex(setup, v1);
947 print_vertex(setup, v2);
948 #endif
949
950 if (setup->softpipe->no_rast)
951 return;
952
953 det = calc_det(v0, v1, v2);
954 /*
955 debug_printf("%s\n", __FUNCTION__ );
956 */
957
958 #if DEBUG_FRAGS
959 setup->numFragsEmitted = 0;
960 setup->numFragsWritten = 0;
961 #endif
962
963 if (cull_tri( setup, det ))
964 return;
965
966 if (!setup_sort_vertices( setup, det, v0, v1, v2 ))
967 return;
968 setup_tri_coefficients( setup );
969 setup_tri_edges( setup );
970
971 setup->quad.input.prim = PRIM_TRI;
972
973 setup->span.y = 0;
974 setup->span.y_flags = 0;
975 setup->span.right[0] = 0;
976 setup->span.right[1] = 0;
977 /* setup->span.z_mode = tri_z_mode( setup->ctx ); */
978
979 /* init_constant_attribs( setup ); */
980
981 if (setup->oneoverarea < 0.0) {
982 /* emaj on left:
983 */
984 subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
985 subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
986 }
987 else {
988 /* emaj on right:
989 */
990 subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
991 subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
992 }
993
994 flush_spans( setup );
995
996 WAIT_FOR_COMPLETION(setup);
997
998 #if DEBUG_FRAGS
999 printf("Tri: %u frags emitted, %u written\n",
1000 setup->numFragsEmitted,
1001 setup->numFragsWritten);
1002 #endif
1003 }
1004
1005
1006
1007 /**
1008 * Compute a0, dadx and dady for a linearly interpolated coefficient,
1009 * for a line.
1010 */
1011 static void
1012 line_linear_coeff(struct setup_context *setup,
1013 struct tgsi_interp_coef *coef,
1014 uint vertSlot, uint i)
1015 {
1016 const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
1017 const float dadx = da * setup->emaj.dx * setup->oneoverarea;
1018 const float dady = da * setup->emaj.dy * setup->oneoverarea;
1019 coef->dadx[i] = dadx;
1020 coef->dady[i] = dady;
1021 coef->a0[i] = (setup->vmin[vertSlot][i] -
1022 (dadx * (setup->vmin[0][0] - 0.5f) +
1023 dady * (setup->vmin[0][1] - 0.5f)));
1024 }
1025
1026
1027 /**
1028 * Compute a0, dadx and dady for a perspective-corrected interpolant,
1029 * for a line.
1030 */
1031 static void
1032 line_persp_coeff(struct setup_context *setup,
1033 struct tgsi_interp_coef *coef,
1034 uint vertSlot, uint i)
1035 {
1036 /* XXX double-check/verify this arithmetic */
1037 const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3];
1038 const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3];
1039 const float da = a1 - a0;
1040 const float dadx = da * setup->emaj.dx * setup->oneoverarea;
1041 const float dady = da * setup->emaj.dy * setup->oneoverarea;
1042 coef->dadx[i] = dadx;
1043 coef->dady[i] = dady;
1044 coef->a0[i] = (setup->vmin[vertSlot][i] -
1045 (dadx * (setup->vmin[0][0] - 0.5f) +
1046 dady * (setup->vmin[0][1] - 0.5f)));
1047 }
1048
1049
1050 /**
1051 * Compute the setup->coef[] array dadx, dady, a0 values.
1052 * Must be called after setup->vmin,vmax are initialized.
1053 */
1054 static INLINE boolean
1055 setup_line_coefficients(struct setup_context *setup,
1056 const float (*v0)[4],
1057 const float (*v1)[4])
1058 {
1059 struct softpipe_context *softpipe = setup->softpipe;
1060 const struct sp_fragment_shader *spfs = softpipe->fs;
1061 const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
1062 uint fragSlot;
1063 float area;
1064
1065 /* use setup->vmin, vmax to point to vertices */
1066 setup->vprovoke = v1;
1067 setup->vmin = v0;
1068 setup->vmax = v1;
1069
1070 setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
1071 setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
1072
1073 /* NOTE: this is not really area but something proportional to it */
1074 area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy;
1075 if (area == 0.0f || is_inf_or_nan(area))
1076 return FALSE;
1077 setup->oneoverarea = 1.0f / area;
1078
1079 /* z and w are done by linear interpolation:
1080 */
1081 line_linear_coeff(setup, &setup->posCoef, 0, 2);
1082 line_linear_coeff(setup, &setup->posCoef, 0, 3);
1083
1084 /* setup interpolation for all the remaining attributes:
1085 */
1086 for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
1087 const uint vertSlot = vinfo->attrib[fragSlot].src_index;
1088 uint j;
1089
1090 switch (vinfo->attrib[fragSlot].interp_mode) {
1091 case INTERP_CONSTANT:
1092 for (j = 0; j < NUM_CHANNELS; j++)
1093 const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1094 break;
1095 case INTERP_LINEAR:
1096 for (j = 0; j < NUM_CHANNELS; j++)
1097 line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1098 break;
1099 case INTERP_PERSPECTIVE:
1100 for (j = 0; j < NUM_CHANNELS; j++)
1101 line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1102 break;
1103 case INTERP_POS:
1104 setup_fragcoord_coeff(setup, fragSlot);
1105 break;
1106 default:
1107 assert(0);
1108 }
1109
1110 if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
1111 /* FOG.y = front/back facing XXX fix this */
1112 setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing;
1113 setup->coef[fragSlot].dadx[1] = 0.0;
1114 setup->coef[fragSlot].dady[1] = 0.0;
1115 }
1116 }
1117 return TRUE;
1118 }
1119
1120
1121 /**
1122 * Plot a pixel in a line segment.
1123 */
1124 static INLINE void
1125 plot(struct setup_context *setup, int x, int y)
1126 {
1127 const int iy = y & 1;
1128 const int ix = x & 1;
1129 const int quadX = x - ix;
1130 const int quadY = y - iy;
1131 const int mask = (1 << ix) << (2 * iy);
1132
1133 if (quadX != setup->quad.input.x0 ||
1134 quadY != setup->quad.input.y0)
1135 {
1136 /* flush prev quad, start new quad */
1137
1138 if (setup->quad.input.x0 != -1)
1139 CLIP_EMIT_QUAD(setup);
1140
1141 setup->quad.input.x0 = quadX;
1142 setup->quad.input.y0 = quadY;
1143 setup->quad.inout.mask = 0x0;
1144 }
1145
1146 setup->quad.inout.mask |= mask;
1147 }
1148
1149
1150 /**
1151 * Do setup for line rasterization, then render the line.
1152 * Single-pixel width, no stipple, etc. We rely on the 'draw' module
1153 * to handle stippling and wide lines.
1154 */
1155 void
1156 setup_line(struct setup_context *setup,
1157 const float (*v0)[4],
1158 const float (*v1)[4])
1159 {
1160 int x0 = (int) v0[0][0];
1161 int x1 = (int) v1[0][0];
1162 int y0 = (int) v0[0][1];
1163 int y1 = (int) v1[0][1];
1164 int dx = x1 - x0;
1165 int dy = y1 - y0;
1166 int xstep, ystep;
1167
1168 #if DEBUG_VERTS
1169 debug_printf("Setup line:\n");
1170 print_vertex(setup, v0);
1171 print_vertex(setup, v1);
1172 #endif
1173
1174 if (setup->softpipe->no_rast)
1175 return;
1176
1177 if (dx == 0 && dy == 0)
1178 return;
1179
1180 if (!setup_line_coefficients(setup, v0, v1))
1181 return;
1182
1183 assert(v0[0][0] < 1.0e9);
1184 assert(v0[0][1] < 1.0e9);
1185 assert(v1[0][0] < 1.0e9);
1186 assert(v1[0][1] < 1.0e9);
1187
1188 if (dx < 0) {
1189 dx = -dx; /* make positive */
1190 xstep = -1;
1191 }
1192 else {
1193 xstep = 1;
1194 }
1195
1196 if (dy < 0) {
1197 dy = -dy; /* make positive */
1198 ystep = -1;
1199 }
1200 else {
1201 ystep = 1;
1202 }
1203
1204 assert(dx >= 0);
1205 assert(dy >= 0);
1206
1207 setup->quad.input.x0 = setup->quad.input.y0 = -1;
1208 setup->quad.inout.mask = 0x0;
1209 setup->quad.input.prim = PRIM_LINE;
1210 /* XXX temporary: set coverage to 1.0 so the line appears
1211 * if AA mode happens to be enabled.
1212 */
1213 setup->quad.input.coverage[0] =
1214 setup->quad.input.coverage[1] =
1215 setup->quad.input.coverage[2] =
1216 setup->quad.input.coverage[3] = 1.0;
1217
1218 if (dx > dy) {
1219 /*** X-major line ***/
1220 int i;
1221 const int errorInc = dy + dy;
1222 int error = errorInc - dx;
1223 const int errorDec = error - dx;
1224
1225 for (i = 0; i < dx; i++) {
1226 plot(setup, x0, y0);
1227
1228 x0 += xstep;
1229 if (error < 0) {
1230 error += errorInc;
1231 }
1232 else {
1233 error += errorDec;
1234 y0 += ystep;
1235 }
1236 }
1237 }
1238 else {
1239 /*** Y-major line ***/
1240 int i;
1241 const int errorInc = dx + dx;
1242 int error = errorInc - dy;
1243 const int errorDec = error - dy;
1244
1245 for (i = 0; i < dy; i++) {
1246 plot(setup, x0, y0);
1247
1248 y0 += ystep;
1249 if (error < 0) {
1250 error += errorInc;
1251 }
1252 else {
1253 error += errorDec;
1254 x0 += xstep;
1255 }
1256 }
1257 }
1258
1259 /* draw final quad */
1260 if (setup->quad.inout.mask) {
1261 CLIP_EMIT_QUAD(setup);
1262 }
1263
1264 WAIT_FOR_COMPLETION(setup);
1265 }
1266
1267
1268 static void
1269 point_persp_coeff(struct setup_context *setup,
1270 const float (*vert)[4],
1271 struct tgsi_interp_coef *coef,
1272 uint vertSlot, uint i)
1273 {
1274 assert(i <= 3);
1275 coef->dadx[i] = 0.0F;
1276 coef->dady[i] = 0.0F;
1277 coef->a0[i] = vert[vertSlot][i] * vert[0][3];
1278 }
1279
1280
1281 /**
1282 * Do setup for point rasterization, then render the point.
1283 * Round or square points...
1284 * XXX could optimize a lot for 1-pixel points.
1285 */
1286 void
1287 setup_point( struct setup_context *setup,
1288 const float (*v0)[4] )
1289 {
1290 struct softpipe_context *softpipe = setup->softpipe;
1291 const struct sp_fragment_shader *spfs = softpipe->fs;
1292 const int sizeAttr = setup->softpipe->psize_slot;
1293 const float size
1294 = sizeAttr > 0 ? v0[sizeAttr][0]
1295 : setup->softpipe->rasterizer->point_size;
1296 const float halfSize = 0.5F * size;
1297 const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth;
1298 const float x = v0[0][0]; /* Note: data[0] is always position */
1299 const float y = v0[0][1];
1300 const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
1301 uint fragSlot;
1302
1303 #if DEBUG_VERTS
1304 debug_printf("Setup point:\n");
1305 print_vertex(setup, v0);
1306 #endif
1307
1308 if (softpipe->no_rast)
1309 return;
1310
1311 /* For points, all interpolants are constant-valued.
1312 * However, for point sprites, we'll need to setup texcoords appropriately.
1313 * XXX: which coefficients are the texcoords???
1314 * We may do point sprites as textured quads...
1315 *
1316 * KW: We don't know which coefficients are texcoords - ultimately
1317 * the choice of what interpolation mode to use for each attribute
1318 * should be determined by the fragment program, using
1319 * per-attribute declaration statements that include interpolation
1320 * mode as a parameter. So either the fragment program will have
1321 * to be adjusted for pointsprite vs normal point behaviour, or
1322 * otherwise a special interpolation mode will have to be defined
1323 * which matches the required behaviour for point sprites. But -
1324 * the latter is not a feature of normal hardware, and as such
1325 * probably should be ruled out on that basis.
1326 */
1327 setup->vprovoke = v0;
1328
1329 /* setup Z, W */
1330 const_coeff(setup, &setup->posCoef, 0, 2);
1331 const_coeff(setup, &setup->posCoef, 0, 3);
1332
1333 for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
1334 const uint vertSlot = vinfo->attrib[fragSlot].src_index;
1335 uint j;
1336
1337 switch (vinfo->attrib[fragSlot].interp_mode) {
1338 case INTERP_CONSTANT:
1339 /* fall-through */
1340 case INTERP_LINEAR:
1341 for (j = 0; j < NUM_CHANNELS; j++)
1342 const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1343 break;
1344 case INTERP_PERSPECTIVE:
1345 for (j = 0; j < NUM_CHANNELS; j++)
1346 point_persp_coeff(setup, setup->vprovoke,
1347 &setup->coef[fragSlot], vertSlot, j);
1348 break;
1349 case INTERP_POS:
1350 setup_fragcoord_coeff(setup, fragSlot);
1351 break;
1352 default:
1353 assert(0);
1354 }
1355
1356 if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) {
1357 /* FOG.y = front/back facing XXX fix this */
1358 setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing;
1359 setup->coef[fragSlot].dadx[1] = 0.0;
1360 setup->coef[fragSlot].dady[1] = 0.0;
1361 }
1362 }
1363
1364 setup->quad.input.prim = PRIM_POINT;
1365
1366 if (halfSize <= 0.5 && !round) {
1367 /* special case for 1-pixel points */
1368 const int ix = ((int) x) & 1;
1369 const int iy = ((int) y) & 1;
1370 setup->quad.input.x0 = (int) x - ix;
1371 setup->quad.input.y0 = (int) y - iy;
1372 setup->quad.inout.mask = (1 << ix) << (2 * iy);
1373 CLIP_EMIT_QUAD(setup);
1374 }
1375 else {
1376 if (round) {
1377 /* rounded points */
1378 const int ixmin = block((int) (x - halfSize));
1379 const int ixmax = block((int) (x + halfSize));
1380 const int iymin = block((int) (y - halfSize));
1381 const int iymax = block((int) (y + halfSize));
1382 const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */
1383 const float rmax = halfSize + 0.7071F;
1384 const float rmin2 = MAX2(0.0F, rmin * rmin);
1385 const float rmax2 = rmax * rmax;
1386 const float cscale = 1.0F / (rmax2 - rmin2);
1387 int ix, iy;
1388
1389 for (iy = iymin; iy <= iymax; iy += 2) {
1390 for (ix = ixmin; ix <= ixmax; ix += 2) {
1391 float dx, dy, dist2, cover;
1392
1393 setup->quad.inout.mask = 0x0;
1394
1395 dx = (ix + 0.5f) - x;
1396 dy = (iy + 0.5f) - y;
1397 dist2 = dx * dx + dy * dy;
1398 if (dist2 <= rmax2) {
1399 cover = 1.0F - (dist2 - rmin2) * cscale;
1400 setup->quad.input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
1401 setup->quad.inout.mask |= MASK_TOP_LEFT;
1402 }
1403
1404 dx = (ix + 1.5f) - x;
1405 dy = (iy + 0.5f) - y;
1406 dist2 = dx * dx + dy * dy;
1407 if (dist2 <= rmax2) {
1408 cover = 1.0F - (dist2 - rmin2) * cscale;
1409 setup->quad.input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
1410 setup->quad.inout.mask |= MASK_TOP_RIGHT;
1411 }
1412
1413 dx = (ix + 0.5f) - x;
1414 dy = (iy + 1.5f) - y;
1415 dist2 = dx * dx + dy * dy;
1416 if (dist2 <= rmax2) {
1417 cover = 1.0F - (dist2 - rmin2) * cscale;
1418 setup->quad.input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
1419 setup->quad.inout.mask |= MASK_BOTTOM_LEFT;
1420 }
1421
1422 dx = (ix + 1.5f) - x;
1423 dy = (iy + 1.5f) - y;
1424 dist2 = dx * dx + dy * dy;
1425 if (dist2 <= rmax2) {
1426 cover = 1.0F - (dist2 - rmin2) * cscale;
1427 setup->quad.input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
1428 setup->quad.inout.mask |= MASK_BOTTOM_RIGHT;
1429 }
1430
1431 if (setup->quad.inout.mask) {
1432 setup->quad.input.x0 = ix;
1433 setup->quad.input.y0 = iy;
1434 CLIP_EMIT_QUAD(setup);
1435 }
1436 }
1437 }
1438 }
1439 else {
1440 /* square points */
1441 const int xmin = (int) (x + 0.75 - halfSize);
1442 const int ymin = (int) (y + 0.25 - halfSize);
1443 const int xmax = xmin + (int) size;
1444 const int ymax = ymin + (int) size;
1445 /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
1446 const int ixmin = block(xmin);
1447 const int ixmax = block(xmax - 1);
1448 const int iymin = block(ymin);
1449 const int iymax = block(ymax - 1);
1450 int ix, iy;
1451
1452 /*
1453 debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
1454 */
1455 for (iy = iymin; iy <= iymax; iy += 2) {
1456 uint rowMask = 0xf;
1457 if (iy < ymin) {
1458 /* above the top edge */
1459 rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
1460 }
1461 if (iy + 1 >= ymax) {
1462 /* below the bottom edge */
1463 rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
1464 }
1465
1466 for (ix = ixmin; ix <= ixmax; ix += 2) {
1467 uint mask = rowMask;
1468
1469 if (ix < xmin) {
1470 /* fragment is past left edge of point, turn off left bits */
1471 mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
1472 }
1473 if (ix + 1 >= xmax) {
1474 /* past the right edge */
1475 mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
1476 }
1477
1478 setup->quad.inout.mask = mask;
1479 setup->quad.input.x0 = ix;
1480 setup->quad.input.y0 = iy;
1481 CLIP_EMIT_QUAD(setup);
1482 }
1483 }
1484 }
1485 }
1486
1487 WAIT_FOR_COMPLETION(setup);
1488 }
1489
1490 void setup_prepare( struct setup_context *setup )
1491 {
1492 struct softpipe_context *sp = setup->softpipe;
1493 unsigned i;
1494
1495 if (sp->dirty) {
1496 softpipe_update_derived(sp);
1497 }
1498
1499 /* Mark surfaces as defined now */
1500 for (i = 0; i < sp->framebuffer.nr_cbufs; i++){
1501 if (sp->framebuffer.cbufs[i]) {
1502 sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED;
1503 }
1504 }
1505 if (sp->framebuffer.zsbuf) {
1506 sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED;
1507 }
1508
1509 /* Note: nr_attrs is only used for debugging (vertex printing) */
1510 setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw);
1511
1512 for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
1513 sp->quad[i].first->begin( sp->quad[i].first );
1514 }
1515
1516 if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
1517 sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL &&
1518 sp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) {
1519 /* we'll do culling */
1520 setup->winding = sp->rasterizer->cull_mode;
1521 }
1522 else {
1523 /* 'draw' will do culling */
1524 setup->winding = PIPE_WINDING_NONE;
1525 }
1526 }
1527
1528
1529
/**
 * Destroy a setup context by releasing its memory.
 *
 * NOTE(review): only the structure itself is freed here.  The mutex,
 * condition variables and worker threads that setup_create_context()
 * sets up when SP_NUM_QUAD_THREADS > 1 are not torn down - confirm
 * whether that is intentional.
 */
void
setup_destroy_context(struct setup_context *setup)
{
   FREE(setup);
}
1534
1535
1536 /**
1537 * Create a new primitive setup/render stage.
1538 */
1539 struct setup_context *setup_create_context( struct softpipe_context *softpipe )
1540 {
1541 struct setup_context *setup = CALLOC_STRUCT(setup_context);
1542 #if SP_NUM_QUAD_THREADS > 1
1543 uint i;
1544 #endif
1545
1546 setup->softpipe = softpipe;
1547
1548 setup->quad.coef = setup->coef;
1549 setup->quad.posCoef = &setup->posCoef;
1550
1551 #if SP_NUM_QUAD_THREADS > 1
1552 setup->que.first = 0;
1553 setup->que.last = 0;
1554 pipe_mutex_init( setup->que.que_mutex );
1555 pipe_condvar_init( setup->que.que_notfull_condvar );
1556 pipe_condvar_init( setup->que.que_notempty_condvar );
1557 setup->que.jobs_added = 0;
1558 setup->que.jobs_done = 0;
1559 pipe_condvar_init( setup->que.que_done_condvar );
1560 for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
1561 setup->threads[i].setup = setup;
1562 setup->threads[i].id = i;
1563 setup->threads[i].handle = pipe_thread_create( quad_thread, &setup->threads[i] );
1564 }
1565 #endif
1566
1567 return setup;
1568 }
1569