21e769931a92d82b38e8ba75028c6dc1a2178226
[mesa.git] / src / gallium / drivers / llvmpipe / lp_setup.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * \brief Primitive rasterization/rendering (points, lines, triangles)
30 *
31 * \author Keith Whitwell <keith@tungstengraphics.com>
32 * \author Brian Paul
33 */
34
35 #include "lp_context.h"
36 #include "lp_prim_setup.h"
37 #include "lp_quad.h"
38 #include "lp_quad_pipe.h"
39 #include "lp_setup.h"
40 #include "lp_state.h"
41 #include "draw/draw_context.h"
42 #include "draw/draw_private.h"
43 #include "draw/draw_vertex.h"
44 #include "pipe/p_shader_tokens.h"
45 #include "pipe/p_thread.h"
46 #include "util/u_math.h"
47 #include "util/u_memory.h"
48
49
50 #define DEBUG_VERTS 0
51 #define DEBUG_FRAGS 0
52
/**
 * Per-edge state used while scan-converting a triangle.
 */
struct edge {
   float dx;     /**< X(v1) - X(v0); only needed during setup */
   float dy;     /**< Y(v1) - Y(v0); only needed during setup */
   float dxdy;   /**< slope of the edge expressed as dx/dy */
   float sx;     /**< X coordinate of the first sample point */
   float sy;     /**< Y coordinate of the first sample point */
   int lines;    /**< number of scanlines covered by this edge */
};
63
64 #if LP_NUM_QUAD_THREADS > 1
65
66 /* Set to 1 if you want other threads to be instantly
67 * notified of pending jobs.
68 */
69 #define INSTANT_NOTEMPTY_NOTIFY 0
70
71 struct thread_info
72 {
73 struct setup_context *setup;
74 uint id;
75 pipe_thread handle;
76 };
77
78 struct quad_job;
79
80 typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, struct quad_job *job );
81
82 struct quad_job
83 {
84 struct quad_header_input input;
85 struct quad_header_inout inout;
86 quad_job_routine routine;
87 };
88
89 #define NUM_QUAD_JOBS 64
90
91 struct quad_job_que
92 {
93 struct quad_job jobs[NUM_QUAD_JOBS];
94 uint first;
95 uint last;
96 pipe_mutex que_mutex;
97 pipe_condvar que_notfull_condvar;
98 pipe_condvar que_notempty_condvar;
99 uint jobs_added;
100 uint jobs_done;
101 pipe_condvar que_done_condvar;
102 };
103
104 static void
105 add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routine routine )
106 {
107 #if INSTANT_NOTEMPTY_NOTIFY
108 boolean empty;
109 #endif
110
111 /* Wait for empty slot, see if the que is empty.
112 */
113 pipe_mutex_lock( que->que_mutex );
114 while ((que->last + 1) % NUM_QUAD_JOBS == que->first) {
115 #if !INSTANT_NOTEMPTY_NOTIFY
116 pipe_condvar_broadcast( que->que_notempty_condvar );
117 #endif
118 pipe_condvar_wait( que->que_notfull_condvar, que->que_mutex );
119 }
120 #if INSTANT_NOTEMPTY_NOTIFY
121 empty = que->last == que->first;
122 #endif
123 que->jobs_added++;
124 pipe_mutex_unlock( que->que_mutex );
125
126 /* Submit new job.
127 */
128 que->jobs[que->last].input = quad->input;
129 que->jobs[que->last].inout = quad->inout;
130 que->jobs[que->last].routine = routine;
131 que->last = (que->last + 1) % NUM_QUAD_JOBS;
132
133 #if INSTANT_NOTEMPTY_NOTIFY
134 /* If the que was empty, notify consumers there's a job to be done.
135 */
136 if (empty) {
137 pipe_mutex_lock( que->que_mutex );
138 pipe_condvar_broadcast( que->que_notempty_condvar );
139 pipe_mutex_unlock( que->que_mutex );
140 }
141 #endif
142 }
143
144 #endif
145
146 /**
147 * Triangle setup info (derived from draw_stage).
148 * Also used for line drawing (taking some liberties).
149 */
150 struct setup_context {
151 struct llvmpipe_context *llvmpipe;
152
153 /* Vertices are just an array of floats making up each attribute in
154 * turn. Currently fixed at 4 floats, but should change in time.
155 * Codegen will help cope with this.
156 */
157 const float (*vmax)[4];
158 const float (*vmid)[4];
159 const float (*vmin)[4];
160 const float (*vprovoke)[4];
161
162 struct edge ebot;
163 struct edge etop;
164 struct edge emaj;
165
166 float oneoverarea;
167
168 struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
169 struct tgsi_interp_coef posCoef; /* For Z, W */
170 struct quad_header quad;
171
172 #if LP_NUM_QUAD_THREADS > 1
173 struct quad_job_que que;
174 struct thread_info threads[LP_NUM_QUAD_THREADS];
175 #endif
176
177 struct {
178 int left[2]; /**< [0] = row0, [1] = row1 */
179 int right[2];
180 int y;
181 } span;
182
183 #if DEBUG_FRAGS
184 uint numFragsEmitted; /**< per primitive */
185 uint numFragsWritten; /**< per primitive */
186 #endif
187
188 unsigned winding; /* which winding to cull */
189 };
190
191 #if LP_NUM_QUAD_THREADS > 1
192
193 static PIPE_THREAD_ROUTINE( quad_thread, param )
194 {
195 struct thread_info *info = (struct thread_info *) param;
196 struct quad_job_que *que = &info->setup->que;
197
198 for (;;) {
199 struct quad_job job;
200 boolean full;
201
202 /* Wait for an available job.
203 */
204 pipe_mutex_lock( que->que_mutex );
205 while (que->last == que->first)
206 pipe_condvar_wait( que->que_notempty_condvar, que->que_mutex );
207
208 /* See if the que is full.
209 */
210 full = (que->last + 1) % NUM_QUAD_JOBS == que->first;
211
212 /* Take a job and remove it from que.
213 */
214 job = que->jobs[que->first];
215 que->first = (que->first + 1) % NUM_QUAD_JOBS;
216
217 /* Notify the producer if the que is not full.
218 */
219 if (full)
220 pipe_condvar_signal( que->que_notfull_condvar );
221 pipe_mutex_unlock( que->que_mutex );
222
223 job.routine( info->setup, info->id, &job );
224
225 /* Notify the producer if that's the last finished job.
226 */
227 pipe_mutex_lock( que->que_mutex );
228 que->jobs_done++;
229 if (que->jobs_added == que->jobs_done)
230 pipe_condvar_signal( que->que_done_condvar );
231 pipe_mutex_unlock( que->que_mutex );
232 }
233
234 return NULL;
235 }
236
/**
 * Block until every job added to setup's queue has been processed.
 *
 * When INSTANT_NOTEMPTY_NOTIFY is 0 the workers may not have been told
 * about pending jobs yet, so they are woken first.  The macro argument
 * is parenthesized so any pointer-valued expression can be passed.
 */
#define WAIT_FOR_COMPLETION(setup) \
   do {\
      pipe_mutex_lock( (setup)->que.que_mutex );\
      if (!INSTANT_NOTEMPTY_NOTIFY)\
         pipe_condvar_broadcast( (setup)->que.que_notempty_condvar );\
      while ((setup)->que.jobs_added != (setup)->que.jobs_done)\
         pipe_condvar_wait( (setup)->que.que_done_condvar, (setup)->que.que_mutex );\
      pipe_mutex_unlock( (setup)->que.que_mutex );\
   } while (0)
246
247 #else
248
249 #define WAIT_FOR_COMPLETION(setup) ((void) 0)
250
251 #endif
252
253
254
255 /**
256 * Do triangle cull test using tri determinant (sign indicates orientation)
257 * \return true if triangle is to be culled.
258 */
259 static INLINE boolean
260 cull_tri(const struct setup_context *setup, float det)
261 {
262 if (det != 0) {
263 /* if (det < 0 then Z points toward camera and triangle is
264 * counter-clockwise winding.
265 */
266 unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
267
268 if ((winding & setup->winding) == 0)
269 return FALSE;
270 }
271
272 /* Culled:
273 */
274 return TRUE;
275 }
276
277
278
279 /**
280 * Clip setup->quad against the scissor/surface bounds.
281 */
282 static INLINE void
283 quad_clip( struct setup_context *setup, struct quad_header *quad )
284 {
285 const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect;
286 const int minx = (int) cliprect->minx;
287 const int maxx = (int) cliprect->maxx;
288 const int miny = (int) cliprect->miny;
289 const int maxy = (int) cliprect->maxy;
290
291 if (quad->input.x0 >= maxx ||
292 quad->input.y0 >= maxy ||
293 quad->input.x0 + 1 < minx ||
294 quad->input.y0 + 1 < miny) {
295 /* totally clipped */
296 quad->inout.mask = 0x0;
297 return;
298 }
299 if (quad->input.x0 < minx)
300 quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
301 if (quad->input.y0 < miny)
302 quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
303 if (quad->input.x0 == maxx - 1)
304 quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
305 if (quad->input.y0 == maxy - 1)
306 quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
307 }
308
309
310 /**
311 * Emit a quad (pass to next stage) with clipping.
312 */
313 static INLINE void
314 clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread )
315 {
316 quad_clip( setup, quad );
317 if (quad->inout.mask) {
318 struct llvmpipe_context *lp = setup->llvmpipe;
319
320 lp->quad[thread].first->run( lp->quad[thread].first, quad );
321 }
322 }
323
324 #if LP_NUM_QUAD_THREADS > 1
325
326 static void
327 clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job )
328 {
329 struct quad_header quad;
330
331 quad.input = job->input;
332 quad.inout = job->inout;
333 quad.coef = setup->quad.coef;
334 quad.posCoef = setup->quad.posCoef;
335 quad.nr_attrs = setup->quad.nr_attrs;
336 clip_emit_quad( setup, &quad, thread );
337 }
338
/* Queue setup->quad for clipping and emission by a worker thread.
 * The argument is parenthesized so any pointer expression may be passed.
 */
#define CLIP_EMIT_QUAD(setup) add_quad_job( &(setup)->que, &(setup)->quad, clip_emit_quad_job )
340
341 #else
342
/* Clip and emit setup->quad directly, using quad pipeline 0.
 * The argument is parenthesized so any pointer expression may be passed.
 */
#define CLIP_EMIT_QUAD(setup) clip_emit_quad( (setup), &(setup)->quad, 0 )
344
345 #endif
346
347 /**
348 * Emit a quad (pass to next stage). No clipping is done.
349 */
350 static INLINE void
351 emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread )
352 {
353 struct llvmpipe_context *lp = setup->llvmpipe;
354 #if DEBUG_FRAGS
355 uint mask = quad->inout.mask;
356 #endif
357
358 #if DEBUG_FRAGS
359 if (mask & 1) setup->numFragsEmitted++;
360 if (mask & 2) setup->numFragsEmitted++;
361 if (mask & 4) setup->numFragsEmitted++;
362 if (mask & 8) setup->numFragsEmitted++;
363 #endif
364 lp->quad[thread].first->run( lp->quad[thread].first, quad );
365 #if DEBUG_FRAGS
366 mask = quad->inout.mask;
367 if (mask & 1) setup->numFragsWritten++;
368 if (mask & 2) setup->numFragsWritten++;
369 if (mask & 4) setup->numFragsWritten++;
370 if (mask & 8) setup->numFragsWritten++;
371 #endif
372 }
373
374 #if LP_NUM_QUAD_THREADS > 1
375
376 static void
377 emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job )
378 {
379 struct quad_header quad;
380
381 quad.input = job->input;
382 quad.inout = job->inout;
383 quad.coef = setup->quad.coef;
384 quad.posCoef = setup->quad.posCoef;
385 quad.nr_attrs = setup->quad.nr_attrs;
386 emit_quad( setup, &quad, thread );
387 }
388
/* Store position and mask in setup->quad, then queue it for a worker
 * thread.  All arguments are parenthesized so arbitrary expressions can
 * be passed safely.
 */
#define EMIT_QUAD(setup,x,y,qmask) do {\
      (setup)->quad.input.x0 = (x);\
      (setup)->quad.input.y0 = (y);\
      (setup)->quad.inout.mask = (qmask);\
      add_quad_job( &(setup)->que, &(setup)->quad, emit_quad_job );\
   } while (0)
395
396 #else
397
/* Store position and mask in setup->quad, then emit it directly through
 * quad pipeline 0.  All arguments are parenthesized so arbitrary
 * expressions can be passed safely.
 */
#define EMIT_QUAD(setup,x,y,qmask) do {\
      (setup)->quad.input.x0 = (x);\
      (setup)->quad.input.y0 = (y);\
      (setup)->quad.inout.mask = (qmask);\
      emit_quad( (setup), &(setup)->quad, 0 );\
   } while (0)
404
405 #endif
406
/**
 * Round an X or Y coordinate down to the even coordinate of the 2x2
 * quad that contains it.
 */
static INLINE int block( int x )
{
   /* Subtracting the low bit is the same as clearing it. */
   return x - (x & 1);
}
415
416
417 /**
418 * Render a horizontal span of quads
419 */
420 static void flush_spans( struct setup_context *setup )
421 {
422 const int step = 30;
423 const int xleft0 = setup->span.left[0];
424 const int xleft1 = setup->span.left[1];
425 const int xright0 = setup->span.right[0];
426 const int xright1 = setup->span.right[1];
427
428 int minleft = block(MIN2(xleft0, xleft1));
429 int maxright = MAX2(xright0, xright1);
430 int x;
431
432 for (x = minleft; x < maxright; x += step) {
433 unsigned skip_left0 = CLAMP(xleft0 - x, 0, step);
434 unsigned skip_left1 = CLAMP(xleft1 - x, 0, step);
435 unsigned skip_right0 = CLAMP(x + step - xright0, 0, step);
436 unsigned skip_right1 = CLAMP(x + step - xright1, 0, step);
437 unsigned lx = x;
438
439 unsigned skipmask_left0 = (1U << skip_left0) - 1U;
440 unsigned skipmask_left1 = (1U << skip_left1) - 1U;
441
442 /* These calculations fail when step == 32 and skip_right == 0.
443 */
444 unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0);
445 unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1);
446
447 unsigned mask0 = ~skipmask_left0 & ~skipmask_right0;
448 unsigned mask1 = ~skipmask_left1 & ~skipmask_right1;
449
450 while (mask0 | mask1) {
451 unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2);
452 if (quadmask)
453 EMIT_QUAD( setup, lx, setup->span.y, quadmask );
454 mask0 >>= 2;
455 mask1 >>= 2;
456 lx += 2;
457 }
458 }
459
460
461 setup->span.y = 0;
462 setup->span.right[0] = 0;
463 setup->span.right[1] = 0;
464 setup->span.left[0] = 1000000; /* greater than right[0] */
465 setup->span.left[1] = 1000000; /* greater than right[1] */
466 }
467
468
469 #if DEBUG_VERTS
470 static void print_vertex(const struct setup_context *setup,
471 const float (*v)[4])
472 {
473 int i;
474 debug_printf(" Vertex: (%p)\n", v);
475 for (i = 0; i < setup->quad.nr_attrs; i++) {
476 debug_printf(" %d: %f %f %f %f\n", i,
477 v[i][0], v[i][1], v[i][2], v[i][3]);
478 if (util_is_inf_or_nan(v[i][0])) {
479 debug_printf(" NaN!\n");
480 }
481 }
482 }
483 #endif
484
485 /**
486 * Sort the vertices from top to bottom order, setting up the triangle
487 * edge fields (ebot, emaj, etop).
488 * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise
489 */
490 static boolean setup_sort_vertices( struct setup_context *setup,
491 float det,
492 const float (*v0)[4],
493 const float (*v1)[4],
494 const float (*v2)[4] )
495 {
496 setup->vprovoke = v2;
497
498 /* determine bottom to top order of vertices */
499 {
500 float y0 = v0[0][1];
501 float y1 = v1[0][1];
502 float y2 = v2[0][1];
503 if (y0 <= y1) {
504 if (y1 <= y2) {
505 /* y0<=y1<=y2 */
506 setup->vmin = v0;
507 setup->vmid = v1;
508 setup->vmax = v2;
509 }
510 else if (y2 <= y0) {
511 /* y2<=y0<=y1 */
512 setup->vmin = v2;
513 setup->vmid = v0;
514 setup->vmax = v1;
515 }
516 else {
517 /* y0<=y2<=y1 */
518 setup->vmin = v0;
519 setup->vmid = v2;
520 setup->vmax = v1;
521 }
522 }
523 else {
524 if (y0 <= y2) {
525 /* y1<=y0<=y2 */
526 setup->vmin = v1;
527 setup->vmid = v0;
528 setup->vmax = v2;
529 }
530 else if (y2 <= y1) {
531 /* y2<=y1<=y0 */
532 setup->vmin = v2;
533 setup->vmid = v1;
534 setup->vmax = v0;
535 }
536 else {
537 /* y1<=y2<=y0 */
538 setup->vmin = v1;
539 setup->vmid = v2;
540 setup->vmax = v0;
541 }
542 }
543 }
544
545 setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0];
546 setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1];
547 setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
548 setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
549 setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0];
550 setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1];
551
552 /*
553 * Compute triangle's area. Use 1/area to compute partial
554 * derivatives of attributes later.
555 *
556 * The area will be the same as prim->det, but the sign may be
557 * different depending on how the vertices get sorted above.
558 *
559 * To determine whether the primitive is front or back facing we
560 * use the prim->det value because its sign is correct.
561 */
562 {
563 const float area = (setup->emaj.dx * setup->ebot.dy -
564 setup->ebot.dx * setup->emaj.dy);
565
566 setup->oneoverarea = 1.0f / area;
567
568 /*
569 debug_printf("%s one-over-area %f area %f det %f\n",
570 __FUNCTION__, setup->oneoverarea, area, det );
571 */
572 if (util_is_inf_or_nan(setup->oneoverarea))
573 return FALSE;
574 }
575
576 /* We need to know if this is a front or back-facing triangle for:
577 * - the GLSL gl_FrontFacing fragment attribute (bool)
578 * - two-sided stencil test
579 */
580 setup->quad.input.facing = (det > 0.0) ^ (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW);
581
582 return TRUE;
583 }
584
585
586 /**
587 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
588 * The value value comes from vertex[slot][i].
589 * The result will be put into setup->coef[slot].a0[i].
590 * \param slot which attribute slot
591 * \param i which component of the slot (0..3)
592 */
593 static void const_coeff( struct setup_context *setup,
594 struct tgsi_interp_coef *coef,
595 uint vertSlot, uint i)
596 {
597 assert(i <= 3);
598
599 coef->dadx[i] = 0;
600 coef->dady[i] = 0;
601
602 /* need provoking vertex info!
603 */
604 coef->a0[i] = setup->vprovoke[vertSlot][i];
605 }
606
607
608 /**
609 * Compute a0, dadx and dady for a linearly interpolated coefficient,
610 * for a triangle.
611 */
612 static void tri_linear_coeff( struct setup_context *setup,
613 struct tgsi_interp_coef *coef,
614 uint vertSlot, uint i)
615 {
616 float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
617 float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
618 float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
619 float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
620 float dadx = a * setup->oneoverarea;
621 float dady = b * setup->oneoverarea;
622
623 assert(i <= 3);
624
625 coef->dadx[i] = dadx;
626 coef->dady[i] = dady;
627
628 /* calculate a0 as the value which would be sampled for the
629 * fragment at (0,0), taking into account that we want to sample at
630 * pixel centers, in other words (0.5, 0.5).
631 *
632 * this is neat but unfortunately not a good way to do things for
633 * triangles with very large values of dadx or dady as it will
634 * result in the subtraction and re-addition from a0 of a very
635 * large number, which means we'll end up loosing a lot of the
636 * fractional bits and precision from a0. the way to fix this is
637 * to define a0 as the sample at a pixel center somewhere near vmin
638 * instead - i'll switch to this later.
639 */
640 coef->a0[i] = (setup->vmin[vertSlot][i] -
641 (dadx * (setup->vmin[0][0] - 0.5f) +
642 dady * (setup->vmin[0][1] - 0.5f)));
643
644 /*
645 debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
646 slot, "xyzw"[i],
647 setup->coef[slot].a0[i],
648 setup->coef[slot].dadx[i],
649 setup->coef[slot].dady[i]);
650 */
651 }
652
653
654 /**
655 * Compute a0, dadx and dady for a perspective-corrected interpolant,
656 * for a triangle.
657 * We basically multiply the vertex value by 1/w before computing
658 * the plane coefficients (a0, dadx, dady).
659 * Later, when we compute the value at a particular fragment position we'll
660 * divide the interpolated value by the interpolated W at that fragment.
661 */
662 static void tri_persp_coeff( struct setup_context *setup,
663 struct tgsi_interp_coef *coef,
664 uint vertSlot, uint i)
665 {
666 /* premultiply by 1/w (v[0][3] is always W):
667 */
668 float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3];
669 float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3];
670 float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3];
671 float botda = mida - mina;
672 float majda = maxa - mina;
673 float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
674 float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
675 float dadx = a * setup->oneoverarea;
676 float dady = b * setup->oneoverarea;
677
678 /*
679 debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
680 setup->vmin[vertSlot][i],
681 setup->vmid[vertSlot][i],
682 setup->vmax[vertSlot][i]
683 );
684 */
685 assert(i <= 3);
686
687 coef->dadx[i] = dadx;
688 coef->dady[i] = dady;
689 coef->a0[i] = (mina -
690 (dadx * (setup->vmin[0][0] - 0.5f) +
691 dady * (setup->vmin[0][1] - 0.5f)));
692 }
693
694
695 /**
696 * Special coefficient setup for gl_FragCoord.
697 * X and Y are trivial, though Y has to be inverted for OpenGL.
698 * Z and W are copied from posCoef which should have already been computed.
699 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
700 */
701 static void
702 setup_fragcoord_coeff(struct setup_context *setup, uint slot)
703 {
704 /*X*/
705 setup->coef[slot].a0[0] = 0;
706 setup->coef[slot].dadx[0] = 1.0;
707 setup->coef[slot].dady[0] = 0.0;
708 /*Y*/
709 setup->coef[slot].a0[1] = 0.0;
710 setup->coef[slot].dadx[1] = 0.0;
711 setup->coef[slot].dady[1] = 1.0;
712 /*Z*/
713 setup->coef[slot].a0[2] = setup->posCoef.a0[2];
714 setup->coef[slot].dadx[2] = setup->posCoef.dadx[2];
715 setup->coef[slot].dady[2] = setup->posCoef.dady[2];
716 /*W*/
717 setup->coef[slot].a0[3] = setup->posCoef.a0[3];
718 setup->coef[slot].dadx[3] = setup->posCoef.dadx[3];
719 setup->coef[slot].dady[3] = setup->posCoef.dady[3];
720 }
721
722
723
724 /**
725 * Compute the setup->coef[] array dadx, dady, a0 values.
726 * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
727 */
728 static void setup_tri_coefficients( struct setup_context *setup )
729 {
730 struct llvmpipe_context *llvmpipe = setup->llvmpipe;
731 const struct lp_fragment_shader *lpfs = llvmpipe->fs;
732 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
733 uint fragSlot;
734
735 /* z and w are done by linear interpolation:
736 */
737 tri_linear_coeff(setup, &setup->posCoef, 0, 2);
738 tri_linear_coeff(setup, &setup->posCoef, 0, 3);
739
740 /* setup interpolation for all the remaining attributes:
741 */
742 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
743 const uint vertSlot = vinfo->attrib[fragSlot].src_index;
744 uint j;
745
746 switch (vinfo->attrib[fragSlot].interp_mode) {
747 case INTERP_CONSTANT:
748 for (j = 0; j < NUM_CHANNELS; j++)
749 const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
750 break;
751 case INTERP_LINEAR:
752 for (j = 0; j < NUM_CHANNELS; j++)
753 tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
754 break;
755 case INTERP_PERSPECTIVE:
756 for (j = 0; j < NUM_CHANNELS; j++)
757 tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
758 break;
759 case INTERP_POS:
760 setup_fragcoord_coeff(setup, fragSlot);
761 break;
762 default:
763 assert(0);
764 }
765
766 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
767 setup->coef[fragSlot].a0[0] = 1.0f - setup->quad.input.facing;
768 setup->coef[fragSlot].dadx[0] = 0.0;
769 setup->coef[fragSlot].dady[0] = 0.0;
770 }
771 }
772 }
773
774
775
776 static void setup_tri_edges( struct setup_context *setup )
777 {
778 float vmin_x = setup->vmin[0][0] + 0.5f;
779 float vmid_x = setup->vmid[0][0] + 0.5f;
780
781 float vmin_y = setup->vmin[0][1] - 0.5f;
782 float vmid_y = setup->vmid[0][1] - 0.5f;
783 float vmax_y = setup->vmax[0][1] - 0.5f;
784
785 setup->emaj.sy = ceilf(vmin_y);
786 setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
787 setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
788 setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
789
790 setup->etop.sy = ceilf(vmid_y);
791 setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy);
792 setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
793 setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
794
795 setup->ebot.sy = ceilf(vmin_y);
796 setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy);
797 setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
798 setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
799 }
800
801
802 /**
803 * Render the upper or lower half of a triangle.
804 * Scissoring/cliprect is applied here too.
805 */
806 static void subtriangle( struct setup_context *setup,
807 struct edge *eleft,
808 struct edge *eright,
809 unsigned lines )
810 {
811 const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect;
812 const int minx = (int) cliprect->minx;
813 const int maxx = (int) cliprect->maxx;
814 const int miny = (int) cliprect->miny;
815 const int maxy = (int) cliprect->maxy;
816 int y, start_y, finish_y;
817 int sy = (int)eleft->sy;
818
819 assert((int)eleft->sy == (int) eright->sy);
820
821 /* clip top/bottom */
822 start_y = sy;
823 if (start_y < miny)
824 start_y = miny;
825
826 finish_y = sy + lines;
827 if (finish_y > maxy)
828 finish_y = maxy;
829
830 start_y -= sy;
831 finish_y -= sy;
832
833 /*
834 debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
835 */
836
837 for (y = start_y; y < finish_y; y++) {
838
839 /* avoid accumulating adds as floats don't have the precision to
840 * accurately iterate large triangle edges that way. luckily we
841 * can just multiply these days.
842 *
843 * this is all drowned out by the attribute interpolation anyway.
844 */
845 int left = (int)(eleft->sx + y * eleft->dxdy);
846 int right = (int)(eright->sx + y * eright->dxdy);
847
848 /* clip left/right */
849 if (left < minx)
850 left = minx;
851 if (right > maxx)
852 right = maxx;
853
854 if (left < right) {
855 int _y = sy + y;
856 if (block(_y) != setup->span.y) {
857 flush_spans(setup);
858 setup->span.y = block(_y);
859 }
860
861 setup->span.left[_y&1] = left;
862 setup->span.right[_y&1] = right;
863 }
864 }
865
866
867 /* save the values so that emaj can be restarted:
868 */
869 eleft->sx += lines * eleft->dxdy;
870 eright->sx += lines * eright->dxdy;
871 eleft->sy += lines;
872 eright->sy += lines;
873 }
874
875
/**
 * Compute the determinant of a triangle from the X/Y of its three
 * vertex positions (attribute 0).  It has to be recalculated here
 * because the value is not supplied through the vbuf_render interface.
 *
 * \return Z component of cross(v0 - v2, v1 - v2)
 */
static float
calc_det( const float (*v0)[4],
          const float (*v1)[4],
          const float (*v2)[4] )
{
   const float *p0 = v0[0];
   const float *p1 = v1[0];
   const float *p2 = v2[0];

   /* e = v0 - v2 */
   const float ex = p0[0] - p2[0];
   const float ey = p0[1] - p2[1];

   /* f = v1 - v2 */
   const float fx = p1[0] - p2[0];
   const float fy = p1[1] - p2[1];

   return ex * fy - ey * fx;
}
895
896
897 /**
898 * Do setup for triangle rasterization, then render the triangle.
899 */
900 void setup_tri( struct setup_context *setup,
901 const float (*v0)[4],
902 const float (*v1)[4],
903 const float (*v2)[4] )
904 {
905 float det;
906
907 #if DEBUG_VERTS
908 debug_printf("Setup triangle:\n");
909 print_vertex(setup, v0);
910 print_vertex(setup, v1);
911 print_vertex(setup, v2);
912 #endif
913
914 if (setup->llvmpipe->no_rast)
915 return;
916
917 det = calc_det(v0, v1, v2);
918 /*
919 debug_printf("%s\n", __FUNCTION__ );
920 */
921
922 #if DEBUG_FRAGS
923 setup->numFragsEmitted = 0;
924 setup->numFragsWritten = 0;
925 #endif
926
927 if (cull_tri( setup, det ))
928 return;
929
930 if (!setup_sort_vertices( setup, det, v0, v1, v2 ))
931 return;
932 setup_tri_coefficients( setup );
933 setup_tri_edges( setup );
934
935 setup->quad.input.prim = QUAD_PRIM_TRI;
936
937 setup->span.y = 0;
938 setup->span.right[0] = 0;
939 setup->span.right[1] = 0;
940 /* setup->span.z_mode = tri_z_mode( setup->ctx ); */
941
942 /* init_constant_attribs( setup ); */
943
944 if (setup->oneoverarea < 0.0) {
945 /* emaj on left:
946 */
947 subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
948 subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
949 }
950 else {
951 /* emaj on right:
952 */
953 subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
954 subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
955 }
956
957 flush_spans( setup );
958
959 WAIT_FOR_COMPLETION(setup);
960
961 #if DEBUG_FRAGS
962 printf("Tri: %u frags emitted, %u written\n",
963 setup->numFragsEmitted,
964 setup->numFragsWritten);
965 #endif
966 }
967
968
969
970 /**
971 * Compute a0, dadx and dady for a linearly interpolated coefficient,
972 * for a line.
973 */
974 static void
975 line_linear_coeff(const struct setup_context *setup,
976 struct tgsi_interp_coef *coef,
977 uint vertSlot, uint i)
978 {
979 const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
980 const float dadx = da * setup->emaj.dx * setup->oneoverarea;
981 const float dady = da * setup->emaj.dy * setup->oneoverarea;
982 coef->dadx[i] = dadx;
983 coef->dady[i] = dady;
984 coef->a0[i] = (setup->vmin[vertSlot][i] -
985 (dadx * (setup->vmin[0][0] - 0.5f) +
986 dady * (setup->vmin[0][1] - 0.5f)));
987 }
988
989
990 /**
991 * Compute a0, dadx and dady for a perspective-corrected interpolant,
992 * for a line.
993 */
994 static void
995 line_persp_coeff(const struct setup_context *setup,
996 struct tgsi_interp_coef *coef,
997 uint vertSlot, uint i)
998 {
999 /* XXX double-check/verify this arithmetic */
1000 const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3];
1001 const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3];
1002 const float da = a1 - a0;
1003 const float dadx = da * setup->emaj.dx * setup->oneoverarea;
1004 const float dady = da * setup->emaj.dy * setup->oneoverarea;
1005 coef->dadx[i] = dadx;
1006 coef->dady[i] = dady;
1007 coef->a0[i] = (setup->vmin[vertSlot][i] -
1008 (dadx * (setup->vmin[0][0] - 0.5f) +
1009 dady * (setup->vmin[0][1] - 0.5f)));
1010 }
1011
1012
1013 /**
1014 * Compute the setup->coef[] array dadx, dady, a0 values.
1015 * Must be called after setup->vmin,vmax are initialized.
1016 */
1017 static INLINE boolean
1018 setup_line_coefficients(struct setup_context *setup,
1019 const float (*v0)[4],
1020 const float (*v1)[4])
1021 {
1022 struct llvmpipe_context *llvmpipe = setup->llvmpipe;
1023 const struct lp_fragment_shader *lpfs = llvmpipe->fs;
1024 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
1025 uint fragSlot;
1026 float area;
1027
1028 /* use setup->vmin, vmax to point to vertices */
1029 if (llvmpipe->rasterizer->flatshade_first)
1030 setup->vprovoke = v0;
1031 else
1032 setup->vprovoke = v1;
1033 setup->vmin = v0;
1034 setup->vmax = v1;
1035
1036 setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
1037 setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
1038
1039 /* NOTE: this is not really area but something proportional to it */
1040 area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy;
1041 if (area == 0.0f || util_is_inf_or_nan(area))
1042 return FALSE;
1043 setup->oneoverarea = 1.0f / area;
1044
1045 /* z and w are done by linear interpolation:
1046 */
1047 line_linear_coeff(setup, &setup->posCoef, 0, 2);
1048 line_linear_coeff(setup, &setup->posCoef, 0, 3);
1049
1050 /* setup interpolation for all the remaining attributes:
1051 */
1052 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
1053 const uint vertSlot = vinfo->attrib[fragSlot].src_index;
1054 uint j;
1055
1056 switch (vinfo->attrib[fragSlot].interp_mode) {
1057 case INTERP_CONSTANT:
1058 for (j = 0; j < NUM_CHANNELS; j++)
1059 const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1060 break;
1061 case INTERP_LINEAR:
1062 for (j = 0; j < NUM_CHANNELS; j++)
1063 line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1064 break;
1065 case INTERP_PERSPECTIVE:
1066 for (j = 0; j < NUM_CHANNELS; j++)
1067 line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1068 break;
1069 case INTERP_POS:
1070 setup_fragcoord_coeff(setup, fragSlot);
1071 break;
1072 default:
1073 assert(0);
1074 }
1075
1076 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
1077 setup->coef[fragSlot].a0[0] = 1.0f - setup->quad.input.facing;
1078 setup->coef[fragSlot].dadx[0] = 0.0;
1079 setup->coef[fragSlot].dady[0] = 0.0;
1080 }
1081 }
1082 return TRUE;
1083 }
1084
1085
1086 /**
1087 * Plot a pixel in a line segment.
1088 */
1089 static INLINE void
1090 plot(struct setup_context *setup, int x, int y)
1091 {
1092 const int iy = y & 1;
1093 const int ix = x & 1;
1094 const int quadX = x - ix;
1095 const int quadY = y - iy;
1096 const int mask = (1 << ix) << (2 * iy);
1097
1098 if (quadX != setup->quad.input.x0 ||
1099 quadY != setup->quad.input.y0)
1100 {
1101 /* flush prev quad, start new quad */
1102
1103 if (setup->quad.input.x0 != -1)
1104 CLIP_EMIT_QUAD(setup);
1105
1106 setup->quad.input.x0 = quadX;
1107 setup->quad.input.y0 = quadY;
1108 setup->quad.inout.mask = 0x0;
1109 }
1110
1111 setup->quad.inout.mask |= mask;
1112 }
1113
1114
1115 /**
1116 * Do setup for line rasterization, then render the line.
1117 * Single-pixel width, no stipple, etc. We rely on the 'draw' module
1118 * to handle stippling and wide lines.
1119 */
1120 void
1121 setup_line(struct setup_context *setup,
1122 const float (*v0)[4],
1123 const float (*v1)[4])
1124 {
1125 int x0 = (int) v0[0][0];
1126 int x1 = (int) v1[0][0];
1127 int y0 = (int) v0[0][1];
1128 int y1 = (int) v1[0][1];
1129 int dx = x1 - x0;
1130 int dy = y1 - y0;
1131 int xstep, ystep;
1132
1133 #if DEBUG_VERTS
1134 debug_printf("Setup line:\n");
1135 print_vertex(setup, v0);
1136 print_vertex(setup, v1);
1137 #endif
1138
1139 if (setup->llvmpipe->no_rast)
1140 return;
1141
1142 if (dx == 0 && dy == 0)
1143 return;
1144
1145 if (!setup_line_coefficients(setup, v0, v1))
1146 return;
1147
1148 assert(v0[0][0] < 1.0e9);
1149 assert(v0[0][1] < 1.0e9);
1150 assert(v1[0][0] < 1.0e9);
1151 assert(v1[0][1] < 1.0e9);
1152
1153 if (dx < 0) {
1154 dx = -dx; /* make positive */
1155 xstep = -1;
1156 }
1157 else {
1158 xstep = 1;
1159 }
1160
1161 if (dy < 0) {
1162 dy = -dy; /* make positive */
1163 ystep = -1;
1164 }
1165 else {
1166 ystep = 1;
1167 }
1168
1169 assert(dx >= 0);
1170 assert(dy >= 0);
1171
1172 setup->quad.input.x0 = setup->quad.input.y0 = -1;
1173 setup->quad.inout.mask = 0x0;
1174 setup->quad.input.prim = QUAD_PRIM_LINE;
1175 /* XXX temporary: set coverage to 1.0 so the line appears
1176 * if AA mode happens to be enabled.
1177 */
1178 setup->quad.input.coverage[0] =
1179 setup->quad.input.coverage[1] =
1180 setup->quad.input.coverage[2] =
1181 setup->quad.input.coverage[3] = 1.0;
1182
1183 if (dx > dy) {
1184 /*** X-major line ***/
1185 int i;
1186 const int errorInc = dy + dy;
1187 int error = errorInc - dx;
1188 const int errorDec = error - dx;
1189
1190 for (i = 0; i < dx; i++) {
1191 plot(setup, x0, y0);
1192
1193 x0 += xstep;
1194 if (error < 0) {
1195 error += errorInc;
1196 }
1197 else {
1198 error += errorDec;
1199 y0 += ystep;
1200 }
1201 }
1202 }
1203 else {
1204 /*** Y-major line ***/
1205 int i;
1206 const int errorInc = dx + dx;
1207 int error = errorInc - dy;
1208 const int errorDec = error - dy;
1209
1210 for (i = 0; i < dy; i++) {
1211 plot(setup, x0, y0);
1212
1213 y0 += ystep;
1214 if (error < 0) {
1215 error += errorInc;
1216 }
1217 else {
1218 error += errorDec;
1219 x0 += xstep;
1220 }
1221 }
1222 }
1223
1224 /* draw final quad */
1225 if (setup->quad.inout.mask) {
1226 CLIP_EMIT_QUAD(setup);
1227 }
1228
1229 WAIT_FOR_COMPLETION(setup);
1230 }
1231
1232
1233 static void
1234 point_persp_coeff(const struct setup_context *setup,
1235 const float (*vert)[4],
1236 struct tgsi_interp_coef *coef,
1237 uint vertSlot, uint i)
1238 {
1239 assert(i <= 3);
1240 coef->dadx[i] = 0.0F;
1241 coef->dady[i] = 0.0F;
1242 coef->a0[i] = vert[vertSlot][i] * vert[0][3];
1243 }
1244
1245
1246 /**
1247 * Do setup for point rasterization, then render the point.
1248 * Round or square points...
1249 * XXX could optimize a lot for 1-pixel points.
1250 */
1251 void
1252 setup_point( struct setup_context *setup,
1253 const float (*v0)[4] )
1254 {
1255 struct llvmpipe_context *llvmpipe = setup->llvmpipe;
1256 const struct lp_fragment_shader *lpfs = llvmpipe->fs;
1257 const int sizeAttr = setup->llvmpipe->psize_slot;
1258 const float size
1259 = sizeAttr > 0 ? v0[sizeAttr][0]
1260 : setup->llvmpipe->rasterizer->point_size;
1261 const float halfSize = 0.5F * size;
1262 const boolean round = (boolean) setup->llvmpipe->rasterizer->point_smooth;
1263 const float x = v0[0][0]; /* Note: data[0] is always position */
1264 const float y = v0[0][1];
1265 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
1266 uint fragSlot;
1267
1268 #if DEBUG_VERTS
1269 debug_printf("Setup point:\n");
1270 print_vertex(setup, v0);
1271 #endif
1272
1273 if (llvmpipe->no_rast)
1274 return;
1275
1276 /* For points, all interpolants are constant-valued.
1277 * However, for point sprites, we'll need to setup texcoords appropriately.
1278 * XXX: which coefficients are the texcoords???
1279 * We may do point sprites as textured quads...
1280 *
1281 * KW: We don't know which coefficients are texcoords - ultimately
1282 * the choice of what interpolation mode to use for each attribute
1283 * should be determined by the fragment program, using
1284 * per-attribute declaration statements that include interpolation
1285 * mode as a parameter. So either the fragment program will have
1286 * to be adjusted for pointsprite vs normal point behaviour, or
1287 * otherwise a special interpolation mode will have to be defined
1288 * which matches the required behaviour for point sprites. But -
1289 * the latter is not a feature of normal hardware, and as such
1290 * probably should be ruled out on that basis.
1291 */
1292 setup->vprovoke = v0;
1293
1294 /* setup Z, W */
1295 const_coeff(setup, &setup->posCoef, 0, 2);
1296 const_coeff(setup, &setup->posCoef, 0, 3);
1297
1298 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
1299 const uint vertSlot = vinfo->attrib[fragSlot].src_index;
1300 uint j;
1301
1302 switch (vinfo->attrib[fragSlot].interp_mode) {
1303 case INTERP_CONSTANT:
1304 /* fall-through */
1305 case INTERP_LINEAR:
1306 for (j = 0; j < NUM_CHANNELS; j++)
1307 const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1308 break;
1309 case INTERP_PERSPECTIVE:
1310 for (j = 0; j < NUM_CHANNELS; j++)
1311 point_persp_coeff(setup, setup->vprovoke,
1312 &setup->coef[fragSlot], vertSlot, j);
1313 break;
1314 case INTERP_POS:
1315 setup_fragcoord_coeff(setup, fragSlot);
1316 break;
1317 default:
1318 assert(0);
1319 }
1320
1321 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
1322 setup->coef[fragSlot].a0[0] = 1.0f - setup->quad.input.facing;
1323 setup->coef[fragSlot].dadx[0] = 0.0;
1324 setup->coef[fragSlot].dady[0] = 0.0;
1325 }
1326 }
1327
1328 setup->quad.input.prim = QUAD_PRIM_POINT;
1329
1330 if (halfSize <= 0.5 && !round) {
1331 /* special case for 1-pixel points */
1332 const int ix = ((int) x) & 1;
1333 const int iy = ((int) y) & 1;
1334 setup->quad.input.x0 = (int) x - ix;
1335 setup->quad.input.y0 = (int) y - iy;
1336 setup->quad.inout.mask = (1 << ix) << (2 * iy);
1337 CLIP_EMIT_QUAD(setup);
1338 }
1339 else {
1340 if (round) {
1341 /* rounded points */
1342 const int ixmin = block((int) (x - halfSize));
1343 const int ixmax = block((int) (x + halfSize));
1344 const int iymin = block((int) (y - halfSize));
1345 const int iymax = block((int) (y + halfSize));
1346 const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */
1347 const float rmax = halfSize + 0.7071F;
1348 const float rmin2 = MAX2(0.0F, rmin * rmin);
1349 const float rmax2 = rmax * rmax;
1350 const float cscale = 1.0F / (rmax2 - rmin2);
1351 int ix, iy;
1352
1353 for (iy = iymin; iy <= iymax; iy += 2) {
1354 for (ix = ixmin; ix <= ixmax; ix += 2) {
1355 float dx, dy, dist2, cover;
1356
1357 setup->quad.inout.mask = 0x0;
1358
1359 dx = (ix + 0.5f) - x;
1360 dy = (iy + 0.5f) - y;
1361 dist2 = dx * dx + dy * dy;
1362 if (dist2 <= rmax2) {
1363 cover = 1.0F - (dist2 - rmin2) * cscale;
1364 setup->quad.input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
1365 setup->quad.inout.mask |= MASK_TOP_LEFT;
1366 }
1367
1368 dx = (ix + 1.5f) - x;
1369 dy = (iy + 0.5f) - y;
1370 dist2 = dx * dx + dy * dy;
1371 if (dist2 <= rmax2) {
1372 cover = 1.0F - (dist2 - rmin2) * cscale;
1373 setup->quad.input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
1374 setup->quad.inout.mask |= MASK_TOP_RIGHT;
1375 }
1376
1377 dx = (ix + 0.5f) - x;
1378 dy = (iy + 1.5f) - y;
1379 dist2 = dx * dx + dy * dy;
1380 if (dist2 <= rmax2) {
1381 cover = 1.0F - (dist2 - rmin2) * cscale;
1382 setup->quad.input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
1383 setup->quad.inout.mask |= MASK_BOTTOM_LEFT;
1384 }
1385
1386 dx = (ix + 1.5f) - x;
1387 dy = (iy + 1.5f) - y;
1388 dist2 = dx * dx + dy * dy;
1389 if (dist2 <= rmax2) {
1390 cover = 1.0F - (dist2 - rmin2) * cscale;
1391 setup->quad.input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
1392 setup->quad.inout.mask |= MASK_BOTTOM_RIGHT;
1393 }
1394
1395 if (setup->quad.inout.mask) {
1396 setup->quad.input.x0 = ix;
1397 setup->quad.input.y0 = iy;
1398 CLIP_EMIT_QUAD(setup);
1399 }
1400 }
1401 }
1402 }
1403 else {
1404 /* square points */
1405 const int xmin = (int) (x + 0.75 - halfSize);
1406 const int ymin = (int) (y + 0.25 - halfSize);
1407 const int xmax = xmin + (int) size;
1408 const int ymax = ymin + (int) size;
1409 /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
1410 const int ixmin = block(xmin);
1411 const int ixmax = block(xmax - 1);
1412 const int iymin = block(ymin);
1413 const int iymax = block(ymax - 1);
1414 int ix, iy;
1415
1416 /*
1417 debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
1418 */
1419 for (iy = iymin; iy <= iymax; iy += 2) {
1420 uint rowMask = 0xf;
1421 if (iy < ymin) {
1422 /* above the top edge */
1423 rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
1424 }
1425 if (iy + 1 >= ymax) {
1426 /* below the bottom edge */
1427 rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
1428 }
1429
1430 for (ix = ixmin; ix <= ixmax; ix += 2) {
1431 uint mask = rowMask;
1432
1433 if (ix < xmin) {
1434 /* fragment is past left edge of point, turn off left bits */
1435 mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
1436 }
1437 if (ix + 1 >= xmax) {
1438 /* past the right edge */
1439 mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
1440 }
1441
1442 setup->quad.inout.mask = mask;
1443 setup->quad.input.x0 = ix;
1444 setup->quad.input.y0 = iy;
1445 CLIP_EMIT_QUAD(setup);
1446 }
1447 }
1448 }
1449 }
1450
1451 WAIT_FOR_COMPLETION(setup);
1452 }
1453
1454 void setup_prepare( struct setup_context *setup )
1455 {
1456 struct llvmpipe_context *lp = setup->llvmpipe;
1457 unsigned i;
1458
1459 if (lp->dirty) {
1460 llvmpipe_update_derived(lp);
1461 }
1462
1463 /* Note: nr_attrs is only used for debugging (vertex printing) */
1464 setup->quad.nr_attrs = draw_num_vs_outputs(lp->draw);
1465
1466 for (i = 0; i < LP_NUM_QUAD_THREADS; i++) {
1467 lp->quad[i].first->begin( lp->quad[i].first );
1468 }
1469
1470 if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
1471 lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL &&
1472 lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) {
1473 /* we'll do culling */
1474 setup->winding = lp->rasterizer->cull_mode;
1475 }
1476 else {
1477 /* 'draw' will do culling */
1478 setup->winding = PIPE_WINDING_NONE;
1479 }
1480 }
1481
1482
1483
/**
 * Destroy a setup context by releasing its memory with FREE().
 *
 * NOTE(review): nothing else is torn down here.  When LP_NUM_QUAD_THREADS > 1
 * the create function also starts threads and initializes a mutex and
 * condition variables; confirm whether those are cleaned up elsewhere.
 *
 * \param setup  context to release
 */
void
setup_destroy_context(struct setup_context *setup)
{
   FREE(setup);
}
1488
1489
1490 /**
1491 * Create a new primitive setup/render stage.
1492 */
1493 struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe )
1494 {
1495 struct setup_context *setup = CALLOC_STRUCT(setup_context);
1496 #if LP_NUM_QUAD_THREADS > 1
1497 uint i;
1498 #endif
1499
1500 setup->llvmpipe = llvmpipe;
1501
1502 setup->quad.coef = setup->coef;
1503 setup->quad.posCoef = &setup->posCoef;
1504
1505 setup->span.left[0] = 1000000; /* greater than right[0] */
1506 setup->span.left[1] = 1000000; /* greater than right[1] */
1507
1508 #if LP_NUM_QUAD_THREADS > 1
1509 setup->que.first = 0;
1510 setup->que.last = 0;
1511 pipe_mutex_init( setup->que.que_mutex );
1512 pipe_condvar_init( setup->que.que_notfull_condvar );
1513 pipe_condvar_init( setup->que.que_notempty_condvar );
1514 setup->que.jobs_added = 0;
1515 setup->que.jobs_done = 0;
1516 pipe_condvar_init( setup->que.que_done_condvar );
1517 for (i = 0; i < LP_NUM_QUAD_THREADS; i++) {
1518 setup->threads[i].setup = setup;
1519 setup->threads[i].id = i;
1520 setup->threads[i].handle = pipe_thread_create( quad_thread, &setup->threads[i] );
1521 }
1522 #endif
1523
1524 return setup;
1525 }
1526