Merge branch 'mesa_7_6_branch'
[mesa.git] / src / gallium / drivers / llvmpipe / lp_setup.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * \brief Primitive rasterization/rendering (points, lines, triangles)
30 *
31 * \author Keith Whitwell <keith@tungstengraphics.com>
32 * \author Brian Paul
33 */
34
35 #include "lp_context.h"
36 #include "lp_prim_setup.h"
37 #include "lp_quad.h"
38 #include "lp_setup.h"
39 #include "lp_state.h"
40 #include "draw/draw_context.h"
41 #include "draw/draw_private.h"
42 #include "draw/draw_vertex.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "pipe/p_thread.h"
45 #include "util/u_math.h"
46 #include "util/u_memory.h"
47 #include "lp_bld_debug.h"
48 #include "lp_tile_cache.h"
49 #include "lp_tile_soa.h"
50
51
/* Set to 1 to compile in print_vertex() and the per-primitive vertex dumps. */
#define DEBUG_VERTS 0
/* Set to 1 to compile in the per-primitive fragment counters and report. */
#define DEBUG_FRAGS 0
54
/**
 * Per-edge state used while scan-converting a triangle.
 *
 * dx and dy are filled in when the vertices are sorted; the remaining
 * fields are derived from them when the edges are set up.
 */
struct edge {
   float dx;     /**< X(v1) - X(v0); only needed during setup */
   float dy;     /**< Y(v1) - Y(v0); only needed during setup */
   float dxdy;   /**< slope of the edge expressed as dx/dy */
   float sx;     /**< X coordinate of the first sample point */
   float sy;     /**< Y coordinate of the first sample point */
   int lines;    /**< how many scanlines this edge covers */
};
65
66
/* Capacity of the quad[] and quad_ptrs[] arrays in struct setup_context. */
#define MAX_QUADS 16
68
69
70 /**
71 * Triangle setup info (derived from draw_stage).
72 * Also used for line drawing (taking some liberties).
73 */
74 struct setup_context {
75 struct llvmpipe_context *llvmpipe;
76
77 /* Vertices are just an array of floats making up each attribute in
78 * turn. Currently fixed at 4 floats, but should change in time.
79 * Codegen will help cope with this.
80 */
81 const float (*vmax)[4];
82 const float (*vmid)[4];
83 const float (*vmin)[4];
84 const float (*vprovoke)[4];
85
86 struct edge ebot;
87 struct edge etop;
88 struct edge emaj;
89
90 float oneoverarea;
91 int facing;
92
93 struct quad_header quad[MAX_QUADS];
94 struct quad_header *quad_ptrs[MAX_QUADS];
95 unsigned count;
96
97 struct quad_interp_coef coef;
98
99 struct {
100 int left[2]; /**< [0] = row0, [1] = row1 */
101 int right[2];
102 int y;
103 } span;
104
105 #if DEBUG_FRAGS
106 uint numFragsEmitted; /**< per primitive */
107 uint numFragsWritten; /**< per primitive */
108 #endif
109
110 unsigned winding; /* which winding to cull */
111 };
112
113
114
115 /**
116 * Execute fragment shader for the four fragments in the quad.
117 */
118 static void
119 shade_quads(struct llvmpipe_context *llvmpipe,
120 struct quad_header *quads[],
121 unsigned nr)
122 {
123 struct lp_fragment_shader *fs = llvmpipe->fs;
124 struct quad_header *quad = quads[0];
125 const unsigned x = quad->input.x0;
126 const unsigned y = quad->input.y0;
127 uint8_t *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y);
128 uint8_t *color;
129 void *depth;
130 uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS];
131 unsigned chan_index;
132 unsigned q;
133
134 assert(fs->current);
135 if(!fs->current)
136 return;
137
138 /* Sanity checks */
139 assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH);
140 assert(x % TILE_VECTOR_WIDTH == 0);
141 assert(y % TILE_VECTOR_HEIGHT == 0);
142 for (q = 0; q < nr; ++q) {
143 assert(quads[q]->input.x0 == x + q*2);
144 assert(quads[q]->input.y0 == y);
145 }
146
147 /* mask */
148 for (q = 0; q < 4; ++q)
149 for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
150 mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0;
151
152 /* color buffer */
153 color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0);
154
155 /* depth buffer */
156 if(llvmpipe->zsbuf_map) {
157 assert((x % 2) == 0);
158 assert((y % 2) == 0);
159 depth = llvmpipe->zsbuf_map +
160 y*llvmpipe->zsbuf_transfer->stride +
161 2*x*llvmpipe->zsbuf_transfer->block.size;
162 }
163 else
164 depth = NULL;
165
166 /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */
167 assert(lp_check_alignment(mask, 16));
168
169 assert(lp_check_alignment(depth, 16));
170 assert(lp_check_alignment(color, 16));
171 assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16));
172
173 /* run shader */
174 fs->current->jit_function( &llvmpipe->jit_context,
175 x, y,
176 quad->coef->a0,
177 quad->coef->dadx,
178 quad->coef->dady,
179 &mask[0][0],
180 color,
181 depth);
182 }
183
184
185
186
187 /**
188 * Do triangle cull test using tri determinant (sign indicates orientation)
189 * \return true if triangle is to be culled.
190 */
191 static INLINE boolean
192 cull_tri(const struct setup_context *setup, float det)
193 {
194 if (det != 0) {
195 /* if (det < 0 then Z points toward camera and triangle is
196 * counter-clockwise winding.
197 */
198 unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
199
200 if ((winding & setup->winding) == 0)
201 return FALSE;
202 }
203
204 /* Culled:
205 */
206 return TRUE;
207 }
208
209
210
211 /**
212 * Clip setup->quad against the scissor/surface bounds.
213 */
214 static INLINE void
215 quad_clip( struct setup_context *setup, struct quad_header *quad )
216 {
217 const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect;
218 const int minx = (int) cliprect->minx;
219 const int maxx = (int) cliprect->maxx;
220 const int miny = (int) cliprect->miny;
221 const int maxy = (int) cliprect->maxy;
222
223 if (quad->input.x0 >= maxx ||
224 quad->input.y0 >= maxy ||
225 quad->input.x0 + 1 < minx ||
226 quad->input.y0 + 1 < miny) {
227 /* totally clipped */
228 quad->inout.mask = 0x0;
229 return;
230 }
231 if (quad->input.x0 < minx)
232 quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
233 if (quad->input.y0 < miny)
234 quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
235 if (quad->input.x0 == maxx - 1)
236 quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
237 if (quad->input.y0 == maxy - 1)
238 quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
239 }
240
241
242
243 /**
244 * Given an X or Y coordinate, return the block/quad coordinate that it
245 * belongs to.
246 */
247 static INLINE int block( int x )
248 {
249 return x & ~(2-1);
250 }
251
252 static INLINE int block_x( int x )
253 {
254 return x & ~(TILE_VECTOR_WIDTH - 1);
255 }
256
257
258 /**
259 * Emit a quad (pass to next stage) with clipping.
260 */
261 static INLINE void
262 clip_emit_quad( struct setup_context *setup, struct quad_header *quad )
263 {
264 quad_clip( setup, quad );
265
266 if (quad->inout.mask) {
267 struct llvmpipe_context *lp = setup->llvmpipe;
268
269 #if 1
270 /* XXX: The blender expects 4 quads. This is far from efficient, but
271 * until we codegenerate single-quad variants of the fragment pipeline
272 * we need this hack. */
273 const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
274 struct quad_header quads[nr_quads];
275 struct quad_header *quad_ptrs[nr_quads];
276 int x0 = block_x(quad->input.x0);
277 unsigned i;
278
279 for(i = 0; i < nr_quads; ++i) {
280 int x = x0 + 2*i;
281 if(x == quad->input.x0)
282 memcpy(&quads[i], quad, sizeof quads[i]);
283 else {
284 memset(&quads[i], 0, sizeof quads[i]);
285 quads[i].input.x0 = x;
286 quads[i].input.y0 = quad->input.y0;
287 quads[i].coef = quad->coef;
288 }
289 quad_ptrs[i] = &quads[i];
290 }
291
292 shade_quads( lp, quad_ptrs, nr_quads );
293 #else
294 shade_quads( lp, &quad, 1 );
295 #endif
296 }
297 }
298
299
300 /**
301 * Render a horizontal span of quads
302 */
303 static void flush_spans( struct setup_context *setup )
304 {
305 const int step = TILE_VECTOR_WIDTH;
306 const int xleft0 = setup->span.left[0];
307 const int xleft1 = setup->span.left[1];
308 const int xright0 = setup->span.right[0];
309 const int xright1 = setup->span.right[1];
310
311
312 int minleft = block_x(MIN2(xleft0, xleft1));
313 int maxright = MAX2(xright0, xright1);
314 int x;
315
316 for (x = minleft; x < maxright; x += step) {
317 unsigned skip_left0 = CLAMP(xleft0 - x, 0, step);
318 unsigned skip_left1 = CLAMP(xleft1 - x, 0, step);
319 unsigned skip_right0 = CLAMP(x + step - xright0, 0, step);
320 unsigned skip_right1 = CLAMP(x + step - xright1, 0, step);
321 unsigned lx = x;
322 const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
323 unsigned q = 0;
324
325 unsigned skipmask_left0 = (1U << skip_left0) - 1U;
326 unsigned skipmask_left1 = (1U << skip_left1) - 1U;
327
328 /* These calculations fail when step == 32 and skip_right == 0.
329 */
330 unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0);
331 unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1);
332
333 unsigned mask0 = ~skipmask_left0 & ~skipmask_right0;
334 unsigned mask1 = ~skipmask_left1 & ~skipmask_right1;
335
336 if (mask0 | mask1) {
337 for(q = 0; q < nr_quads; ++q) {
338 unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2);
339 setup->quad[q].input.x0 = lx;
340 setup->quad[q].input.y0 = setup->span.y;
341 setup->quad[q].inout.mask = quadmask;
342 setup->quad_ptrs[q] = &setup->quad[q];
343 mask0 >>= 2;
344 mask1 >>= 2;
345 lx += 2;
346 }
347 assert(!(mask0 | mask1));
348
349 shade_quads(setup->llvmpipe, setup->quad_ptrs, nr_quads );
350 }
351 }
352
353
354 setup->span.y = 0;
355 setup->span.right[0] = 0;
356 setup->span.right[1] = 0;
357 setup->span.left[0] = 1000000; /* greater than right[0] */
358 setup->span.left[1] = 1000000; /* greater than right[1] */
359 }
360
361
#if DEBUG_VERTS
/**
 * Debug helper: dump every attribute of a vertex and flag attributes
 * whose first component is inf or NaN.
 *
 * \param setup  supplies the attribute count (quad[0].nr_attrs)
 * \param v      vertex to print
 */
static void print_vertex(const struct setup_context *setup,
                         const float (*v)[4])
{
   int i;

   /* %p requires a pointer to void, so cast explicitly. */
   debug_printf(" Vertex: (%p)\n", (const void *) v);
   for (i = 0; i < setup->quad[0].nr_attrs; i++) {
      debug_printf(" %d: %f %f %f %f\n", i,
                   v[i][0], v[i][1], v[i][2], v[i][3]);
      if (util_is_inf_or_nan(v[i][0])) {
         debug_printf(" NaN!\n");
      }
   }
}
#endif
377
378 /**
379 * Sort the vertices from top to bottom order, setting up the triangle
380 * edge fields (ebot, emaj, etop).
381 * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise
382 */
383 static boolean setup_sort_vertices( struct setup_context *setup,
384 float det,
385 const float (*v0)[4],
386 const float (*v1)[4],
387 const float (*v2)[4] )
388 {
389 setup->vprovoke = v2;
390
391 /* determine bottom to top order of vertices */
392 {
393 float y0 = v0[0][1];
394 float y1 = v1[0][1];
395 float y2 = v2[0][1];
396 if (y0 <= y1) {
397 if (y1 <= y2) {
398 /* y0<=y1<=y2 */
399 setup->vmin = v0;
400 setup->vmid = v1;
401 setup->vmax = v2;
402 }
403 else if (y2 <= y0) {
404 /* y2<=y0<=y1 */
405 setup->vmin = v2;
406 setup->vmid = v0;
407 setup->vmax = v1;
408 }
409 else {
410 /* y0<=y2<=y1 */
411 setup->vmin = v0;
412 setup->vmid = v2;
413 setup->vmax = v1;
414 }
415 }
416 else {
417 if (y0 <= y2) {
418 /* y1<=y0<=y2 */
419 setup->vmin = v1;
420 setup->vmid = v0;
421 setup->vmax = v2;
422 }
423 else if (y2 <= y1) {
424 /* y2<=y1<=y0 */
425 setup->vmin = v2;
426 setup->vmid = v1;
427 setup->vmax = v0;
428 }
429 else {
430 /* y1<=y2<=y0 */
431 setup->vmin = v1;
432 setup->vmid = v2;
433 setup->vmax = v0;
434 }
435 }
436 }
437
438 setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0];
439 setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1];
440 setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
441 setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
442 setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0];
443 setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1];
444
445 /*
446 * Compute triangle's area. Use 1/area to compute partial
447 * derivatives of attributes later.
448 *
449 * The area will be the same as prim->det, but the sign may be
450 * different depending on how the vertices get sorted above.
451 *
452 * To determine whether the primitive is front or back facing we
453 * use the prim->det value because its sign is correct.
454 */
455 {
456 const float area = (setup->emaj.dx * setup->ebot.dy -
457 setup->ebot.dx * setup->emaj.dy);
458
459 setup->oneoverarea = 1.0f / area;
460
461 /*
462 debug_printf("%s one-over-area %f area %f det %f\n",
463 __FUNCTION__, setup->oneoverarea, area, det );
464 */
465 if (util_is_inf_or_nan(setup->oneoverarea))
466 return FALSE;
467 }
468
469 /* We need to know if this is a front or back-facing triangle for:
470 * - the GLSL gl_FrontFacing fragment attribute (bool)
471 * - two-sided stencil test
472 */
473 setup->facing =
474 ((det > 0.0) ^
475 (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW));
476
477 return TRUE;
478 }
479
480
481 /**
482 * Compute a0, dadx and dady for a linearly interpolated coefficient,
483 * for a triangle.
484 */
485 static void tri_pos_coeff( struct setup_context *setup,
486 uint vertSlot, unsigned i)
487 {
488 float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
489 float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
490 float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
491 float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
492 float dadx = a * setup->oneoverarea;
493 float dady = b * setup->oneoverarea;
494
495 assert(i <= 3);
496
497 setup->coef.dadx[0][i] = dadx;
498 setup->coef.dady[0][i] = dady;
499
500 /* calculate a0 as the value which would be sampled for the
501 * fragment at (0,0), taking into account that we want to sample at
502 * pixel centers, in other words (0.5, 0.5).
503 *
504 * this is neat but unfortunately not a good way to do things for
505 * triangles with very large values of dadx or dady as it will
506 * result in the subtraction and re-addition from a0 of a very
507 * large number, which means we'll end up loosing a lot of the
508 * fractional bits and precision from a0. the way to fix this is
509 * to define a0 as the sample at a pixel center somewhere near vmin
510 * instead - i'll switch to this later.
511 */
512 setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
513 (dadx * (setup->vmin[0][0] - 0.5f) +
514 dady * (setup->vmin[0][1] - 0.5f)));
515
516 /*
517 debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
518 slot, "xyzw"[i],
519 setup->coef[slot].a0[i],
520 setup->coef[slot].dadx[i],
521 setup->coef[slot].dady[i]);
522 */
523 }
524
525
526 /**
527 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
528 * The value value comes from vertex[slot][i].
529 * The result will be put into setup->coef[slot].a0[i].
530 * \param slot which attribute slot
531 * \param i which component of the slot (0..3)
532 */
533 static void const_pos_coeff( struct setup_context *setup,
534 uint vertSlot, unsigned i)
535 {
536 setup->coef.dadx[0][i] = 0;
537 setup->coef.dady[0][i] = 0;
538
539 /* need provoking vertex info!
540 */
541 setup->coef.a0[0][i] = setup->vprovoke[vertSlot][i];
542 }
543
544
545 /**
546 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
547 * The value value comes from vertex[slot][i].
548 * The result will be put into setup->coef[slot].a0[i].
549 * \param slot which attribute slot
550 * \param i which component of the slot (0..3)
551 */
552 static void const_coeff( struct setup_context *setup,
553 unsigned attrib,
554 uint vertSlot)
555 {
556 unsigned i;
557 for (i = 0; i < NUM_CHANNELS; ++i) {
558 setup->coef.dadx[1 + attrib][i] = 0;
559 setup->coef.dady[1 + attrib][i] = 0;
560
561 /* need provoking vertex info!
562 */
563 setup->coef.a0[1 + attrib][i] = setup->vprovoke[vertSlot][i];
564 }
565 }
566
567
568 /**
569 * Compute a0, dadx and dady for a linearly interpolated coefficient,
570 * for a triangle.
571 */
572 static void tri_linear_coeff( struct setup_context *setup,
573 unsigned attrib,
574 uint vertSlot)
575 {
576 unsigned i;
577 for (i = 0; i < NUM_CHANNELS; ++i) {
578 float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
579 float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
580 float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
581 float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
582 float dadx = a * setup->oneoverarea;
583 float dady = b * setup->oneoverarea;
584
585 assert(i <= 3);
586
587 setup->coef.dadx[1 + attrib][i] = dadx;
588 setup->coef.dady[1 + attrib][i] = dady;
589
590 /* calculate a0 as the value which would be sampled for the
591 * fragment at (0,0), taking into account that we want to sample at
592 * pixel centers, in other words (0.5, 0.5).
593 *
594 * this is neat but unfortunately not a good way to do things for
595 * triangles with very large values of dadx or dady as it will
596 * result in the subtraction and re-addition from a0 of a very
597 * large number, which means we'll end up loosing a lot of the
598 * fractional bits and precision from a0. the way to fix this is
599 * to define a0 as the sample at a pixel center somewhere near vmin
600 * instead - i'll switch to this later.
601 */
602 setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
603 (dadx * (setup->vmin[0][0] - 0.5f) +
604 dady * (setup->vmin[0][1] - 0.5f)));
605
606 /*
607 debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
608 slot, "xyzw"[i],
609 setup->coef[slot].a0[i],
610 setup->coef[slot].dadx[i],
611 setup->coef[slot].dady[i]);
612 */
613 }
614 }
615
616
617 /**
618 * Compute a0, dadx and dady for a perspective-corrected interpolant,
619 * for a triangle.
620 * We basically multiply the vertex value by 1/w before computing
621 * the plane coefficients (a0, dadx, dady).
622 * Later, when we compute the value at a particular fragment position we'll
623 * divide the interpolated value by the interpolated W at that fragment.
624 */
625 static void tri_persp_coeff( struct setup_context *setup,
626 unsigned attrib,
627 uint vertSlot)
628 {
629 unsigned i;
630 for (i = 0; i < NUM_CHANNELS; ++i) {
631 /* premultiply by 1/w (v[0][3] is always W):
632 */
633 float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3];
634 float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3];
635 float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3];
636 float botda = mida - mina;
637 float majda = maxa - mina;
638 float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
639 float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
640 float dadx = a * setup->oneoverarea;
641 float dady = b * setup->oneoverarea;
642
643 /*
644 debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
645 setup->vmin[vertSlot][i],
646 setup->vmid[vertSlot][i],
647 setup->vmax[vertSlot][i]
648 );
649 */
650 assert(i <= 3);
651
652 setup->coef.dadx[1 + attrib][i] = dadx;
653 setup->coef.dady[1 + attrib][i] = dady;
654 setup->coef.a0[1 + attrib][i] = (mina -
655 (dadx * (setup->vmin[0][0] - 0.5f) +
656 dady * (setup->vmin[0][1] - 0.5f)));
657 }
658 }
659
660
661 /**
662 * Special coefficient setup for gl_FragCoord.
663 * X and Y are trivial, though Y has to be inverted for OpenGL.
664 * Z and W are copied from posCoef which should have already been computed.
665 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
666 */
667 static void
668 setup_fragcoord_coeff(struct setup_context *setup, uint slot)
669 {
670 /*X*/
671 setup->coef.a0[1 + slot][0] = 0;
672 setup->coef.dadx[1 + slot][0] = 1.0;
673 setup->coef.dady[1 + slot][0] = 0.0;
674 /*Y*/
675 setup->coef.a0[1 + slot][1] = 0.0;
676 setup->coef.dadx[1 + slot][1] = 0.0;
677 setup->coef.dady[1 + slot][1] = 1.0;
678 /*Z*/
679 setup->coef.a0[1 + slot][2] = setup->coef.a0[0][2];
680 setup->coef.dadx[1 + slot][2] = setup->coef.dadx[0][2];
681 setup->coef.dady[1 + slot][2] = setup->coef.dady[0][2];
682 /*W*/
683 setup->coef.a0[1 + slot][3] = setup->coef.a0[0][3];
684 setup->coef.dadx[1 + slot][3] = setup->coef.dadx[0][3];
685 setup->coef.dady[1 + slot][3] = setup->coef.dady[0][3];
686 }
687
688
689
690 /**
691 * Compute the setup->coef[] array dadx, dady, a0 values.
692 * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
693 */
694 static void setup_tri_coefficients( struct setup_context *setup )
695 {
696 struct llvmpipe_context *llvmpipe = setup->llvmpipe;
697 const struct lp_fragment_shader *lpfs = llvmpipe->fs;
698 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
699 uint fragSlot;
700
701 /* z and w are done by linear interpolation:
702 */
703 tri_pos_coeff(setup, 0, 2);
704 tri_pos_coeff(setup, 0, 3);
705
706 /* setup interpolation for all the remaining attributes:
707 */
708 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
709 const uint vertSlot = vinfo->attrib[fragSlot].src_index;
710
711 switch (vinfo->attrib[fragSlot].interp_mode) {
712 case INTERP_CONSTANT:
713 const_coeff(setup, fragSlot, vertSlot);
714 break;
715 case INTERP_LINEAR:
716 tri_linear_coeff(setup, fragSlot, vertSlot);
717 break;
718 case INTERP_PERSPECTIVE:
719 tri_persp_coeff(setup, fragSlot, vertSlot);
720 break;
721 case INTERP_POS:
722 setup_fragcoord_coeff(setup, fragSlot);
723 break;
724 default:
725 assert(0);
726 }
727
728 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
729 setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
730 setup->coef.dadx[1 + fragSlot][0] = 0.0;
731 setup->coef.dady[1 + fragSlot][0] = 0.0;
732 }
733 }
734 }
735
736
737
738 static void setup_tri_edges( struct setup_context *setup )
739 {
740 float vmin_x = setup->vmin[0][0] + 0.5f;
741 float vmid_x = setup->vmid[0][0] + 0.5f;
742
743 float vmin_y = setup->vmin[0][1] - 0.5f;
744 float vmid_y = setup->vmid[0][1] - 0.5f;
745 float vmax_y = setup->vmax[0][1] - 0.5f;
746
747 setup->emaj.sy = ceilf(vmin_y);
748 setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
749 setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
750 setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
751
752 setup->etop.sy = ceilf(vmid_y);
753 setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy);
754 setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
755 setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
756
757 setup->ebot.sy = ceilf(vmin_y);
758 setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy);
759 setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
760 setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
761 }
762
763
764 /**
765 * Render the upper or lower half of a triangle.
766 * Scissoring/cliprect is applied here too.
767 */
768 static void subtriangle( struct setup_context *setup,
769 struct edge *eleft,
770 struct edge *eright,
771 unsigned lines )
772 {
773 const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect;
774 const int minx = (int) cliprect->minx;
775 const int maxx = (int) cliprect->maxx;
776 const int miny = (int) cliprect->miny;
777 const int maxy = (int) cliprect->maxy;
778 int y, start_y, finish_y;
779 int sy = (int)eleft->sy;
780
781 assert((int)eleft->sy == (int) eright->sy);
782
783 /* clip top/bottom */
784 start_y = sy;
785 if (start_y < miny)
786 start_y = miny;
787
788 finish_y = sy + lines;
789 if (finish_y > maxy)
790 finish_y = maxy;
791
792 start_y -= sy;
793 finish_y -= sy;
794
795 /*
796 debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
797 */
798
799 for (y = start_y; y < finish_y; y++) {
800
801 /* avoid accumulating adds as floats don't have the precision to
802 * accurately iterate large triangle edges that way. luckily we
803 * can just multiply these days.
804 *
805 * this is all drowned out by the attribute interpolation anyway.
806 */
807 int left = (int)(eleft->sx + y * eleft->dxdy);
808 int right = (int)(eright->sx + y * eright->dxdy);
809
810 /* clip left/right */
811 if (left < minx)
812 left = minx;
813 if (right > maxx)
814 right = maxx;
815
816 if (left < right) {
817 int _y = sy + y;
818 if (block(_y) != setup->span.y) {
819 flush_spans(setup);
820 setup->span.y = block(_y);
821 }
822
823 setup->span.left[_y&1] = left;
824 setup->span.right[_y&1] = right;
825 }
826 }
827
828
829 /* save the values so that emaj can be restarted:
830 */
831 eleft->sx += lines * eleft->dxdy;
832 eright->sx += lines * eright->dxdy;
833 eleft->sy += lines;
834 eright->sy += lines;
835 }
836
837
/**
 * Compute the determinant of a triangle from its window positions.
 *
 * The value is the Z component of cross(v0 - v2, v1 - v2).  It has to be
 * recomputed here because it does not arrive through the vbuf_render
 * interface.
 */
static float
calc_det( const float (*v0)[4],
          const float (*v1)[4],
          const float (*v2)[4] )
{
   const float *p0 = v0[0];
   const float *p1 = v1[0];
   const float *p2 = v2[0];

   /* e = v0 - v2 */
   const float ex = p0[0] - p2[0];
   const float ey = p0[1] - p2[1];

   /* f = v1 - v2 */
   const float fx = p1[0] - p2[0];
   const float fy = p1[1] - p2[1];

   return ex * fy - ey * fx;
}
857
858
859 /**
860 * Do setup for triangle rasterization, then render the triangle.
861 */
862 void llvmpipe_setup_tri( struct setup_context *setup,
863 const float (*v0)[4],
864 const float (*v1)[4],
865 const float (*v2)[4] )
866 {
867 float det;
868
869 #if DEBUG_VERTS
870 debug_printf("Setup triangle:\n");
871 print_vertex(setup, v0);
872 print_vertex(setup, v1);
873 print_vertex(setup, v2);
874 #endif
875
876 if (setup->llvmpipe->no_rast)
877 return;
878
879 det = calc_det(v0, v1, v2);
880 /*
881 debug_printf("%s\n", __FUNCTION__ );
882 */
883
884 #if DEBUG_FRAGS
885 setup->numFragsEmitted = 0;
886 setup->numFragsWritten = 0;
887 #endif
888
889 if (cull_tri( setup, det ))
890 return;
891
892 if (!setup_sort_vertices( setup, det, v0, v1, v2 ))
893 return;
894 setup_tri_coefficients( setup );
895 setup_tri_edges( setup );
896
897 assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_TRIANGLES);
898
899 setup->span.y = 0;
900 setup->span.right[0] = 0;
901 setup->span.right[1] = 0;
902 /* setup->span.z_mode = tri_z_mode( setup->ctx ); */
903
904 /* init_constant_attribs( setup ); */
905
906 if (setup->oneoverarea < 0.0) {
907 /* emaj on left:
908 */
909 subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
910 subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
911 }
912 else {
913 /* emaj on right:
914 */
915 subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
916 subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
917 }
918
919 flush_spans( setup );
920
921 #if DEBUG_FRAGS
922 printf("Tri: %u frags emitted, %u written\n",
923 setup->numFragsEmitted,
924 setup->numFragsWritten);
925 #endif
926 }
927
928
929
930 /**
931 * Compute a0, dadx and dady for a linearly interpolated coefficient,
932 * for a line.
933 */
934 static void
935 linear_pos_coeff(struct setup_context *setup,
936 uint vertSlot, uint i)
937 {
938 const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
939 const float dadx = da * setup->emaj.dx * setup->oneoverarea;
940 const float dady = da * setup->emaj.dy * setup->oneoverarea;
941 setup->coef.dadx[0][i] = dadx;
942 setup->coef.dady[0][i] = dady;
943 setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
944 (dadx * (setup->vmin[0][0] - 0.5f) +
945 dady * (setup->vmin[0][1] - 0.5f)));
946 }
947
948
949 /**
950 * Compute a0, dadx and dady for a linearly interpolated coefficient,
951 * for a line.
952 */
953 static void
954 line_linear_coeff(struct setup_context *setup,
955 unsigned attrib,
956 uint vertSlot)
957 {
958 unsigned i;
959 for (i = 0; i < NUM_CHANNELS; ++i) {
960 const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
961 const float dadx = da * setup->emaj.dx * setup->oneoverarea;
962 const float dady = da * setup->emaj.dy * setup->oneoverarea;
963 setup->coef.dadx[1 + attrib][i] = dadx;
964 setup->coef.dady[1 + attrib][i] = dady;
965 setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
966 (dadx * (setup->vmin[0][0] - 0.5f) +
967 dady * (setup->vmin[0][1] - 0.5f)));
968 }
969 }
970
971
972 /**
973 * Compute a0, dadx and dady for a perspective-corrected interpolant,
974 * for a line.
975 */
976 static void
977 line_persp_coeff(struct setup_context *setup,
978 unsigned attrib,
979 uint vertSlot)
980 {
981 unsigned i;
982 for (i = 0; i < NUM_CHANNELS; ++i) {
983 /* XXX double-check/verify this arithmetic */
984 const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3];
985 const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3];
986 const float da = a1 - a0;
987 const float dadx = da * setup->emaj.dx * setup->oneoverarea;
988 const float dady = da * setup->emaj.dy * setup->oneoverarea;
989 setup->coef.dadx[1 + attrib][i] = dadx;
990 setup->coef.dady[1 + attrib][i] = dady;
991 setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
992 (dadx * (setup->vmin[0][0] - 0.5f) +
993 dady * (setup->vmin[0][1] - 0.5f)));
994 }
995 }
996
997
998 /**
999 * Compute the setup->coef[] array dadx, dady, a0 values.
1000 * Must be called after setup->vmin,vmax are initialized.
1001 */
1002 static INLINE boolean
1003 setup_line_coefficients(struct setup_context *setup,
1004 const float (*v0)[4],
1005 const float (*v1)[4])
1006 {
1007 struct llvmpipe_context *llvmpipe = setup->llvmpipe;
1008 const struct lp_fragment_shader *lpfs = llvmpipe->fs;
1009 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
1010 uint fragSlot;
1011 float area;
1012
1013 /* use setup->vmin, vmax to point to vertices */
1014 if (llvmpipe->rasterizer->flatshade_first)
1015 setup->vprovoke = v0;
1016 else
1017 setup->vprovoke = v1;
1018 setup->vmin = v0;
1019 setup->vmax = v1;
1020
1021 setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
1022 setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
1023
1024 /* NOTE: this is not really area but something proportional to it */
1025 area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy;
1026 if (area == 0.0f || util_is_inf_or_nan(area))
1027 return FALSE;
1028 setup->oneoverarea = 1.0f / area;
1029
1030 /* z and w are done by linear interpolation:
1031 */
1032 linear_pos_coeff(setup, 0, 2);
1033 linear_pos_coeff(setup, 0, 3);
1034
1035 /* setup interpolation for all the remaining attributes:
1036 */
1037 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
1038 const uint vertSlot = vinfo->attrib[fragSlot].src_index;
1039
1040 switch (vinfo->attrib[fragSlot].interp_mode) {
1041 case INTERP_CONSTANT:
1042 const_coeff(setup, fragSlot, vertSlot);
1043 break;
1044 case INTERP_LINEAR:
1045 line_linear_coeff(setup, fragSlot, vertSlot);
1046 break;
1047 case INTERP_PERSPECTIVE:
1048 line_persp_coeff(setup, fragSlot, vertSlot);
1049 break;
1050 case INTERP_POS:
1051 setup_fragcoord_coeff(setup, fragSlot);
1052 break;
1053 default:
1054 assert(0);
1055 }
1056
1057 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
1058 setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
1059 setup->coef.dadx[1 + fragSlot][0] = 0.0;
1060 setup->coef.dady[1 + fragSlot][0] = 0.0;
1061 }
1062 }
1063 return TRUE;
1064 }
1065
1066
1067 /**
1068 * Plot a pixel in a line segment.
1069 */
1070 static INLINE void
1071 plot(struct setup_context *setup, int x, int y)
1072 {
1073 const int iy = y & 1;
1074 const int ix = x & 1;
1075 const int quadX = x - ix;
1076 const int quadY = y - iy;
1077 const int mask = (1 << ix) << (2 * iy);
1078
1079 if (quadX != setup->quad[0].input.x0 ||
1080 quadY != setup->quad[0].input.y0)
1081 {
1082 /* flush prev quad, start new quad */
1083
1084 if (setup->quad[0].input.x0 != -1)
1085 clip_emit_quad( setup, &setup->quad[0] );
1086
1087 setup->quad[0].input.x0 = quadX;
1088 setup->quad[0].input.y0 = quadY;
1089 setup->quad[0].inout.mask = 0x0;
1090 }
1091
1092 setup->quad[0].inout.mask |= mask;
1093 }
1094
1095
1096 /**
1097 * Do setup for line rasterization, then render the line.
1098 * Single-pixel width, no stipple, etc. We rely on the 'draw' module
1099 * to handle stippling and wide lines.
1100 */
1101 void
1102 llvmpipe_setup_line(struct setup_context *setup,
1103 const float (*v0)[4],
1104 const float (*v1)[4])
1105 {
1106 int x0 = (int) v0[0][0];
1107 int x1 = (int) v1[0][0];
1108 int y0 = (int) v0[0][1];
1109 int y1 = (int) v1[0][1];
1110 int dx = x1 - x0;
1111 int dy = y1 - y0;
1112 int xstep, ystep;
1113
1114 #if DEBUG_VERTS
1115 debug_printf("Setup line:\n");
1116 print_vertex(setup, v0);
1117 print_vertex(setup, v1);
1118 #endif
1119
1120 if (setup->llvmpipe->no_rast)
1121 return;
1122
1123 if (dx == 0 && dy == 0)
1124 return;
1125
1126 if (!setup_line_coefficients(setup, v0, v1))
1127 return;
1128
1129 assert(v0[0][0] < 1.0e9);
1130 assert(v0[0][1] < 1.0e9);
1131 assert(v1[0][0] < 1.0e9);
1132 assert(v1[0][1] < 1.0e9);
1133
1134 if (dx < 0) {
1135 dx = -dx; /* make positive */
1136 xstep = -1;
1137 }
1138 else {
1139 xstep = 1;
1140 }
1141
1142 if (dy < 0) {
1143 dy = -dy; /* make positive */
1144 ystep = -1;
1145 }
1146 else {
1147 ystep = 1;
1148 }
1149
1150 assert(dx >= 0);
1151 assert(dy >= 0);
1152 assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_LINES);
1153
1154 setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1;
1155 setup->quad[0].inout.mask = 0x0;
1156
1157 /* XXX temporary: set coverage to 1.0 so the line appears
1158 * if AA mode happens to be enabled.
1159 */
1160 setup->quad[0].input.coverage[0] =
1161 setup->quad[0].input.coverage[1] =
1162 setup->quad[0].input.coverage[2] =
1163 setup->quad[0].input.coverage[3] = 1.0;
1164
1165 if (dx > dy) {
1166 /*** X-major line ***/
1167 int i;
1168 const int errorInc = dy + dy;
1169 int error = errorInc - dx;
1170 const int errorDec = error - dx;
1171
1172 for (i = 0; i < dx; i++) {
1173 plot(setup, x0, y0);
1174
1175 x0 += xstep;
1176 if (error < 0) {
1177 error += errorInc;
1178 }
1179 else {
1180 error += errorDec;
1181 y0 += ystep;
1182 }
1183 }
1184 }
1185 else {
1186 /*** Y-major line ***/
1187 int i;
1188 const int errorInc = dx + dx;
1189 int error = errorInc - dy;
1190 const int errorDec = error - dy;
1191
1192 for (i = 0; i < dy; i++) {
1193 plot(setup, x0, y0);
1194
1195 y0 += ystep;
1196 if (error < 0) {
1197 error += errorInc;
1198 }
1199 else {
1200 error += errorDec;
1201 x0 += xstep;
1202 }
1203 }
1204 }
1205
1206 /* draw final quad */
1207 if (setup->quad[0].inout.mask) {
1208 clip_emit_quad( setup, &setup->quad[0] );
1209 }
1210 }
1211
1212
1213 static void
1214 point_persp_coeff(struct setup_context *setup,
1215 const float (*vert)[4],
1216 unsigned attrib,
1217 uint vertSlot)
1218 {
1219 unsigned i;
1220 for(i = 0; i < NUM_CHANNELS; ++i) {
1221 setup->coef.dadx[1 + attrib][i] = 0.0F;
1222 setup->coef.dady[1 + attrib][i] = 0.0F;
1223 setup->coef.a0[1 + attrib][i] = vert[vertSlot][i] * vert[0][3];
1224 }
1225 }
1226
1227
1228 /**
1229 * Do setup for point rasterization, then render the point.
1230 * Round or square points...
1231 * XXX could optimize a lot for 1-pixel points.
1232 */
1233 void
1234 llvmpipe_setup_point( struct setup_context *setup,
1235 const float (*v0)[4] )
1236 {
1237 struct llvmpipe_context *llvmpipe = setup->llvmpipe;
1238 const struct lp_fragment_shader *lpfs = llvmpipe->fs;
1239 const int sizeAttr = setup->llvmpipe->psize_slot;
1240 const float size
1241 = sizeAttr > 0 ? v0[sizeAttr][0]
1242 : setup->llvmpipe->rasterizer->point_size;
1243 const float halfSize = 0.5F * size;
1244 const boolean round = (boolean) setup->llvmpipe->rasterizer->point_smooth;
1245 const float x = v0[0][0]; /* Note: data[0] is always position */
1246 const float y = v0[0][1];
1247 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
1248 uint fragSlot;
1249
1250 #if DEBUG_VERTS
1251 debug_printf("Setup point:\n");
1252 print_vertex(setup, v0);
1253 #endif
1254
1255 if (llvmpipe->no_rast)
1256 return;
1257
1258 assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_POINTS);
1259
1260 /* For points, all interpolants are constant-valued.
1261 * However, for point sprites, we'll need to setup texcoords appropriately.
1262 * XXX: which coefficients are the texcoords???
1263 * We may do point sprites as textured quads...
1264 *
1265 * KW: We don't know which coefficients are texcoords - ultimately
1266 * the choice of what interpolation mode to use for each attribute
1267 * should be determined by the fragment program, using
1268 * per-attribute declaration statements that include interpolation
1269 * mode as a parameter. So either the fragment program will have
1270 * to be adjusted for pointsprite vs normal point behaviour, or
1271 * otherwise a special interpolation mode will have to be defined
1272 * which matches the required behaviour for point sprites. But -
1273 * the latter is not a feature of normal hardware, and as such
1274 * probably should be ruled out on that basis.
1275 */
1276 setup->vprovoke = v0;
1277
1278 /* setup Z, W */
1279 const_pos_coeff(setup, 0, 2);
1280 const_pos_coeff(setup, 0, 3);
1281
1282 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
1283 const uint vertSlot = vinfo->attrib[fragSlot].src_index;
1284
1285 switch (vinfo->attrib[fragSlot].interp_mode) {
1286 case INTERP_CONSTANT:
1287 /* fall-through */
1288 case INTERP_LINEAR:
1289 const_coeff(setup, fragSlot, vertSlot);
1290 break;
1291 case INTERP_PERSPECTIVE:
1292 point_persp_coeff(setup, setup->vprovoke, fragSlot, vertSlot);
1293 break;
1294 case INTERP_POS:
1295 setup_fragcoord_coeff(setup, fragSlot);
1296 break;
1297 default:
1298 assert(0);
1299 }
1300
1301 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
1302 setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
1303 setup->coef.dadx[1 + fragSlot][0] = 0.0;
1304 setup->coef.dady[1 + fragSlot][0] = 0.0;
1305 }
1306 }
1307
1308
1309 if (halfSize <= 0.5 && !round) {
1310 /* special case for 1-pixel points */
1311 const int ix = ((int) x) & 1;
1312 const int iy = ((int) y) & 1;
1313 setup->quad[0].input.x0 = (int) x - ix;
1314 setup->quad[0].input.y0 = (int) y - iy;
1315 setup->quad[0].inout.mask = (1 << ix) << (2 * iy);
1316 clip_emit_quad( setup, &setup->quad[0] );
1317 }
1318 else {
1319 if (round) {
1320 /* rounded points */
1321 const int ixmin = block((int) (x - halfSize));
1322 const int ixmax = block((int) (x + halfSize));
1323 const int iymin = block((int) (y - halfSize));
1324 const int iymax = block((int) (y + halfSize));
1325 const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */
1326 const float rmax = halfSize + 0.7071F;
1327 const float rmin2 = MAX2(0.0F, rmin * rmin);
1328 const float rmax2 = rmax * rmax;
1329 const float cscale = 1.0F / (rmax2 - rmin2);
1330 int ix, iy;
1331
1332 for (iy = iymin; iy <= iymax; iy += 2) {
1333 for (ix = ixmin; ix <= ixmax; ix += 2) {
1334 float dx, dy, dist2, cover;
1335
1336 setup->quad[0].inout.mask = 0x0;
1337
1338 dx = (ix + 0.5f) - x;
1339 dy = (iy + 0.5f) - y;
1340 dist2 = dx * dx + dy * dy;
1341 if (dist2 <= rmax2) {
1342 cover = 1.0F - (dist2 - rmin2) * cscale;
1343 setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
1344 setup->quad[0].inout.mask |= MASK_TOP_LEFT;
1345 }
1346
1347 dx = (ix + 1.5f) - x;
1348 dy = (iy + 0.5f) - y;
1349 dist2 = dx * dx + dy * dy;
1350 if (dist2 <= rmax2) {
1351 cover = 1.0F - (dist2 - rmin2) * cscale;
1352 setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
1353 setup->quad[0].inout.mask |= MASK_TOP_RIGHT;
1354 }
1355
1356 dx = (ix + 0.5f) - x;
1357 dy = (iy + 1.5f) - y;
1358 dist2 = dx * dx + dy * dy;
1359 if (dist2 <= rmax2) {
1360 cover = 1.0F - (dist2 - rmin2) * cscale;
1361 setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
1362 setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT;
1363 }
1364
1365 dx = (ix + 1.5f) - x;
1366 dy = (iy + 1.5f) - y;
1367 dist2 = dx * dx + dy * dy;
1368 if (dist2 <= rmax2) {
1369 cover = 1.0F - (dist2 - rmin2) * cscale;
1370 setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
1371 setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT;
1372 }
1373
1374 if (setup->quad[0].inout.mask) {
1375 setup->quad[0].input.x0 = ix;
1376 setup->quad[0].input.y0 = iy;
1377 clip_emit_quad( setup, &setup->quad[0] );
1378 }
1379 }
1380 }
1381 }
1382 else {
1383 /* square points */
1384 const int xmin = (int) (x + 0.75 - halfSize);
1385 const int ymin = (int) (y + 0.25 - halfSize);
1386 const int xmax = xmin + (int) size;
1387 const int ymax = ymin + (int) size;
1388 /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
1389 const int ixmin = block(xmin);
1390 const int ixmax = block(xmax - 1);
1391 const int iymin = block(ymin);
1392 const int iymax = block(ymax - 1);
1393 int ix, iy;
1394
1395 /*
1396 debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
1397 */
1398 for (iy = iymin; iy <= iymax; iy += 2) {
1399 uint rowMask = 0xf;
1400 if (iy < ymin) {
1401 /* above the top edge */
1402 rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
1403 }
1404 if (iy + 1 >= ymax) {
1405 /* below the bottom edge */
1406 rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
1407 }
1408
1409 for (ix = ixmin; ix <= ixmax; ix += 2) {
1410 uint mask = rowMask;
1411
1412 if (ix < xmin) {
1413 /* fragment is past left edge of point, turn off left bits */
1414 mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
1415 }
1416 if (ix + 1 >= xmax) {
1417 /* past the right edge */
1418 mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
1419 }
1420
1421 setup->quad[0].inout.mask = mask;
1422 setup->quad[0].input.x0 = ix;
1423 setup->quad[0].input.y0 = iy;
1424 clip_emit_quad( setup, &setup->quad[0] );
1425 }
1426 }
1427 }
1428 }
1429 }
1430
1431 void llvmpipe_setup_prepare( struct setup_context *setup )
1432 {
1433 struct llvmpipe_context *lp = setup->llvmpipe;
1434
1435 if (lp->dirty) {
1436 llvmpipe_update_derived(lp);
1437 }
1438
1439 if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
1440 lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL &&
1441 lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) {
1442 /* we'll do culling */
1443 setup->winding = lp->rasterizer->cull_mode;
1444 }
1445 else {
1446 /* 'draw' will do culling */
1447 setup->winding = PIPE_WINDING_NONE;
1448 }
1449 }
1450
1451
1452
/**
 * Destroy a setup context created by llvmpipe_setup_create_context().
 *
 * A NULL pointer is accepted and ignored, mirroring free(NULL), so callers
 * can tear down unconditionally even when creation failed.  The guard is
 * explicit because align_free()'s handling of NULL is not visible from
 * this file.
 *
 * \param setup  the context to release, or NULL
 */
void llvmpipe_setup_destroy_context( struct setup_context *setup )
{
   if (!setup)
      return;

   align_free( setup );
}
1457
1458
1459 /**
1460 * Create a new primitive setup/render stage.
1461 */
1462 struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe )
1463 {
1464 struct setup_context *setup;
1465 unsigned i;
1466
1467 setup = align_malloc(sizeof(struct setup_context), 16);
1468 if (!setup)
1469 return NULL;
1470
1471 memset(setup, 0, sizeof *setup);
1472 setup->llvmpipe = llvmpipe;
1473
1474 for (i = 0; i < MAX_QUADS; i++) {
1475 setup->quad[i].coef = &setup->coef;
1476 }
1477
1478 setup->span.left[0] = 1000000; /* greater than right[0] */
1479 setup->span.left[1] = 1000000; /* greater than right[1] */
1480
1481 return setup;
1482 }
1483