1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * \brief Primitive rasterization/rendering (points, lines, triangles)
31 * \author Keith Whitwell <keith@tungstengraphics.com>
35 #include "lp_context.h"
36 #include "lp_prim_setup.h"
40 #include "draw/draw_context.h"
41 #include "draw/draw_private.h"
42 #include "draw/draw_vertex.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "pipe/p_thread.h"
45 #include "util/u_math.h"
46 #include "util/u_memory.h"
47 #include "lp_tile_cache.h"
48 #include "lp_tile_soa.h"
58 float dx
; /**< X(v1) - X(v0), used only during setup */
59 float dy
; /**< Y(v1) - Y(v0), used only during setup */
60 float dxdy
; /**< dx/dy */
61 float sx
, sy
; /**< first sample point coord */
62 int lines
; /**< number of lines on this edge */
70 * Triangle setup info (derived from draw_stage).
71 * Also used for line drawing (taking some liberties).
73 struct setup_context
{
74 struct llvmpipe_context
*llvmpipe
;
76 /* Vertices are just an array of floats making up each attribute in
77 * turn. Currently fixed at 4 floats, but should change in time.
78 * Codegen will help cope with this.
80 const float (*vmax
)[4];
81 const float (*vmid
)[4];
82 const float (*vmin
)[4];
83 const float (*vprovoke
)[4];
92 struct quad_header quad
[MAX_QUADS
];
93 struct quad_header
*quad_ptrs
[MAX_QUADS
];
96 struct quad_interp_coef coef
;
99 int left
[2]; /**< [0] = row0, [1] = row1 */
105 uint numFragsEmitted
; /**< per primitive */
106 uint numFragsWritten
; /**< per primitive */
109 unsigned winding
; /* which winding to cull */
115 * Execute fragment shader for the four fragments in the quad.
118 shade_quads(struct llvmpipe_context
*llvmpipe
,
119 struct quad_header
*quads
[],
122 struct lp_fragment_shader
*fs
= llvmpipe
->fs
;
123 struct quad_header
*quad
= quads
[0];
124 const unsigned x
= quad
->input
.x0
;
125 const unsigned y
= quad
->input
.y0
;
126 uint8_t *tile
= lp_get_cached_tile(llvmpipe
->cbuf_cache
[0], x
, y
);
129 uint32_t ALIGN16_ATTRIB mask
[4][NUM_CHANNELS
];
138 assert(nr
* QUAD_SIZE
== TILE_VECTOR_HEIGHT
* TILE_VECTOR_WIDTH
);
139 assert(x
% TILE_VECTOR_WIDTH
== 0);
140 assert(y
% TILE_VECTOR_HEIGHT
== 0);
141 for (q
= 0; q
< nr
; ++q
) {
142 assert(quads
[q
]->input
.x0
== x
+ q
*2);
143 assert(quads
[q
]->input
.y0
== y
);
147 for (q
= 0; q
< 4; ++q
)
148 for (chan_index
= 0; chan_index
< NUM_CHANNELS
; ++chan_index
)
149 mask
[q
][chan_index
] = quads
[q
]->inout
.mask
& (1 << chan_index
) ? ~0 : 0;
152 color
= &TILE_PIXEL(tile
, x
& (TILE_SIZE
-1), y
& (TILE_SIZE
-1), 0);
155 if(llvmpipe
->zsbuf_map
) {
156 assert((x
% 2) == 0);
157 assert((y
% 2) == 0);
158 depth
= llvmpipe
->zsbuf_map
+
159 y
*llvmpipe
->zsbuf_transfer
->stride
+
160 2*x
*llvmpipe
->zsbuf_transfer
->block
.size
;
165 /* TODO: blend color */
167 assert((((uintptr_t)mask
) & 0xf) == 0);
168 assert((((uintptr_t)depth
) & 0xf) == 0);
169 assert((((uintptr_t)color
) & 0xf) == 0);
170 assert((((uintptr_t)llvmpipe
->jit_context
.blend_color
) & 0xf) == 0);
173 fs
->current
->jit_function( &llvmpipe
->jit_context
,
187 * Do triangle cull test using tri determinant (sign indicates orientation)
188 * \return true if triangle is to be culled.
190 static INLINE boolean
191 cull_tri(const struct setup_context
*setup
, float det
)
194 /* if (det < 0 then Z points toward camera and triangle is
195 * counter-clockwise winding.
197 unsigned winding
= (det
< 0) ? PIPE_WINDING_CCW
: PIPE_WINDING_CW
;
199 if ((winding
& setup
->winding
) == 0)
211 * Clip setup->quad against the scissor/surface bounds.
214 quad_clip( struct setup_context
*setup
, struct quad_header
*quad
)
216 const struct pipe_scissor_state
*cliprect
= &setup
->llvmpipe
->cliprect
;
217 const int minx
= (int) cliprect
->minx
;
218 const int maxx
= (int) cliprect
->maxx
;
219 const int miny
= (int) cliprect
->miny
;
220 const int maxy
= (int) cliprect
->maxy
;
222 if (quad
->input
.x0
>= maxx
||
223 quad
->input
.y0
>= maxy
||
224 quad
->input
.x0
+ 1 < minx
||
225 quad
->input
.y0
+ 1 < miny
) {
226 /* totally clipped */
227 quad
->inout
.mask
= 0x0;
230 if (quad
->input
.x0
< minx
)
231 quad
->inout
.mask
&= (MASK_BOTTOM_RIGHT
| MASK_TOP_RIGHT
);
232 if (quad
->input
.y0
< miny
)
233 quad
->inout
.mask
&= (MASK_BOTTOM_LEFT
| MASK_BOTTOM_RIGHT
);
234 if (quad
->input
.x0
== maxx
- 1)
235 quad
->inout
.mask
&= (MASK_BOTTOM_LEFT
| MASK_TOP_LEFT
);
236 if (quad
->input
.y0
== maxy
- 1)
237 quad
->inout
.mask
&= (MASK_TOP_LEFT
| MASK_TOP_RIGHT
);
243 * Given an X or Y coordinate, return the block/quad coordinate that it
246 static INLINE
int block( int x
)
251 static INLINE
int block_x( int x
)
253 return x
& ~(TILE_VECTOR_WIDTH
- 1);
258 * Emit a quad (pass to next stage) with clipping.
261 clip_emit_quad( struct setup_context
*setup
, struct quad_header
*quad
)
263 quad_clip( setup
, quad
);
265 if (quad
->inout
.mask
) {
266 struct llvmpipe_context
*lp
= setup
->llvmpipe
;
269 /* XXX: The blender expects 4 quads. This is far from efficient, but
270 * until we codegenerate single-quad variants of the fragment pipeline
271 * we need this hack. */
272 const unsigned nr_quads
= TILE_VECTOR_HEIGHT
*TILE_VECTOR_WIDTH
/QUAD_SIZE
;
273 struct quad_header quads
[nr_quads
];
274 struct quad_header
*quad_ptrs
[nr_quads
];
275 int x0
= block_x(quad
->input
.x0
);
278 for(i
= 0; i
< nr_quads
; ++i
) {
280 if(x
== quad
->input
.x0
)
281 memcpy(&quads
[i
], quad
, sizeof quads
[i
]);
283 memset(&quads
[i
], 0, sizeof quads
[i
]);
284 quads
[i
].input
.x0
= x
;
285 quads
[i
].input
.y0
= quad
->input
.y0
;
286 quads
[i
].coef
= quad
->coef
;
288 quad_ptrs
[i
] = &quads
[i
];
291 shade_quads( lp
, quad_ptrs
, nr_quads
);
293 shade_quads( lp
, &quad
, 1 );
300 * Render a horizontal span of quads
302 static void flush_spans( struct setup_context
*setup
)
304 const int step
= TILE_VECTOR_WIDTH
;
305 const int xleft0
= setup
->span
.left
[0];
306 const int xleft1
= setup
->span
.left
[1];
307 const int xright0
= setup
->span
.right
[0];
308 const int xright1
= setup
->span
.right
[1];
311 int minleft
= block_x(MIN2(xleft0
, xleft1
));
312 int maxright
= MAX2(xright0
, xright1
);
315 for (x
= minleft
; x
< maxright
; x
+= step
) {
316 unsigned skip_left0
= CLAMP(xleft0
- x
, 0, step
);
317 unsigned skip_left1
= CLAMP(xleft1
- x
, 0, step
);
318 unsigned skip_right0
= CLAMP(x
+ step
- xright0
, 0, step
);
319 unsigned skip_right1
= CLAMP(x
+ step
- xright1
, 0, step
);
321 const unsigned nr_quads
= TILE_VECTOR_HEIGHT
*TILE_VECTOR_WIDTH
/QUAD_SIZE
;
324 unsigned skipmask_left0
= (1U << skip_left0
) - 1U;
325 unsigned skipmask_left1
= (1U << skip_left1
) - 1U;
327 /* These calculations fail when step == 32 and skip_right == 0.
329 unsigned skipmask_right0
= ~0U << (unsigned)(step
- skip_right0
);
330 unsigned skipmask_right1
= ~0U << (unsigned)(step
- skip_right1
);
332 unsigned mask0
= ~skipmask_left0
& ~skipmask_right0
;
333 unsigned mask1
= ~skipmask_left1
& ~skipmask_right1
;
336 for(q
= 0; q
< nr_quads
; ++q
) {
337 unsigned quadmask
= (mask0
& 3) | ((mask1
& 3) << 2);
338 setup
->quad
[q
].input
.x0
= lx
;
339 setup
->quad
[q
].input
.y0
= setup
->span
.y
;
340 setup
->quad
[q
].inout
.mask
= quadmask
;
341 setup
->quad_ptrs
[q
] = &setup
->quad
[q
];
346 assert(!(mask0
| mask1
));
348 shade_quads(setup
->llvmpipe
, setup
->quad_ptrs
, nr_quads
);
354 setup
->span
.right
[0] = 0;
355 setup
->span
.right
[1] = 0;
356 setup
->span
.left
[0] = 1000000; /* greater than right[0] */
357 setup
->span
.left
[1] = 1000000; /* greater than right[1] */
362 static void print_vertex(const struct setup_context
*setup
,
366 debug_printf(" Vertex: (%p)\n", v
);
367 for (i
= 0; i
< setup
->quad
[0].nr_attrs
; i
++) {
368 debug_printf(" %d: %f %f %f %f\n", i
,
369 v
[i
][0], v
[i
][1], v
[i
][2], v
[i
][3]);
370 if (util_is_inf_or_nan(v
[i
][0])) {
371 debug_printf(" NaN!\n");
378 * Sort the vertices from top to bottom order, setting up the triangle
379 * edge fields (ebot, emaj, etop).
380 * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise
382 static boolean
setup_sort_vertices( struct setup_context
*setup
,
384 const float (*v0
)[4],
385 const float (*v1
)[4],
386 const float (*v2
)[4] )
388 setup
->vprovoke
= v2
;
390 /* determine bottom to top order of vertices */
437 setup
->ebot
.dx
= setup
->vmid
[0][0] - setup
->vmin
[0][0];
438 setup
->ebot
.dy
= setup
->vmid
[0][1] - setup
->vmin
[0][1];
439 setup
->emaj
.dx
= setup
->vmax
[0][0] - setup
->vmin
[0][0];
440 setup
->emaj
.dy
= setup
->vmax
[0][1] - setup
->vmin
[0][1];
441 setup
->etop
.dx
= setup
->vmax
[0][0] - setup
->vmid
[0][0];
442 setup
->etop
.dy
= setup
->vmax
[0][1] - setup
->vmid
[0][1];
445 * Compute triangle's area. Use 1/area to compute partial
446 * derivatives of attributes later.
448 * The area will be the same as prim->det, but the sign may be
449 * different depending on how the vertices get sorted above.
451 * To determine whether the primitive is front or back facing we
452 * use the prim->det value because its sign is correct.
455 const float area
= (setup
->emaj
.dx
* setup
->ebot
.dy
-
456 setup
->ebot
.dx
* setup
->emaj
.dy
);
458 setup
->oneoverarea
= 1.0f
/ area
;
461 debug_printf("%s one-over-area %f area %f det %f\n",
462 __FUNCTION__, setup->oneoverarea, area, det );
464 if (util_is_inf_or_nan(setup
->oneoverarea
))
468 /* We need to know if this is a front or back-facing triangle for:
469 * - the GLSL gl_FrontFacing fragment attribute (bool)
470 * - two-sided stencil test
474 (setup
->llvmpipe
->rasterizer
->front_winding
== PIPE_WINDING_CW
));
481 * Compute a0, dadx and dady for a linearly interpolated coefficient,
484 static void tri_pos_coeff( struct setup_context
*setup
,
485 uint vertSlot
, unsigned i
)
487 float botda
= setup
->vmid
[vertSlot
][i
] - setup
->vmin
[vertSlot
][i
];
488 float majda
= setup
->vmax
[vertSlot
][i
] - setup
->vmin
[vertSlot
][i
];
489 float a
= setup
->ebot
.dy
* majda
- botda
* setup
->emaj
.dy
;
490 float b
= setup
->emaj
.dx
* botda
- majda
* setup
->ebot
.dx
;
491 float dadx
= a
* setup
->oneoverarea
;
492 float dady
= b
* setup
->oneoverarea
;
496 setup
->coef
.dadx
[0][i
] = dadx
;
497 setup
->coef
.dady
[0][i
] = dady
;
499 /* calculate a0 as the value which would be sampled for the
500 * fragment at (0,0), taking into account that we want to sample at
501 * pixel centers, in other words (0.5, 0.5).
503 * this is neat but unfortunately not a good way to do things for
504 * triangles with very large values of dadx or dady as it will
505 * result in the subtraction and re-addition from a0 of a very
506 * large number, which means we'll end up loosing a lot of the
507 * fractional bits and precision from a0. the way to fix this is
508 * to define a0 as the sample at a pixel center somewhere near vmin
509 * instead - i'll switch to this later.
511 setup
->coef
.a0
[0][i
] = (setup
->vmin
[vertSlot
][i
] -
512 (dadx
* (setup
->vmin
[0][0] - 0.5f
) +
513 dady
* (setup
->vmin
[0][1] - 0.5f
)));
516 debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
518 setup->coef[slot].a0[i],
519 setup->coef[slot].dadx[i],
520 setup->coef[slot].dady[i]);
526 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
527 * The value value comes from vertex[slot][i].
528 * The result will be put into setup->coef[slot].a0[i].
529 * \param slot which attribute slot
530 * \param i which component of the slot (0..3)
532 static void const_pos_coeff( struct setup_context
*setup
,
533 uint vertSlot
, unsigned i
)
535 setup
->coef
.dadx
[0][i
] = 0;
536 setup
->coef
.dady
[0][i
] = 0;
538 /* need provoking vertex info!
540 setup
->coef
.a0
[0][i
] = setup
->vprovoke
[vertSlot
][i
];
545 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
546 * The value value comes from vertex[slot][i].
547 * The result will be put into setup->coef[slot].a0[i].
548 * \param slot which attribute slot
549 * \param i which component of the slot (0..3)
551 static void const_coeff( struct setup_context
*setup
,
556 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
557 setup
->coef
.dadx
[1 + attrib
][i
] = 0;
558 setup
->coef
.dady
[1 + attrib
][i
] = 0;
560 /* need provoking vertex info!
562 setup
->coef
.a0
[1 + attrib
][i
] = setup
->vprovoke
[vertSlot
][i
];
568 * Compute a0, dadx and dady for a linearly interpolated coefficient,
571 static void tri_linear_coeff( struct setup_context
*setup
,
576 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
577 float botda
= setup
->vmid
[vertSlot
][i
] - setup
->vmin
[vertSlot
][i
];
578 float majda
= setup
->vmax
[vertSlot
][i
] - setup
->vmin
[vertSlot
][i
];
579 float a
= setup
->ebot
.dy
* majda
- botda
* setup
->emaj
.dy
;
580 float b
= setup
->emaj
.dx
* botda
- majda
* setup
->ebot
.dx
;
581 float dadx
= a
* setup
->oneoverarea
;
582 float dady
= b
* setup
->oneoverarea
;
586 setup
->coef
.dadx
[1 + attrib
][i
] = dadx
;
587 setup
->coef
.dady
[1 + attrib
][i
] = dady
;
589 /* calculate a0 as the value which would be sampled for the
590 * fragment at (0,0), taking into account that we want to sample at
591 * pixel centers, in other words (0.5, 0.5).
593 * this is neat but unfortunately not a good way to do things for
594 * triangles with very large values of dadx or dady as it will
595 * result in the subtraction and re-addition from a0 of a very
596 * large number, which means we'll end up loosing a lot of the
597 * fractional bits and precision from a0. the way to fix this is
598 * to define a0 as the sample at a pixel center somewhere near vmin
599 * instead - i'll switch to this later.
601 setup
->coef
.a0
[1 + attrib
][i
] = (setup
->vmin
[vertSlot
][i
] -
602 (dadx
* (setup
->vmin
[0][0] - 0.5f
) +
603 dady
* (setup
->vmin
[0][1] - 0.5f
)));
606 debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
608 setup->coef[slot].a0[i],
609 setup->coef[slot].dadx[i],
610 setup->coef[slot].dady[i]);
617 * Compute a0, dadx and dady for a perspective-corrected interpolant,
619 * We basically multiply the vertex value by 1/w before computing
620 * the plane coefficients (a0, dadx, dady).
621 * Later, when we compute the value at a particular fragment position we'll
622 * divide the interpolated value by the interpolated W at that fragment.
624 static void tri_persp_coeff( struct setup_context
*setup
,
629 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
630 /* premultiply by 1/w (v[0][3] is always W):
632 float mina
= setup
->vmin
[vertSlot
][i
] * setup
->vmin
[0][3];
633 float mida
= setup
->vmid
[vertSlot
][i
] * setup
->vmid
[0][3];
634 float maxa
= setup
->vmax
[vertSlot
][i
] * setup
->vmax
[0][3];
635 float botda
= mida
- mina
;
636 float majda
= maxa
- mina
;
637 float a
= setup
->ebot
.dy
* majda
- botda
* setup
->emaj
.dy
;
638 float b
= setup
->emaj
.dx
* botda
- majda
* setup
->ebot
.dx
;
639 float dadx
= a
* setup
->oneoverarea
;
640 float dady
= b
* setup
->oneoverarea
;
643 debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
644 setup->vmin[vertSlot][i],
645 setup->vmid[vertSlot][i],
646 setup->vmax[vertSlot][i]
651 setup
->coef
.dadx
[1 + attrib
][i
] = dadx
;
652 setup
->coef
.dady
[1 + attrib
][i
] = dady
;
653 setup
->coef
.a0
[1 + attrib
][i
] = (mina
-
654 (dadx
* (setup
->vmin
[0][0] - 0.5f
) +
655 dady
* (setup
->vmin
[0][1] - 0.5f
)));
661 * Special coefficient setup for gl_FragCoord.
662 * X and Y are trivial, though Y has to be inverted for OpenGL.
663 * Z and W are copied from posCoef which should have already been computed.
664 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
667 setup_fragcoord_coeff(struct setup_context
*setup
, uint slot
)
670 setup
->coef
.a0
[1 + slot
][0] = 0;
671 setup
->coef
.dadx
[1 + slot
][0] = 1.0;
672 setup
->coef
.dady
[1 + slot
][0] = 0.0;
674 setup
->coef
.a0
[1 + slot
][1] = 0.0;
675 setup
->coef
.dadx
[1 + slot
][1] = 0.0;
676 setup
->coef
.dady
[1 + slot
][1] = 1.0;
678 setup
->coef
.a0
[1 + slot
][2] = setup
->coef
.a0
[0][2];
679 setup
->coef
.dadx
[1 + slot
][2] = setup
->coef
.dadx
[0][2];
680 setup
->coef
.dady
[1 + slot
][2] = setup
->coef
.dady
[0][2];
682 setup
->coef
.a0
[1 + slot
][3] = setup
->coef
.a0
[0][3];
683 setup
->coef
.dadx
[1 + slot
][3] = setup
->coef
.dadx
[0][3];
684 setup
->coef
.dady
[1 + slot
][3] = setup
->coef
.dady
[0][3];
690 * Compute the setup->coef[] array dadx, dady, a0 values.
691 * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
693 static void setup_tri_coefficients( struct setup_context
*setup
)
695 struct llvmpipe_context
*llvmpipe
= setup
->llvmpipe
;
696 const struct lp_fragment_shader
*lpfs
= llvmpipe
->fs
;
697 const struct vertex_info
*vinfo
= llvmpipe_get_vertex_info(llvmpipe
);
700 /* z and w are done by linear interpolation:
702 tri_pos_coeff(setup
, 0, 2);
703 tri_pos_coeff(setup
, 0, 3);
705 /* setup interpolation for all the remaining attributes:
707 for (fragSlot
= 0; fragSlot
< lpfs
->info
.num_inputs
; fragSlot
++) {
708 const uint vertSlot
= vinfo
->attrib
[fragSlot
].src_index
;
710 switch (vinfo
->attrib
[fragSlot
].interp_mode
) {
711 case INTERP_CONSTANT
:
712 const_coeff(setup
, fragSlot
, vertSlot
);
715 tri_linear_coeff(setup
, fragSlot
, vertSlot
);
717 case INTERP_PERSPECTIVE
:
718 tri_persp_coeff(setup
, fragSlot
, vertSlot
);
721 setup_fragcoord_coeff(setup
, fragSlot
);
727 if (lpfs
->info
.input_semantic_name
[fragSlot
] == TGSI_SEMANTIC_FACE
) {
728 setup
->coef
.a0
[1 + fragSlot
][0] = 1.0f
- setup
->facing
;
729 setup
->coef
.dadx
[1 + fragSlot
][0] = 0.0;
730 setup
->coef
.dady
[1 + fragSlot
][0] = 0.0;
737 static void setup_tri_edges( struct setup_context
*setup
)
739 float vmin_x
= setup
->vmin
[0][0] + 0.5f
;
740 float vmid_x
= setup
->vmid
[0][0] + 0.5f
;
742 float vmin_y
= setup
->vmin
[0][1] - 0.5f
;
743 float vmid_y
= setup
->vmid
[0][1] - 0.5f
;
744 float vmax_y
= setup
->vmax
[0][1] - 0.5f
;
746 setup
->emaj
.sy
= ceilf(vmin_y
);
747 setup
->emaj
.lines
= (int) ceilf(vmax_y
- setup
->emaj
.sy
);
748 setup
->emaj
.dxdy
= setup
->emaj
.dx
/ setup
->emaj
.dy
;
749 setup
->emaj
.sx
= vmin_x
+ (setup
->emaj
.sy
- vmin_y
) * setup
->emaj
.dxdy
;
751 setup
->etop
.sy
= ceilf(vmid_y
);
752 setup
->etop
.lines
= (int) ceilf(vmax_y
- setup
->etop
.sy
);
753 setup
->etop
.dxdy
= setup
->etop
.dx
/ setup
->etop
.dy
;
754 setup
->etop
.sx
= vmid_x
+ (setup
->etop
.sy
- vmid_y
) * setup
->etop
.dxdy
;
756 setup
->ebot
.sy
= ceilf(vmin_y
);
757 setup
->ebot
.lines
= (int) ceilf(vmid_y
- setup
->ebot
.sy
);
758 setup
->ebot
.dxdy
= setup
->ebot
.dx
/ setup
->ebot
.dy
;
759 setup
->ebot
.sx
= vmin_x
+ (setup
->ebot
.sy
- vmin_y
) * setup
->ebot
.dxdy
;
764 * Render the upper or lower half of a triangle.
765 * Scissoring/cliprect is applied here too.
767 static void subtriangle( struct setup_context
*setup
,
772 const struct pipe_scissor_state
*cliprect
= &setup
->llvmpipe
->cliprect
;
773 const int minx
= (int) cliprect
->minx
;
774 const int maxx
= (int) cliprect
->maxx
;
775 const int miny
= (int) cliprect
->miny
;
776 const int maxy
= (int) cliprect
->maxy
;
777 int y
, start_y
, finish_y
;
778 int sy
= (int)eleft
->sy
;
780 assert((int)eleft
->sy
== (int) eright
->sy
);
782 /* clip top/bottom */
787 finish_y
= sy
+ lines
;
795 debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
798 for (y
= start_y
; y
< finish_y
; y
++) {
800 /* avoid accumulating adds as floats don't have the precision to
801 * accurately iterate large triangle edges that way. luckily we
802 * can just multiply these days.
804 * this is all drowned out by the attribute interpolation anyway.
806 int left
= (int)(eleft
->sx
+ y
* eleft
->dxdy
);
807 int right
= (int)(eright
->sx
+ y
* eright
->dxdy
);
809 /* clip left/right */
817 if (block(_y
) != setup
->span
.y
) {
819 setup
->span
.y
= block(_y
);
822 setup
->span
.left
[_y
&1] = left
;
823 setup
->span
.right
[_y
&1] = right
;
828 /* save the values so that emaj can be restarted:
830 eleft
->sx
+= lines
* eleft
->dxdy
;
831 eright
->sx
+= lines
* eright
->dxdy
;
/**
 * Recalculate prim's determinant.  This is needed as we don't get this
 * information through the vbuf_render interface, so it must be computed
 * from the vertices.
 *
 * Only the X and Y components of attribute 0 of each vertex are read.
 *
 * \param v0  first vertex
 * \param v1  second vertex
 * \param v2  third vertex
 * \return Z component of cross(v0 - v2, v1 - v2)
 */
static float
calc_det( const float (*v0)[4],
          const float (*v1)[4],
          const float (*v2)[4] )
{
   /* edge vectors e = v0 - v2, f = v1 - v2 */
   const float ex = v0[0][0] - v2[0][0];
   const float ey = v0[0][1] - v2[0][1];
   const float fx = v1[0][0] - v2[0][0];
   const float fy = v1[0][1] - v2[0][1];

   /* det = cross(e,f).z */
   return ex * fy - ey * fx;
}
859 * Do setup for triangle rasterization, then render the triangle.
861 void llvmpipe_setup_tri( struct setup_context
*setup
,
862 const float (*v0
)[4],
863 const float (*v1
)[4],
864 const float (*v2
)[4] )
869 debug_printf("Setup triangle:\n");
870 print_vertex(setup
, v0
);
871 print_vertex(setup
, v1
);
872 print_vertex(setup
, v2
);
875 if (setup
->llvmpipe
->no_rast
)
878 det
= calc_det(v0
, v1
, v2
);
880 debug_printf("%s\n", __FUNCTION__ );
884 setup
->numFragsEmitted
= 0;
885 setup
->numFragsWritten
= 0;
888 if (cull_tri( setup
, det
))
891 if (!setup_sort_vertices( setup
, det
, v0
, v1
, v2
))
893 setup_tri_coefficients( setup
);
894 setup_tri_edges( setup
);
896 assert(setup
->llvmpipe
->reduced_prim
== PIPE_PRIM_TRIANGLES
);
899 setup
->span
.right
[0] = 0;
900 setup
->span
.right
[1] = 0;
901 /* setup->span.z_mode = tri_z_mode( setup->ctx ); */
903 /* init_constant_attribs( setup ); */
905 if (setup
->oneoverarea
< 0.0) {
908 subtriangle( setup
, &setup
->emaj
, &setup
->ebot
, setup
->ebot
.lines
);
909 subtriangle( setup
, &setup
->emaj
, &setup
->etop
, setup
->etop
.lines
);
914 subtriangle( setup
, &setup
->ebot
, &setup
->emaj
, setup
->ebot
.lines
);
915 subtriangle( setup
, &setup
->etop
, &setup
->emaj
, setup
->etop
.lines
);
918 flush_spans( setup
);
921 printf("Tri: %u frags emitted, %u written\n",
922 setup
->numFragsEmitted
,
923 setup
->numFragsWritten
);
930 * Compute a0, dadx and dady for a linearly interpolated coefficient,
934 linear_pos_coeff(struct setup_context
*setup
,
935 uint vertSlot
, uint i
)
937 const float da
= setup
->vmax
[vertSlot
][i
] - setup
->vmin
[vertSlot
][i
];
938 const float dadx
= da
* setup
->emaj
.dx
* setup
->oneoverarea
;
939 const float dady
= da
* setup
->emaj
.dy
* setup
->oneoverarea
;
940 setup
->coef
.dadx
[0][i
] = dadx
;
941 setup
->coef
.dady
[0][i
] = dady
;
942 setup
->coef
.a0
[0][i
] = (setup
->vmin
[vertSlot
][i
] -
943 (dadx
* (setup
->vmin
[0][0] - 0.5f
) +
944 dady
* (setup
->vmin
[0][1] - 0.5f
)));
949 * Compute a0, dadx and dady for a linearly interpolated coefficient,
953 line_linear_coeff(struct setup_context
*setup
,
958 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
959 const float da
= setup
->vmax
[vertSlot
][i
] - setup
->vmin
[vertSlot
][i
];
960 const float dadx
= da
* setup
->emaj
.dx
* setup
->oneoverarea
;
961 const float dady
= da
* setup
->emaj
.dy
* setup
->oneoverarea
;
962 setup
->coef
.dadx
[1 + attrib
][i
] = dadx
;
963 setup
->coef
.dady
[1 + attrib
][i
] = dady
;
964 setup
->coef
.a0
[1 + attrib
][i
] = (setup
->vmin
[vertSlot
][i
] -
965 (dadx
* (setup
->vmin
[0][0] - 0.5f
) +
966 dady
* (setup
->vmin
[0][1] - 0.5f
)));
972 * Compute a0, dadx and dady for a perspective-corrected interpolant,
976 line_persp_coeff(struct setup_context
*setup
,
981 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
982 /* XXX double-check/verify this arithmetic */
983 const float a0
= setup
->vmin
[vertSlot
][i
] * setup
->vmin
[0][3];
984 const float a1
= setup
->vmax
[vertSlot
][i
] * setup
->vmax
[0][3];
985 const float da
= a1
- a0
;
986 const float dadx
= da
* setup
->emaj
.dx
* setup
->oneoverarea
;
987 const float dady
= da
* setup
->emaj
.dy
* setup
->oneoverarea
;
988 setup
->coef
.dadx
[1 + attrib
][i
] = dadx
;
989 setup
->coef
.dady
[1 + attrib
][i
] = dady
;
990 setup
->coef
.a0
[1 + attrib
][i
] = (setup
->vmin
[vertSlot
][i
] -
991 (dadx
* (setup
->vmin
[0][0] - 0.5f
) +
992 dady
* (setup
->vmin
[0][1] - 0.5f
)));
998 * Compute the setup->coef[] array dadx, dady, a0 values.
999 * Must be called after setup->vmin,vmax are initialized.
1001 static INLINE boolean
1002 setup_line_coefficients(struct setup_context
*setup
,
1003 const float (*v0
)[4],
1004 const float (*v1
)[4])
1006 struct llvmpipe_context
*llvmpipe
= setup
->llvmpipe
;
1007 const struct lp_fragment_shader
*lpfs
= llvmpipe
->fs
;
1008 const struct vertex_info
*vinfo
= llvmpipe_get_vertex_info(llvmpipe
);
1012 /* use setup->vmin, vmax to point to vertices */
1013 if (llvmpipe
->rasterizer
->flatshade_first
)
1014 setup
->vprovoke
= v0
;
1016 setup
->vprovoke
= v1
;
1020 setup
->emaj
.dx
= setup
->vmax
[0][0] - setup
->vmin
[0][0];
1021 setup
->emaj
.dy
= setup
->vmax
[0][1] - setup
->vmin
[0][1];
1023 /* NOTE: this is not really area but something proportional to it */
1024 area
= setup
->emaj
.dx
* setup
->emaj
.dx
+ setup
->emaj
.dy
* setup
->emaj
.dy
;
1025 if (area
== 0.0f
|| util_is_inf_or_nan(area
))
1027 setup
->oneoverarea
= 1.0f
/ area
;
1029 /* z and w are done by linear interpolation:
1031 linear_pos_coeff(setup
, 0, 2);
1032 linear_pos_coeff(setup
, 0, 3);
1034 /* setup interpolation for all the remaining attributes:
1036 for (fragSlot
= 0; fragSlot
< lpfs
->info
.num_inputs
; fragSlot
++) {
1037 const uint vertSlot
= vinfo
->attrib
[fragSlot
].src_index
;
1039 switch (vinfo
->attrib
[fragSlot
].interp_mode
) {
1040 case INTERP_CONSTANT
:
1041 const_coeff(setup
, fragSlot
, vertSlot
);
1044 line_linear_coeff(setup
, fragSlot
, vertSlot
);
1046 case INTERP_PERSPECTIVE
:
1047 line_persp_coeff(setup
, fragSlot
, vertSlot
);
1050 setup_fragcoord_coeff(setup
, fragSlot
);
1056 if (lpfs
->info
.input_semantic_name
[fragSlot
] == TGSI_SEMANTIC_FACE
) {
1057 setup
->coef
.a0
[1 + fragSlot
][0] = 1.0f
- setup
->facing
;
1058 setup
->coef
.dadx
[1 + fragSlot
][0] = 0.0;
1059 setup
->coef
.dady
[1 + fragSlot
][0] = 0.0;
1067 * Plot a pixel in a line segment.
1070 plot(struct setup_context
*setup
, int x
, int y
)
1072 const int iy
= y
& 1;
1073 const int ix
= x
& 1;
1074 const int quadX
= x
- ix
;
1075 const int quadY
= y
- iy
;
1076 const int mask
= (1 << ix
) << (2 * iy
);
1078 if (quadX
!= setup
->quad
[0].input
.x0
||
1079 quadY
!= setup
->quad
[0].input
.y0
)
1081 /* flush prev quad, start new quad */
1083 if (setup
->quad
[0].input
.x0
!= -1)
1084 clip_emit_quad( setup
, &setup
->quad
[0] );
1086 setup
->quad
[0].input
.x0
= quadX
;
1087 setup
->quad
[0].input
.y0
= quadY
;
1088 setup
->quad
[0].inout
.mask
= 0x0;
1091 setup
->quad
[0].inout
.mask
|= mask
;
1096 * Do setup for line rasterization, then render the line.
1097 * Single-pixel width, no stipple, etc. We rely on the 'draw' module
1098 * to handle stippling and wide lines.
1101 llvmpipe_setup_line(struct setup_context
*setup
,
1102 const float (*v0
)[4],
1103 const float (*v1
)[4])
1105 int x0
= (int) v0
[0][0];
1106 int x1
= (int) v1
[0][0];
1107 int y0
= (int) v0
[0][1];
1108 int y1
= (int) v1
[0][1];
1114 debug_printf("Setup line:\n");
1115 print_vertex(setup
, v0
);
1116 print_vertex(setup
, v1
);
1119 if (setup
->llvmpipe
->no_rast
)
1122 if (dx
== 0 && dy
== 0)
1125 if (!setup_line_coefficients(setup
, v0
, v1
))
1128 assert(v0
[0][0] < 1.0e9
);
1129 assert(v0
[0][1] < 1.0e9
);
1130 assert(v1
[0][0] < 1.0e9
);
1131 assert(v1
[0][1] < 1.0e9
);
1134 dx
= -dx
; /* make positive */
1142 dy
= -dy
; /* make positive */
1151 assert(setup
->llvmpipe
->reduced_prim
== PIPE_PRIM_LINES
);
1153 setup
->quad
[0].input
.x0
= setup
->quad
[0].input
.y0
= -1;
1154 setup
->quad
[0].inout
.mask
= 0x0;
1156 /* XXX temporary: set coverage to 1.0 so the line appears
1157 * if AA mode happens to be enabled.
1159 setup
->quad
[0].input
.coverage
[0] =
1160 setup
->quad
[0].input
.coverage
[1] =
1161 setup
->quad
[0].input
.coverage
[2] =
1162 setup
->quad
[0].input
.coverage
[3] = 1.0;
1165 /*** X-major line ***/
1167 const int errorInc
= dy
+ dy
;
1168 int error
= errorInc
- dx
;
1169 const int errorDec
= error
- dx
;
1171 for (i
= 0; i
< dx
; i
++) {
1172 plot(setup
, x0
, y0
);
1185 /*** Y-major line ***/
1187 const int errorInc
= dx
+ dx
;
1188 int error
= errorInc
- dy
;
1189 const int errorDec
= error
- dy
;
1191 for (i
= 0; i
< dy
; i
++) {
1192 plot(setup
, x0
, y0
);
1205 /* draw final quad */
1206 if (setup
->quad
[0].inout
.mask
) {
1207 clip_emit_quad( setup
, &setup
->quad
[0] );
1213 point_persp_coeff(struct setup_context
*setup
,
1214 const float (*vert
)[4],
1219 for(i
= 0; i
< NUM_CHANNELS
; ++i
) {
1220 setup
->coef
.dadx
[1 + attrib
][i
] = 0.0F
;
1221 setup
->coef
.dady
[1 + attrib
][i
] = 0.0F
;
1222 setup
->coef
.a0
[1 + attrib
][i
] = vert
[vertSlot
][i
] * vert
[0][3];
1228 * Do setup for point rasterization, then render the point.
1229 * Round or square points...
1230 * XXX could optimize a lot for 1-pixel points.
1233 llvmpipe_setup_point( struct setup_context
*setup
,
1234 const float (*v0
)[4] )
1236 struct llvmpipe_context
*llvmpipe
= setup
->llvmpipe
;
1237 const struct lp_fragment_shader
*lpfs
= llvmpipe
->fs
;
1238 const int sizeAttr
= setup
->llvmpipe
->psize_slot
;
1240 = sizeAttr
> 0 ? v0
[sizeAttr
][0]
1241 : setup
->llvmpipe
->rasterizer
->point_size
;
1242 const float halfSize
= 0.5F
* size
;
1243 const boolean round
= (boolean
) setup
->llvmpipe
->rasterizer
->point_smooth
;
1244 const float x
= v0
[0][0]; /* Note: data[0] is always position */
1245 const float y
= v0
[0][1];
1246 const struct vertex_info
*vinfo
= llvmpipe_get_vertex_info(llvmpipe
);
1250 debug_printf("Setup point:\n");
1251 print_vertex(setup
, v0
);
1254 if (llvmpipe
->no_rast
)
1257 assert(setup
->llvmpipe
->reduced_prim
== PIPE_PRIM_POINTS
);
1259 /* For points, all interpolants are constant-valued.
1260 * However, for point sprites, we'll need to setup texcoords appropriately.
1261 * XXX: which coefficients are the texcoords???
1262 * We may do point sprites as textured quads...
1264 * KW: We don't know which coefficients are texcoords - ultimately
1265 * the choice of what interpolation mode to use for each attribute
1266 * should be determined by the fragment program, using
1267 * per-attribute declaration statements that include interpolation
1268 * mode as a parameter. So either the fragment program will have
1269 * to be adjusted for pointsprite vs normal point behaviour, or
1270 * otherwise a special interpolation mode will have to be defined
1271 * which matches the required behaviour for point sprites. But -
1272 * the latter is not a feature of normal hardware, and as such
1273 * probably should be ruled out on that basis.
1275 setup
->vprovoke
= v0
;
1278 const_pos_coeff(setup
, 0, 2);
1279 const_pos_coeff(setup
, 0, 3);
1281 for (fragSlot
= 0; fragSlot
< lpfs
->info
.num_inputs
; fragSlot
++) {
1282 const uint vertSlot
= vinfo
->attrib
[fragSlot
].src_index
;
1284 switch (vinfo
->attrib
[fragSlot
].interp_mode
) {
1285 case INTERP_CONSTANT
:
1288 const_coeff(setup
, fragSlot
, vertSlot
);
1290 case INTERP_PERSPECTIVE
:
1291 point_persp_coeff(setup
, setup
->vprovoke
, fragSlot
, vertSlot
);
1294 setup_fragcoord_coeff(setup
, fragSlot
);
1300 if (lpfs
->info
.input_semantic_name
[fragSlot
] == TGSI_SEMANTIC_FACE
) {
1301 setup
->coef
.a0
[1 + fragSlot
][0] = 1.0f
- setup
->facing
;
1302 setup
->coef
.dadx
[1 + fragSlot
][0] = 0.0;
1303 setup
->coef
.dady
[1 + fragSlot
][0] = 0.0;
1308 if (halfSize
<= 0.5 && !round
) {
1309 /* special case for 1-pixel points */
1310 const int ix
= ((int) x
) & 1;
1311 const int iy
= ((int) y
) & 1;
1312 setup
->quad
[0].input
.x0
= (int) x
- ix
;
1313 setup
->quad
[0].input
.y0
= (int) y
- iy
;
1314 setup
->quad
[0].inout
.mask
= (1 << ix
) << (2 * iy
);
1315 clip_emit_quad( setup
, &setup
->quad
[0] );
1319 /* rounded points */
1320 const int ixmin
= block((int) (x
- halfSize
));
1321 const int ixmax
= block((int) (x
+ halfSize
));
1322 const int iymin
= block((int) (y
- halfSize
));
1323 const int iymax
= block((int) (y
+ halfSize
));
1324 const float rmin
= halfSize
- 0.7071F
; /* 0.7071 = sqrt(2)/2 */
1325 const float rmax
= halfSize
+ 0.7071F
;
1326 const float rmin2
= MAX2(0.0F
, rmin
* rmin
);
1327 const float rmax2
= rmax
* rmax
;
1328 const float cscale
= 1.0F
/ (rmax2
- rmin2
);
1331 for (iy
= iymin
; iy
<= iymax
; iy
+= 2) {
1332 for (ix
= ixmin
; ix
<= ixmax
; ix
+= 2) {
1333 float dx
, dy
, dist2
, cover
;
1335 setup
->quad
[0].inout
.mask
= 0x0;
1337 dx
= (ix
+ 0.5f
) - x
;
1338 dy
= (iy
+ 0.5f
) - y
;
1339 dist2
= dx
* dx
+ dy
* dy
;
1340 if (dist2
<= rmax2
) {
1341 cover
= 1.0F
- (dist2
- rmin2
) * cscale
;
1342 setup
->quad
[0].input
.coverage
[QUAD_TOP_LEFT
] = MIN2(cover
, 1.0f
);
1343 setup
->quad
[0].inout
.mask
|= MASK_TOP_LEFT
;
1346 dx
= (ix
+ 1.5f
) - x
;
1347 dy
= (iy
+ 0.5f
) - y
;
1348 dist2
= dx
* dx
+ dy
* dy
;
1349 if (dist2
<= rmax2
) {
1350 cover
= 1.0F
- (dist2
- rmin2
) * cscale
;
1351 setup
->quad
[0].input
.coverage
[QUAD_TOP_RIGHT
] = MIN2(cover
, 1.0f
);
1352 setup
->quad
[0].inout
.mask
|= MASK_TOP_RIGHT
;
1355 dx
= (ix
+ 0.5f
) - x
;
1356 dy
= (iy
+ 1.5f
) - y
;
1357 dist2
= dx
* dx
+ dy
* dy
;
1358 if (dist2
<= rmax2
) {
1359 cover
= 1.0F
- (dist2
- rmin2
) * cscale
;
1360 setup
->quad
[0].input
.coverage
[QUAD_BOTTOM_LEFT
] = MIN2(cover
, 1.0f
);
1361 setup
->quad
[0].inout
.mask
|= MASK_BOTTOM_LEFT
;
1364 dx
= (ix
+ 1.5f
) - x
;
1365 dy
= (iy
+ 1.5f
) - y
;
1366 dist2
= dx
* dx
+ dy
* dy
;
1367 if (dist2
<= rmax2
) {
1368 cover
= 1.0F
- (dist2
- rmin2
) * cscale
;
1369 setup
->quad
[0].input
.coverage
[QUAD_BOTTOM_RIGHT
] = MIN2(cover
, 1.0f
);
1370 setup
->quad
[0].inout
.mask
|= MASK_BOTTOM_RIGHT
;
1373 if (setup
->quad
[0].inout
.mask
) {
1374 setup
->quad
[0].input
.x0
= ix
;
1375 setup
->quad
[0].input
.y0
= iy
;
1376 clip_emit_quad( setup
, &setup
->quad
[0] );
1383 const int xmin
= (int) (x
+ 0.75 - halfSize
);
1384 const int ymin
= (int) (y
+ 0.25 - halfSize
);
1385 const int xmax
= xmin
+ (int) size
;
1386 const int ymax
= ymin
+ (int) size
;
1387 /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
1388 const int ixmin
= block(xmin
);
1389 const int ixmax
= block(xmax
- 1);
1390 const int iymin
= block(ymin
);
1391 const int iymax
= block(ymax
- 1);
1395 debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
1397 for (iy
= iymin
; iy
<= iymax
; iy
+= 2) {
1400 /* above the top edge */
1401 rowMask
&= (MASK_BOTTOM_LEFT
| MASK_BOTTOM_RIGHT
);
1403 if (iy
+ 1 >= ymax
) {
1404 /* below the bottom edge */
1405 rowMask
&= (MASK_TOP_LEFT
| MASK_TOP_RIGHT
);
1408 for (ix
= ixmin
; ix
<= ixmax
; ix
+= 2) {
1409 uint mask
= rowMask
;
1412 /* fragment is past left edge of point, turn off left bits */
1413 mask
&= (MASK_BOTTOM_RIGHT
| MASK_TOP_RIGHT
);
1415 if (ix
+ 1 >= xmax
) {
1416 /* past the right edge */
1417 mask
&= (MASK_BOTTOM_LEFT
| MASK_TOP_LEFT
);
1420 setup
->quad
[0].inout
.mask
= mask
;
1421 setup
->quad
[0].input
.x0
= ix
;
1422 setup
->quad
[0].input
.y0
= iy
;
1423 clip_emit_quad( setup
, &setup
->quad
[0] );
1430 void llvmpipe_setup_prepare( struct setup_context
*setup
)
1432 struct llvmpipe_context
*lp
= setup
->llvmpipe
;
1435 llvmpipe_update_derived(lp
);
1438 if (lp
->reduced_api_prim
== PIPE_PRIM_TRIANGLES
&&
1439 lp
->rasterizer
->fill_cw
== PIPE_POLYGON_MODE_FILL
&&
1440 lp
->rasterizer
->fill_ccw
== PIPE_POLYGON_MODE_FILL
) {
1441 /* we'll do culling */
1442 setup
->winding
= lp
->rasterizer
->cull_mode
;
1445 /* 'draw' will do culling */
1446 setup
->winding
= PIPE_WINDING_NONE
;
/**
 * Destroy a setup context by handing it to align_free().
 *
 * \param setup  the setup context to release
 */
void
llvmpipe_setup_destroy_context( struct setup_context *setup )
{
   align_free(setup);
}
1459 * Create a new primitive setup/render stage.
1461 struct setup_context
*llvmpipe_setup_create_context( struct llvmpipe_context
*llvmpipe
)
1463 struct setup_context
*setup
;
1466 setup
= align_malloc(sizeof(struct setup_context
), 16);
1470 memset(setup
, 0, sizeof *setup
);
1471 setup
->llvmpipe
= llvmpipe
;
1473 for (i
= 0; i
< MAX_QUADS
; i
++) {
1474 setup
->quad
[i
].coef
= &setup
->coef
;
1477 setup
->span
.left
[0] = 1000000; /* greater than right[0] */
1478 setup
->span
.left
[1] = 1000000; /* greater than right[1] */