1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * Triangle rendering within a tile.
32 #include <transpose_matrix4x4.h>
33 #include "pipe/p_compiler.h"
34 #include "pipe/p_format.h"
35 #include "util/u_math.h"
36 #include "spu_colorpack.h"
38 #include "spu_texture.h"
43 /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
44 typedef vector
unsigned int mask_t
;
54 * Simplified types taken from other parts of Gallium
56 struct vertex_header
{
64 #define CEILF(X) ((float) (int) ((X) + 0.99999))
67 #define QUAD_TOP_LEFT 0
68 #define QUAD_TOP_RIGHT 1
69 #define QUAD_BOTTOM_LEFT 2
70 #define QUAD_BOTTOM_RIGHT 3
71 #define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT)
72 #define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT)
73 #define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT)
74 #define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
84 float dx
; /**< X(v1) - X(v0), used only during setup */
85 float dy
; /**< Y(v1) - Y(v0), used only during setup */
86 float dxdy
; /**< dx/dy */
87 float sx
, sy
; /**< first sample point coord */
88 int lines
; /**< number of lines on this edge */
101 * Triangle setup info (derived from draw_stage).
102 * Also used for line drawing (taking some liberties).
106 /* Vertices are just an array of floats making up each attribute in
107 * turn. Currently fixed at 4 floats, but should change in time.
108 * Codegen will help cope with this.
110 const struct vertex_header
*vmax
;
111 const struct vertex_header
*vmid
;
112 const struct vertex_header
*vmin
;
113 const struct vertex_header
*vprovoke
;
125 int cliprect_minx
, cliprect_maxx
, cliprect_miny
, cliprect_maxy
;
128 struct tgsi_interp_coef coef
[PIPE_MAX_SHADER_INPUTS
];
130 struct interp_coef coef
[PIPE_MAX_SHADER_INPUTS
];
134 struct quad_header quad
;
138 int left
[2]; /**< [0] = row0, [1] = row1 */
142 unsigned mask
; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
148 static struct setup_stage setup
;
155 * Basically a cast wrapper.
157 static INLINE
struct setup_stage
*setup_stage( struct draw_stage
*stage
)
159 return (struct setup_stage
*)stage
;
165 * Clip setup.quad against the scissor/surface bounds.
168 quad_clip(struct setup_stage
*setup
)
170 const struct pipe_scissor_state
*cliprect
= &setup
.softpipe
->cliprect
;
171 const int minx
= (int) cliprect
->minx
;
172 const int maxx
= (int) cliprect
->maxx
;
173 const int miny
= (int) cliprect
->miny
;
174 const int maxy
= (int) cliprect
->maxy
;
176 if (setup
.quad
.x0
>= maxx
||
177 setup
.quad
.y0
>= maxy
||
178 setup
.quad
.x0
+ 1 < minx
||
179 setup
.quad
.y0
+ 1 < miny
) {
180 /* totally clipped */
181 setup
.quad
.mask
= 0x0;
184 if (setup
.quad
.x0
< minx
)
185 setup
.quad
.mask
&= (MASK_BOTTOM_RIGHT
| MASK_TOP_RIGHT
);
186 if (setup
.quad
.y0
< miny
)
187 setup
.quad
.mask
&= (MASK_BOTTOM_LEFT
| MASK_BOTTOM_RIGHT
);
188 if (setup
.quad
.x0
== maxx
- 1)
189 setup
.quad
.mask
&= (MASK_BOTTOM_LEFT
| MASK_TOP_LEFT
);
190 if (setup
.quad
.y0
== maxy
- 1)
191 setup
.quad
.mask
&= (MASK_TOP_LEFT
| MASK_TOP_RIGHT
);
197 * Emit a quad (pass to next stage) with clipping.
200 clip_emit_quad(struct setup_stage
*setup
)
203 if (setup
.quad
.mask
) {
204 struct softpipe_context
*sp
= setup
.softpipe
;
205 sp
->quad
.first
->run(sp
->quad
.first
, &setup
.quad
);
211 * Evaluate attribute coefficients (plane equations) to compute
212 * attribute values for the four fragments in a quad.
213 * Eg: four colors will be computed (in AoS format).
216 eval_coeff(uint slot
, float x
, float y
, vector
float result
[4])
218 switch (spu
.vertex_info
.interp_mode
[slot
]) {
219 case INTERP_CONSTANT
:
220 result
[QUAD_TOP_LEFT
] =
221 result
[QUAD_TOP_RIGHT
] =
222 result
[QUAD_BOTTOM_LEFT
] =
223 result
[QUAD_BOTTOM_RIGHT
] = setup
.coef
[slot
].a0
.v
;
227 /* fall-through, for now */
230 register vector
float dadx
= setup
.coef
[slot
].dadx
.v
;
231 register vector
float dady
= setup
.coef
[slot
].dady
.v
;
232 register vector
float topLeft
233 = spu_add(setup
.coef
[slot
].a0
.v
,
234 spu_add(spu_mul(spu_splats(x
), dadx
),
235 spu_mul(spu_splats(y
), dady
)));
237 result
[QUAD_TOP_LEFT
] = topLeft
;
238 result
[QUAD_TOP_RIGHT
] = spu_add(topLeft
, dadx
);
239 result
[QUAD_BOTTOM_LEFT
] = spu_add(topLeft
, dady
);
240 result
[QUAD_BOTTOM_RIGHT
] = spu_add(spu_add(topLeft
, dadx
), dady
);
247 * As above, but return 4 vectors in SOA format.
248 * XXX this will all be re-written someday.
251 eval_coeff_soa(uint slot
, float x
, float y
, vector
float result
[4])
253 eval_coeff(slot
, x
, y
, result
);
254 _transpose_matrix4x4(result
, result
);
259 static INLINE vector
float
260 eval_z(float x
, float y
)
263 const float dzdx
= setup
.coef
[slot
].dadx
.f
[2];
264 const float dzdy
= setup
.coef
[slot
].dady
.f
[2];
265 const float topLeft
= setup
.coef
[slot
].a0
.f
[2] + x
* dzdx
+ y
* dzdy
;
266 const vector
float topLeftv
= spu_splats(topLeft
);
267 const vector
float derivs
= (vector
float) { 0.0, dzdx
, dzdy
, dzdx
+ dzdy
};
268 return spu_add(topLeftv
, derivs
);
273 * Emit a quad (pass to next stage). No clipping is done.
274 * Note: about 1/5 to 1/7 of the time, mask is zero and this function
275 * should be skipped. But adding the test for that slows things down
279 emit_quad( int x
, int y
, mask_t mask
)
281 /* If any bits in mask are set... */
282 if (spu_extract(spu_orx(mask
), 0)) {
283 const int ix
= x
- setup
.cliprect_minx
;
284 const int iy
= y
- setup
.cliprect_miny
;
286 spu
.cur_ctile_status
= TILE_STATUS_DIRTY
;
287 spu
.cur_ztile_status
= TILE_STATUS_DIRTY
;
289 if (0/*spu.texture[0].start*/) {
291 * Temporary texture mapping path
292 * This will go away when fragment programs support TEX inst.
295 vector
float colors
[4];
296 vector
float texcoords
[4];
297 eval_coeff(2, (float) x
, (float) y
, texcoords
);
299 if (spu_extract(mask
, 0))
300 colors
[0] = spu
.sample_texture
[unit
](unit
, texcoords
[0]);
301 if (spu_extract(mask
, 1))
302 colors
[1] = spu
.sample_texture
[unit
](unit
, texcoords
[1]);
303 if (spu_extract(mask
, 2))
304 colors
[2] = spu
.sample_texture
[unit
](unit
, texcoords
[2]);
305 if (spu_extract(mask
, 3))
306 colors
[3] = spu
.sample_texture
[unit
](unit
, texcoords
[3]);
309 if (spu
.texture
[1].start
) {
310 /* multi-texture mapping */
312 vector
float colors1
[4];
314 eval_coeff(2, (float) x
, (float) y
, texcoords
);
316 if (spu_extract(mask
, 0))
317 colors1
[0] = spu
.sample_texture
[unit
](unit
, texcoords
[0]);
318 if (spu_extract(mask
, 1))
319 colors1
[1] = spu
.sample_texture
[unit
](unit
, texcoords
[1]);
320 if (spu_extract(mask
, 2))
321 colors1
[2] = spu
.sample_texture
[unit
](unit
, texcoords
[2]);
322 if (spu_extract(mask
, 3))
323 colors1
[3] = spu
.sample_texture
[unit
](unit
, texcoords
[3]);
325 /* hack: modulate first texture by second */
326 colors
[0] = spu_mul(colors
[0], colors1
[0]);
327 colors
[1] = spu_mul(colors
[1], colors1
[1]);
328 colors
[2] = spu_mul(colors
[2], colors1
[2]);
329 colors
[3] = spu_mul(colors
[3], colors1
[3]);
333 /* Convert fragment data from AoS to SoA format.
334 * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
337 vector
float soa_frag
[4];
338 _transpose_matrix4x4(soa_frag
, colors
);
340 vector
float fragZ
= eval_z((float) x
, (float) y
);
342 /* Do all per-fragment/quad operations here, including:
343 * alpha test, z test, stencil test, blend and framebuffer writing.
345 spu
.fragment_ops(ix
, iy
, &spu
.ctile
, &spu
.ztile
,
347 soa_frag
[0], soa_frag
[1],
348 soa_frag
[2], soa_frag
[3],
356 * Run fragment shader, execute per-fragment ops, update fb/tile.
358 vector
float inputs
[4*4], outputs
[2*4];
359 vector
float fragZ
= eval_z((float) x
, (float) y
);
363 eval_coeff_soa(1, (float) x
, (float) y
, inputs
);
366 for (i
= 0; i
< spu
.vertex_info
.num_attribs
; i
++) {
367 eval_coeff_soa(i
+1, (float) x
, (float) y
, inputs
+ i
* 4);
370 ASSERT(spu
.fragment_program
);
371 ASSERT(spu
.fragment_ops
);
373 /* Execute the current fragment program */
374 spu
.fragment_program(inputs
, outputs
, spu
.constants
);
376 /* Execute per-fragment/quad operations, including:
377 * alpha test, z test, stencil test, blend and framebuffer writing.
379 spu
.fragment_ops(ix
, iy
, &spu
.ctile
, &spu
.ztile
,
393 * Given an X or Y coordinate, return the block/quad coordinate that it
396 static INLINE
int block( int x
)
403 * Compute mask which indicates which pixels in the 2x2 quad are actually inside
404 * the triangle's bounds.
405 * The mask is a uint4 vector and each element will be 0 or 0xffffffff.
407 static INLINE mask_t
calculate_mask( int x
)
409 /* This is a little tricky.
410 * Use & instead of && to avoid branches.
411 * Use negation to convert true/false to ~0/0 values.
414 mask
= spu_insert(-((x
>= setup
.span
.left
[0]) & (x
< setup
.span
.right
[0])), mask
, 0);
415 mask
= spu_insert(-((x
+1 >= setup
.span
.left
[0]) & (x
+1 < setup
.span
.right
[0])), mask
, 1);
416 mask
= spu_insert(-((x
>= setup
.span
.left
[1]) & (x
< setup
.span
.right
[1])), mask
, 2);
417 mask
= spu_insert(-((x
+1 >= setup
.span
.left
[1]) & (x
+1 < setup
.span
.right
[1])), mask
, 3);
423 * Render a horizontal span of quads
425 static void flush_spans( void )
427 int minleft
, maxright
;
430 switch (setup
.span
.y_flags
) {
432 /* both odd and even lines written (both quad rows) */
433 minleft
= MIN2(setup
.span
.left
[0], setup
.span
.left
[1]);
434 maxright
= MAX2(setup
.span
.right
[0], setup
.span
.right
[1]);
438 /* only even line written (quad top row) */
439 minleft
= setup
.span
.left
[0];
440 maxright
= setup
.span
.right
[0];
444 /* only odd line written (quad bottom row) */
445 minleft
= setup
.span
.left
[1];
446 maxright
= setup
.span
.right
[1];
454 /* OK, we're very likely to need the tile data now.
455 * clear or finish waiting if needed.
457 if (spu
.cur_ctile_status
== TILE_STATUS_GETTING
) {
458 /* wait for mfc_get() to complete */
459 //printf("SPU: %u: waiting for ctile\n", spu.init.id);
460 wait_on_mask(1 << TAG_READ_TILE_COLOR
);
461 spu
.cur_ctile_status
= TILE_STATUS_CLEAN
;
463 else if (spu
.cur_ctile_status
== TILE_STATUS_CLEAR
) {
464 //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
465 clear_c_tile(&spu
.ctile
);
466 spu
.cur_ctile_status
= TILE_STATUS_DIRTY
;
468 ASSERT(spu
.cur_ctile_status
!= TILE_STATUS_DEFINED
);
470 if (spu
.read_depth
) {
471 if (spu
.cur_ztile_status
== TILE_STATUS_GETTING
) {
472 /* wait for mfc_get() to complete */
473 //printf("SPU: %u: waiting for ztile\n", spu.init.id);
474 wait_on_mask(1 << TAG_READ_TILE_Z
);
475 spu
.cur_ztile_status
= TILE_STATUS_CLEAN
;
477 else if (spu
.cur_ztile_status
== TILE_STATUS_CLEAR
) {
478 //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
479 clear_z_tile(&spu
.ztile
);
480 spu
.cur_ztile_status
= TILE_STATUS_DIRTY
;
482 ASSERT(spu
.cur_ztile_status
!= TILE_STATUS_DEFINED
);
485 /* XXX this loop could be moved into the above switch cases and
486 * calculate_mask() could be simplified a bit...
488 for (x
= block(minleft
); x
<= block(maxright
); x
+= 2) {
490 emit_quad( x
, setup
.span
.y
, calculate_mask( x
));
495 setup
.span
.y_flags
= 0;
496 setup
.span
.right
[0] = 0;
497 setup
.span
.right
[1] = 0;
501 static void print_vertex(const struct vertex_header
*v
)
504 fprintf(stderr
, "Vertex: (%p)\n", v
);
505 for (i
= 0; i
< setup
.quad
.nr_attrs
; i
++) {
506 fprintf(stderr
, " %d: %f %f %f %f\n", i
,
507 v
->data
[i
][0], v
->data
[i
][1], v
->data
[i
][2], v
->data
[i
][3]);
513 static boolean
setup_sort_vertices(const struct vertex_header
*v0
,
514 const struct vertex_header
*v1
,
515 const struct vertex_header
*v2
)
519 fprintf(stderr
, "Triangle:\n");
527 /* determine bottom to top order of vertices */
529 float y0
= spu_extract(v0
->data
[0], 1);
530 float y1
= spu_extract(v1
->data
[0], 1);
531 float y2
= spu_extract(v2
->data
[0], 1);
574 /* Check if triangle is completely outside the tile bounds */
575 if (spu_extract(setup
.vmin
->data
[0], 1) > setup
.cliprect_maxy
)
577 if (spu_extract(setup
.vmax
->data
[0], 1) < setup
.cliprect_miny
)
579 if (spu_extract(setup
.vmin
->data
[0], 0) < setup
.cliprect_minx
&&
580 spu_extract(setup
.vmid
->data
[0], 0) < setup
.cliprect_minx
&&
581 spu_extract(setup
.vmax
->data
[0], 0) < setup
.cliprect_minx
)
583 if (spu_extract(setup
.vmin
->data
[0], 0) > setup
.cliprect_maxx
&&
584 spu_extract(setup
.vmid
->data
[0], 0) > setup
.cliprect_maxx
&&
585 spu_extract(setup
.vmax
->data
[0], 0) > setup
.cliprect_maxx
)
588 setup
.ebot
.dx
= spu_extract(setup
.vmid
->data
[0], 0) - spu_extract(setup
.vmin
->data
[0], 0);
589 setup
.ebot
.dy
= spu_extract(setup
.vmid
->data
[0], 1) - spu_extract(setup
.vmin
->data
[0], 1);
590 setup
.emaj
.dx
= spu_extract(setup
.vmax
->data
[0], 0) - spu_extract(setup
.vmin
->data
[0], 0);
591 setup
.emaj
.dy
= spu_extract(setup
.vmax
->data
[0], 1) - spu_extract(setup
.vmin
->data
[0], 1);
592 setup
.etop
.dx
= spu_extract(setup
.vmax
->data
[0], 0) - spu_extract(setup
.vmid
->data
[0], 0);
593 setup
.etop
.dy
= spu_extract(setup
.vmax
->data
[0], 1) - spu_extract(setup
.vmid
->data
[0], 1);
596 * Compute triangle's area. Use 1/area to compute partial
597 * derivatives of attributes later.
599 * The area will be the same as prim->det, but the sign may be
600 * different depending on how the vertices get sorted above.
602 * To determine whether the primitive is front or back facing we
603 * use the prim->det value because its sign is correct.
606 const float area
= (setup
.emaj
.dx
* setup
.ebot
.dy
-
607 setup
.ebot
.dx
* setup
.emaj
.dy
);
609 setup
.oneoverarea
= 1.0f
/ area
;
611 _mesa_printf("%s one-over-area %f area %f det %f\n",
612 __FUNCTION__, setup.oneoverarea, area, prim->det );
617 /* We need to know if this is a front or back-facing triangle for:
618 * - the GLSL gl_FrontFacing fragment attribute (bool)
619 * - two-sided stencil test
621 setup
.quad
.facing
= (prim
->det
> 0.0) ^ (setup
.softpipe
->rasterizer
->front_winding
== PIPE_WINDING_CW
);
629 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
630 * The value value comes from vertex->data[slot].
631 * The result will be put into setup.coef[slot].a0.
632 * \param slot which attribute slot
635 const_coeff(uint slot
)
637 setup
.coef
[slot
].dadx
.v
= (vector
float) {0.0, 0.0, 0.0, 0.0};
638 setup
.coef
[slot
].dady
.v
= (vector
float) {0.0, 0.0, 0.0, 0.0};
639 setup
.coef
[slot
].a0
.v
= setup
.vprovoke
->data
[slot
];
644 * Compute a0, dadx and dady for a linearly interpolated coefficient,
648 tri_linear_coeff(uint slot
, uint firstComp
, uint lastComp
)
651 const float *vmin_d
= (float *) &setup
.vmin
->data
[slot
];
652 const float *vmid_d
= (float *) &setup
.vmid
->data
[slot
];
653 const float *vmax_d
= (float *) &setup
.vmax
->data
[slot
];
654 const float x
= spu_extract(setup
.vmin
->data
[0], 0) - 0.5f
;
655 const float y
= spu_extract(setup
.vmin
->data
[0], 1) - 0.5f
;
657 for (i
= firstComp
; i
< lastComp
; i
++) {
658 float botda
= vmid_d
[i
] - vmin_d
[i
];
659 float majda
= vmax_d
[i
] - vmin_d
[i
];
660 float a
= setup
.ebot
.dy
* majda
- botda
* setup
.emaj
.dy
;
661 float b
= setup
.emaj
.dx
* botda
- majda
* setup
.ebot
.dx
;
663 ASSERT(slot
< PIPE_MAX_SHADER_INPUTS
);
665 setup
.coef
[slot
].dadx
.f
[i
] = a
* setup
.oneoverarea
;
666 setup
.coef
[slot
].dady
.f
[i
] = b
* setup
.oneoverarea
;
668 /* calculate a0 as the value which would be sampled for the
669 * fragment at (0,0), taking into account that we want to sample at
670 * pixel centers, in other words (0.5, 0.5).
672 * this is neat but unfortunately not a good way to do things for
673 * triangles with very large values of dadx or dady as it will
674 * result in the subtraction and re-addition from a0 of a very
675 * large number, which means we'll end up losing a lot of the
676 * fractional bits and precision from a0. the way to fix this is
677 * to define a0 as the sample at a pixel center somewhere near vmin
678 * instead - i'll switch to this later.
680 setup
.coef
[slot
].a0
.f
[i
] = (vmin_d
[i
] -
681 (setup
.coef
[slot
].dadx
.f
[i
] * x
+
682 setup
.coef
[slot
].dady
.f
[i
] * y
));
686 _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
688 setup.coef[slot].a0[i],
689 setup.coef[slot].dadx.f[i],
690 setup.coef[slot].dady.f[i]);
696 * As above, but interp setup all four vector components.
699 tri_linear_coeff4(uint slot
)
701 const vector
float vmin_d
= setup
.vmin
->data
[slot
];
702 const vector
float vmid_d
= setup
.vmid
->data
[slot
];
703 const vector
float vmax_d
= setup
.vmax
->data
[slot
];
704 const vector
float xxxx
= spu_splats(spu_extract(setup
.vmin
->data
[0], 0) - 0.5f
);
705 const vector
float yyyy
= spu_splats(spu_extract(setup
.vmin
->data
[0], 1) - 0.5f
);
707 vector
float botda
= vmid_d
- vmin_d
;
708 vector
float majda
= vmax_d
- vmin_d
;
710 vector
float a
= spu_sub(spu_mul(spu_splats(setup
.ebot
.dy
), majda
),
711 spu_mul(botda
, spu_splats(setup
.emaj
.dy
)));
712 vector
float b
= spu_sub(spu_mul(spu_splats(setup
.emaj
.dx
), botda
),
713 spu_mul(majda
, spu_splats(setup
.ebot
.dx
)));
715 setup
.coef
[slot
].dadx
.v
= spu_mul(a
, spu_splats(setup
.oneoverarea
));
716 setup
.coef
[slot
].dady
.v
= spu_mul(b
, spu_splats(setup
.oneoverarea
));
718 vector
float tempx
= spu_mul(setup
.coef
[slot
].dadx
.v
, xxxx
);
719 vector
float tempy
= spu_mul(setup
.coef
[slot
].dady
.v
, yyyy
);
721 setup
.coef
[slot
].a0
.v
= spu_sub(vmin_d
, spu_add(tempx
, tempy
));
728 * Compute a0, dadx and dady for a perspective-corrected interpolant,
730 * We basically multiply the vertex value by 1/w before computing
731 * the plane coefficients (a0, dadx, dady).
732 * Later, when we compute the value at a particular fragment position we'll
733 * divide the interpolated value by the interpolated W at that fragment.
735 static void tri_persp_coeff( unsigned slot
,
738 /* premultiply by 1/w:
740 float mina
= setup
.vmin
->data
[slot
][i
] * setup
.vmin
->data
[0][3];
741 float mida
= setup
.vmid
->data
[slot
][i
] * setup
.vmid
->data
[0][3];
742 float maxa
= setup
.vmax
->data
[slot
][i
] * setup
.vmax
->data
[0][3];
744 float botda
= mida
- mina
;
745 float majda
= maxa
- mina
;
746 float a
= setup
.ebot
.dy
* majda
- botda
* setup
.emaj
.dy
;
747 float b
= setup
.emaj
.dx
* botda
- majda
* setup
.ebot
.dx
;
750 printf("tri persp %d,%d: %f %f %f\n", slot, i,
751 setup.vmin->data[slot][i],
752 setup.vmid->data[slot][i],
753 setup.vmax->data[slot][i]
757 assert(slot
< PIPE_MAX_SHADER_INPUTS
);
760 setup
.coef
[slot
].dadx
.f
[i
] = a
* setup
.oneoverarea
;
761 setup
.coef
[slot
].dady
.f
[i
] = b
* setup
.oneoverarea
;
762 setup
.coef
[slot
].a0
.f
[i
] = (mina
-
763 (setup
.coef
[slot
].dadx
.f
[i
] * (setup
.vmin
->data
[0][0] - 0.5f
) +
764 setup
.coef
[slot
].dady
.f
[i
] * (setup
.vmin
->data
[0][1] - 0.5f
)));
770 * Compute the setup.coef[] array dadx, dady, a0 values.
771 * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
773 static void setup_tri_coefficients(void)
778 for (i
= 0; i
< spu
.vertex_info
.num_attribs
; i
++) {
779 switch (spu
.vertex_info
.interp_mode
[i
]) {
783 /*tri_linear_coeff(i, 2, 3);*/
784 /* XXX interp W if PERSPECTIVE... */
785 tri_linear_coeff4(i
);
787 case INTERP_CONSTANT
:
791 tri_linear_coeff4(i
);
793 case INTERP_PERSPECTIVE
:
794 tri_linear_coeff4(i
); /* temporary */
801 ASSERT(spu
.vertex_info
.interp_mode
[0] == INTERP_POS
);
802 ASSERT(spu
.vertex_info
.interp_mode
[1] == INTERP_LINEAR
||
803 spu
.vertex_info
.interp_mode
[1] == INTERP_CONSTANT
);
804 tri_linear_coeff(0, 2, 3); /* slot 0, z */
805 tri_linear_coeff(1, 0, 4); /* slot 1, color */
810 static void setup_tri_edges(void)
812 float vmin_x
= spu_extract(setup
.vmin
->data
[0], 0) + 0.5f
;
813 float vmid_x
= spu_extract(setup
.vmid
->data
[0], 0) + 0.5f
;
815 float vmin_y
= spu_extract(setup
.vmin
->data
[0], 1) - 0.5f
;
816 float vmid_y
= spu_extract(setup
.vmid
->data
[0], 1) - 0.5f
;
817 float vmax_y
= spu_extract(setup
.vmax
->data
[0], 1) - 0.5f
;
819 setup
.emaj
.sy
= CEILF(vmin_y
);
820 setup
.emaj
.lines
= (int) CEILF(vmax_y
- setup
.emaj
.sy
);
821 setup
.emaj
.dxdy
= setup
.emaj
.dx
/ setup
.emaj
.dy
;
822 setup
.emaj
.sx
= vmin_x
+ (setup
.emaj
.sy
- vmin_y
) * setup
.emaj
.dxdy
;
824 setup
.etop
.sy
= CEILF(vmid_y
);
825 setup
.etop
.lines
= (int) CEILF(vmax_y
- setup
.etop
.sy
);
826 setup
.etop
.dxdy
= setup
.etop
.dx
/ setup
.etop
.dy
;
827 setup
.etop
.sx
= vmid_x
+ (setup
.etop
.sy
- vmid_y
) * setup
.etop
.dxdy
;
829 setup
.ebot
.sy
= CEILF(vmin_y
);
830 setup
.ebot
.lines
= (int) CEILF(vmid_y
- setup
.ebot
.sy
);
831 setup
.ebot
.dxdy
= setup
.ebot
.dx
/ setup
.ebot
.dy
;
832 setup
.ebot
.sx
= vmin_x
+ (setup
.ebot
.sy
- vmin_y
) * setup
.ebot
.dxdy
;
837 * Render the upper or lower half of a triangle.
838 * Scissoring/cliprect is applied here too.
840 static void subtriangle( struct edge
*eleft
,
844 const int minx
= setup
.cliprect_minx
;
845 const int maxx
= setup
.cliprect_maxx
;
846 const int miny
= setup
.cliprect_miny
;
847 const int maxy
= setup
.cliprect_maxy
;
848 int y
, start_y
, finish_y
;
849 int sy
= (int)eleft
->sy
;
851 ASSERT((int)eleft
->sy
== (int) eright
->sy
);
853 /* clip top/bottom */
855 finish_y
= sy
+ lines
;
867 _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
870 for (y
= start_y
; y
< finish_y
; y
++) {
872 /* avoid accumulating adds as floats don't have the precision to
873 * accurately iterate large triangle edges that way. luckily we
874 * can just multiply these days.
876 * this is all drowned out by the attribute interpolation anyway.
878 int left
= (int)(eleft
->sx
+ y
* eleft
->dxdy
);
879 int right
= (int)(eright
->sx
+ y
* eright
->dxdy
);
881 /* clip left/right */
889 if (block(_y
) != setup
.span
.y
) {
891 setup
.span
.y
= block(_y
);
894 setup
.span
.left
[_y
&1] = left
;
895 setup
.span
.right
[_y
&1] = right
;
896 setup
.span
.y_flags
|= 1<<(_y
&1);
901 /* save the values so that emaj can be restarted:
903 eleft
->sx
+= lines
* eleft
->dxdy
;
904 eright
->sx
+= lines
* eright
->dxdy
;
/**
 * Compute the Z component of the cross product of two triangle edge
 * vectors, using only the first two floats (x, y) of each vertex.
 *
 * The sign of the result depends on the order in which the vertices are
 * given; it is zero when the three points are collinear or coincide.
 *
 * \param v0  first vertex; v0[0] and v0[1] are read
 * \param v1  second vertex; v1[0] and v1[1] are read
 * \param v2  third vertex; v2[0] and v2[1] are read
 * \return (v0 - v2) x (v1 - v2), Z component
 */
static float
determinant(const float *v0, const float *v1, const float *v2)
{
   /* edge vectors e = v0 - v2, f = v1 - v2 */
   const float ex = v0[0] - v2[0];
   const float ey = v0[1] - v2[1];
   const float fx = v1[0] - v2[0];
   const float fy = v1[1] - v2[1];

   /* det = cross(e,f).z */
   return ex * fy - ey * fx;
}
926 * Draw triangle into tile at (tx, ty) (tile coords)
927 * The tile data should have already been fetched.
930 tri_draw(const float *v0
, const float *v1
, const float *v2
, uint tx
, uint ty
, uint front_winding
)
935 /* set clipping bounds to tile bounds */
936 setup
.cliprect_minx
= tx
* TILE_SIZE
;
937 setup
.cliprect_miny
= ty
* TILE_SIZE
;
938 setup
.cliprect_maxx
= (tx
+ 1) * TILE_SIZE
;
939 setup
.cliprect_maxy
= (ty
+ 1) * TILE_SIZE
;
941 /* Before we sort vertices, determine the facing of the triangle,
942 * which will be needed for front/back-face stencil application
944 float det
= determinant(v0
, v1
, v2
);
945 setup
.facing
= (det
> 0.0) ^ (front_winding
== PIPE_WINDING_CW
);
947 if (!setup_sort_vertices((struct vertex_header
*) v0
,
948 (struct vertex_header
*) v1
,
949 (struct vertex_header
*) v2
)) {
950 return FALSE
; /* totally clipped */
953 setup_tri_coefficients();
957 setup
.span
.y_flags
= 0;
958 setup
.span
.right
[0] = 0;
959 setup
.span
.right
[1] = 0;
960 /* setup.span.z_mode = tri_z_mode( setup.ctx ); */
962 /* init_constant_attribs( setup ); */
964 if (setup
.oneoverarea
< 0.0) {
967 subtriangle( &setup
.emaj
, &setup
.ebot
, setup
.ebot
.lines
);
968 subtriangle( &setup
.emaj
, &setup
.etop
, setup
.etop
.lines
);
973 subtriangle( &setup
.ebot
, &setup
.emaj
, setup
.ebot
.lines
);
974 subtriangle( &setup
.etop
, &setup
.emaj
, setup
.etop
.lines
);