1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * Triangle rendering within a tile.
32 #include <transpose_matrix4x4.h>
33 #include "pipe/p_compiler.h"
34 #include "pipe/p_format.h"
35 #include "pipe/p_util.h"
36 #include "spu_colorpack.h"
38 #include "spu_texture.h"
41 #include "spu_per_fragment_op.h"
44 /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
45 typedef vector
unsigned int mask_t
;
55 * Simplified types taken from other parts of Gallium
57 struct vertex_header
{
65 #define CEILF(X) ((float) (int) ((X) + 0.99999))
68 #define QUAD_TOP_LEFT 0
69 #define QUAD_TOP_RIGHT 1
70 #define QUAD_BOTTOM_LEFT 2
71 #define QUAD_BOTTOM_RIGHT 3
72 #define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT)
73 #define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT)
74 #define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT)
75 #define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
85 float dx
; /**< X(v1) - X(v0), used only during setup */
86 float dy
; /**< Y(v1) - Y(v0), used only during setup */
87 float dxdy
; /**< dx/dy */
88 float sx
, sy
; /**< first sample point coord */
89 int lines
; /**< number of lines on this edge */
102 * Triangle setup info (derived from draw_stage).
103 * Also used for line drawing (taking some liberties).
107 /* Vertices are just an array of floats making up each attribute in
108 * turn. Currently fixed at 4 floats, but should change in time.
109 * Codegen will help cope with this.
111 const struct vertex_header
*vmax
;
112 const struct vertex_header
*vmid
;
113 const struct vertex_header
*vmin
;
114 const struct vertex_header
*vprovoke
;
124 int cliprect_minx
, cliprect_maxx
, cliprect_miny
, cliprect_maxy
;
127 struct tgsi_interp_coef coef
[PIPE_MAX_SHADER_INPUTS
];
129 struct interp_coef coef
[PIPE_MAX_SHADER_INPUTS
];
133 struct quad_header quad
;
137 int left
[2]; /**< [0] = row0, [1] = row1 */
141 unsigned mask
; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
147 static struct setup_stage setup
;
154 * Basically a cast wrapper.
156 static INLINE
struct setup_stage
*setup_stage( struct draw_stage
*stage
)
158 return (struct setup_stage
*)stage
;
164 * Clip setup.quad against the scissor/surface bounds.
167 quad_clip(struct setup_stage
*setup
)
169 const struct pipe_scissor_state
*cliprect
= &setup
.softpipe
->cliprect
;
170 const int minx
= (int) cliprect
->minx
;
171 const int maxx
= (int) cliprect
->maxx
;
172 const int miny
= (int) cliprect
->miny
;
173 const int maxy
= (int) cliprect
->maxy
;
175 if (setup
.quad
.x0
>= maxx
||
176 setup
.quad
.y0
>= maxy
||
177 setup
.quad
.x0
+ 1 < minx
||
178 setup
.quad
.y0
+ 1 < miny
) {
179 /* totally clipped */
180 setup
.quad
.mask
= 0x0;
183 if (setup
.quad
.x0
< minx
)
184 setup
.quad
.mask
&= (MASK_BOTTOM_RIGHT
| MASK_TOP_RIGHT
);
185 if (setup
.quad
.y0
< miny
)
186 setup
.quad
.mask
&= (MASK_BOTTOM_LEFT
| MASK_BOTTOM_RIGHT
);
187 if (setup
.quad
.x0
== maxx
- 1)
188 setup
.quad
.mask
&= (MASK_BOTTOM_LEFT
| MASK_TOP_LEFT
);
189 if (setup
.quad
.y0
== maxy
- 1)
190 setup
.quad
.mask
&= (MASK_TOP_LEFT
| MASK_TOP_RIGHT
);
196 * Emit a quad (pass to next stage) with clipping.
199 clip_emit_quad(struct setup_stage
*setup
)
202 if (setup
.quad
.mask
) {
203 struct softpipe_context
*sp
= setup
.softpipe
;
204 sp
->quad
.first
->run(sp
->quad
.first
, &setup
.quad
);
210 * Evaluate attribute coefficients (plane equations) to compute
211 * attribute values for the four fragments in a quad.
212 * Eg: four colors will be computed.
215 eval_coeff(uint slot
, float x
, float y
, vector
float result
[4])
217 switch (spu
.vertex_info
.interp_mode
[slot
]) {
218 case INTERP_CONSTANT
:
219 result
[QUAD_TOP_LEFT
] =
220 result
[QUAD_TOP_RIGHT
] =
221 result
[QUAD_BOTTOM_LEFT
] =
222 result
[QUAD_BOTTOM_RIGHT
] = setup
.coef
[slot
].a0
.v
;
226 /* fall-through, for now */
229 register vector
float dadx
= setup
.coef
[slot
].dadx
.v
;
230 register vector
float dady
= setup
.coef
[slot
].dady
.v
;
231 register vector
float topLeft
232 = spu_add(setup
.coef
[slot
].a0
.v
,
233 spu_add(spu_mul(spu_splats(x
), dadx
),
234 spu_mul(spu_splats(y
), dady
)));
236 result
[QUAD_TOP_LEFT
] = topLeft
;
237 result
[QUAD_TOP_RIGHT
] = spu_add(topLeft
, dadx
);
238 result
[QUAD_BOTTOM_LEFT
] = spu_add(topLeft
, dady
);
239 result
[QUAD_BOTTOM_RIGHT
] = spu_add(spu_add(topLeft
, dadx
), dady
);
245 static INLINE vector
float
246 eval_z(float x
, float y
)
249 const float dzdx
= setup
.coef
[slot
].dadx
.f
[2];
250 const float dzdy
= setup
.coef
[slot
].dady
.f
[2];
251 const float topLeft
= setup
.coef
[slot
].a0
.f
[2] + x
* dzdx
+ y
* dzdy
;
252 const vector
float topLeftv
= spu_splats(topLeft
);
253 const vector
float derivs
= (vector
float) { 0.0, dzdx
, dzdy
, dzdx
+ dzdy
};
254 return spu_add(topLeftv
, derivs
);
259 do_depth_test(int x
, int y
, mask_t quadmask
)
264 if (spu
.fb
.depth_format
== PIPE_FORMAT_NONE
)
267 zvals
.v
= eval_z((float) x
, (float) y
);
269 mask
= (mask_t
) spu_do_depth_stencil(x
- setup
.cliprect_minx
,
270 y
- setup
.cliprect_miny
,
273 (qword
) spu_splats((unsigned char) 0x0ffu
),
274 (qword
) spu_splats((unsigned int) 0x01u
));
276 if (spu_extract(spu_orx(mask
), 0))
277 spu
.cur_ztile_status
= TILE_STATUS_DIRTY
;
284 * Emit a quad (pass to next stage). No clipping is done.
285 * Note: about 1/5 to 1/7 of the time, mask is zero and this function
286 * should be skipped. But adding the test for that slows things down
290 emit_quad( int x
, int y
, mask_t mask
)
293 struct softpipe_context
*sp
= setup
.softpipe
;
296 setup
.quad
.mask
= mask
;
297 sp
->quad
.first
->run(sp
->quad
.first
, &setup
.quad
);
300 if (spu
.read_depth
) {
301 mask
= do_depth_test(x
, y
, mask
);
304 /* If any bits in mask are set... */
305 if (spu_extract(spu_orx(mask
), 0)) {
306 const int ix
= x
- setup
.cliprect_minx
;
307 const int iy
= y
- setup
.cliprect_miny
;
308 vector
float colors
[4];
310 spu
.cur_ctile_status
= TILE_STATUS_DIRTY
;
312 if (spu
.texture
[0].start
) {
313 /* texture mapping */
315 vector
float texcoords
[4];
316 eval_coeff(2, (float) x
, (float) y
, texcoords
);
318 if (spu_extract(mask
, 0))
319 colors
[0] = spu
.sample_texture
[unit
](unit
, texcoords
[0]);
320 if (spu_extract(mask
, 1))
321 colors
[1] = spu
.sample_texture
[unit
](unit
, texcoords
[1]);
322 if (spu_extract(mask
, 2))
323 colors
[2] = spu
.sample_texture
[unit
](unit
, texcoords
[2]);
324 if (spu_extract(mask
, 3))
325 colors
[3] = spu
.sample_texture
[unit
](unit
, texcoords
[3]);
328 if (spu
.texture
[1].start
) {
329 /* multi-texture mapping */
331 vector
float colors1
[4];
333 eval_coeff(2, (float) x
, (float) y
, texcoords
);
335 if (spu_extract(mask
, 0))
336 colors1
[0] = spu
.sample_texture
[unit
](unit
, texcoords
[0]);
337 if (spu_extract(mask
, 1))
338 colors1
[1] = spu
.sample_texture
[unit
](unit
, texcoords
[1]);
339 if (spu_extract(mask
, 2))
340 colors1
[2] = spu
.sample_texture
[unit
](unit
, texcoords
[2]);
341 if (spu_extract(mask
, 3))
342 colors1
[3] = spu
.sample_texture
[unit
](unit
, texcoords
[3]);
344 /* hack: modulate first texture by second */
345 colors
[0] = spu_mul(colors
[0], colors1
[0]);
346 colors
[1] = spu_mul(colors
[1], colors1
[1]);
347 colors
[2] = spu_mul(colors
[2], colors1
[2]);
348 colors
[3] = spu_mul(colors
[3], colors1
[3]);
354 eval_coeff(1, (float) x
, (float) y
, colors
);
358 /* Convert fragment data from AoS to SoA format.
361 _transpose_matrix4x4((vec_float4
*) soa_frag
, colors
);
363 /* Read the current framebuffer values.
365 const qword pix
[4] = {
366 (qword
) spu_splats(spu
.ctile
.ui
[iy
+0][ix
+0]),
367 (qword
) spu_splats(spu
.ctile
.ui
[iy
+0][ix
+1]),
368 (qword
) spu_splats(spu
.ctile
.ui
[iy
+1][ix
+0]),
369 (qword
) spu_splats(spu
.ctile
.ui
[iy
+1][ix
+1]),
375 /* Convert pixel data from AoS to SoA format.
377 vec_float4 aos_pix
[4] = {
378 spu_unpack_A8R8G8B8(spu
.ctile
.ui
[iy
+0][ix
+0]),
379 spu_unpack_A8R8G8B8(spu
.ctile
.ui
[iy
+0][ix
+1]),
380 spu_unpack_A8R8G8B8(spu
.ctile
.ui
[iy
+1][ix
+0]),
381 spu_unpack_A8R8G8B8(spu
.ctile
.ui
[iy
+1][ix
+1]),
384 _transpose_matrix4x4((vec_float4
*) soa_pix
, aos_pix
);
388 struct spu_blend_results result
=
389 (*spu
.blend
)(soa_frag
[0], soa_frag
[1], soa_frag
[2], soa_frag
[3],
390 soa_pix
[0], soa_pix
[1], soa_pix
[2], soa_pix
[3],
391 spu
.const_blend_color
[0], spu
.const_blend_color
[1],
392 spu
.const_blend_color
[2], spu
.const_blend_color
[3]);
395 /* Convert final pixel data from SoA to AoS format.
397 result
= (*spu
.logicop
)(pix
[0], pix
[1], pix
[2], pix
[3],
398 result
.r
, result
.g
, result
.b
, result
.a
,
401 spu
.ctile
.ui
[iy
+0][ix
+0] = spu_extract((vec_uint4
) result
.r
, 0);
402 spu
.ctile
.ui
[iy
+0][ix
+1] = spu_extract((vec_uint4
) result
.g
, 0);
403 spu
.ctile
.ui
[iy
+1][ix
+0] = spu_extract((vec_uint4
) result
.b
, 0);
404 spu
.ctile
.ui
[iy
+1][ix
+1] = spu_extract((vec_uint4
) result
.a
, 0);
411 * Given an X or Y coordinate, return the block/quad coordinate that it
414 static INLINE
int block( int x
)
421 * Compute mask which indicates which pixels in the 2x2 quad are actually inside
422 * the triangle's bounds.
423 * The mask is a uint4 vector and each element will be 0 or 0xffffffff.
425 static INLINE mask_t
calculate_mask( int x
)
427 /* This is a little tricky.
428 * Use & instead of && to avoid branches.
429 * Use negation to convert true/false to ~0/0 values.
432 mask
= spu_insert(-((x
>= setup
.span
.left
[0]) & (x
< setup
.span
.right
[0])), mask
, 0);
433 mask
= spu_insert(-((x
+1 >= setup
.span
.left
[0]) & (x
+1 < setup
.span
.right
[0])), mask
, 1);
434 mask
= spu_insert(-((x
>= setup
.span
.left
[1]) & (x
< setup
.span
.right
[1])), mask
, 2);
435 mask
= spu_insert(-((x
+1 >= setup
.span
.left
[1]) & (x
+1 < setup
.span
.right
[1])), mask
, 3);
441 * Render a horizontal span of quads
443 static void flush_spans( void )
445 int minleft
, maxright
;
448 switch (setup
.span
.y_flags
) {
450 /* both odd and even lines written (both quad rows) */
451 minleft
= MIN2(setup
.span
.left
[0], setup
.span
.left
[1]);
452 maxright
= MAX2(setup
.span
.right
[0], setup
.span
.right
[1]);
456 /* only even line written (quad top row) */
457 minleft
= setup
.span
.left
[0];
458 maxright
= setup
.span
.right
[0];
462 /* only odd line written (quad bottom row) */
463 minleft
= setup
.span
.left
[1];
464 maxright
= setup
.span
.right
[1];
472 /* OK, we're very likely to need the tile data now.
473 * clear or finish waiting if needed.
475 if (spu
.cur_ctile_status
== TILE_STATUS_GETTING
) {
476 /* wait for mfc_get() to complete */
477 //printf("SPU: %u: waiting for ctile\n", spu.init.id);
478 wait_on_mask(1 << TAG_READ_TILE_COLOR
);
479 spu
.cur_ctile_status
= TILE_STATUS_CLEAN
;
481 else if (spu
.cur_ctile_status
== TILE_STATUS_CLEAR
) {
482 //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
483 clear_c_tile(&spu
.ctile
);
484 spu
.cur_ctile_status
= TILE_STATUS_DIRTY
;
486 ASSERT(spu
.cur_ctile_status
!= TILE_STATUS_DEFINED
);
488 if (spu
.read_depth
) {
489 if (spu
.cur_ztile_status
== TILE_STATUS_GETTING
) {
490 /* wait for mfc_get() to complete */
491 //printf("SPU: %u: waiting for ztile\n", spu.init.id);
492 wait_on_mask(1 << TAG_READ_TILE_Z
);
493 spu
.cur_ztile_status
= TILE_STATUS_CLEAN
;
495 else if (spu
.cur_ztile_status
== TILE_STATUS_CLEAR
) {
496 //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
497 clear_z_tile(&spu
.ztile
);
498 spu
.cur_ztile_status
= TILE_STATUS_DIRTY
;
500 ASSERT(spu
.cur_ztile_status
!= TILE_STATUS_DEFINED
);
503 /* XXX this loop could be moved into the above switch cases and
504 * calculate_mask() could be simplified a bit...
506 for (x
= block(minleft
); x
<= block(maxright
); x
+= 2) {
508 emit_quad( x
, setup
.span
.y
, calculate_mask( x
) );
513 setup
.span
.y_flags
= 0;
514 setup
.span
.right
[0] = 0;
515 setup
.span
.right
[1] = 0;
519 static void print_vertex(const struct vertex_header
*v
)
522 fprintf(stderr
, "Vertex: (%p)\n", v
);
523 for (i
= 0; i
< setup
.quad
.nr_attrs
; i
++) {
524 fprintf(stderr
, " %d: %f %f %f %f\n", i
,
525 v
->data
[i
][0], v
->data
[i
][1], v
->data
[i
][2], v
->data
[i
][3]);
531 static boolean
setup_sort_vertices(const struct vertex_header
*v0
,
532 const struct vertex_header
*v1
,
533 const struct vertex_header
*v2
)
537 fprintf(stderr
, "Triangle:\n");
545 /* determine bottom to top order of vertices */
547 float y0
= spu_extract(v0
->data
[0], 1);
548 float y1
= spu_extract(v1
->data
[0], 1);
549 float y2
= spu_extract(v2
->data
[0], 1);
592 /* Check if triangle is completely outside the tile bounds */
593 if (spu_extract(setup
.vmin
->data
[0], 1) > setup
.cliprect_maxy
)
595 if (spu_extract(setup
.vmax
->data
[0], 1) < setup
.cliprect_miny
)
597 if (spu_extract(setup
.vmin
->data
[0], 0) < setup
.cliprect_minx
&&
598 spu_extract(setup
.vmid
->data
[0], 0) < setup
.cliprect_minx
&&
599 spu_extract(setup
.vmax
->data
[0], 0) < setup
.cliprect_minx
)
601 if (spu_extract(setup
.vmin
->data
[0], 0) > setup
.cliprect_maxx
&&
602 spu_extract(setup
.vmid
->data
[0], 0) > setup
.cliprect_maxx
&&
603 spu_extract(setup
.vmax
->data
[0], 0) > setup
.cliprect_maxx
)
606 setup
.ebot
.dx
= spu_extract(setup
.vmid
->data
[0], 0) - spu_extract(setup
.vmin
->data
[0], 0);
607 setup
.ebot
.dy
= spu_extract(setup
.vmid
->data
[0], 1) - spu_extract(setup
.vmin
->data
[0], 1);
608 setup
.emaj
.dx
= spu_extract(setup
.vmax
->data
[0], 0) - spu_extract(setup
.vmin
->data
[0], 0);
609 setup
.emaj
.dy
= spu_extract(setup
.vmax
->data
[0], 1) - spu_extract(setup
.vmin
->data
[0], 1);
610 setup
.etop
.dx
= spu_extract(setup
.vmax
->data
[0], 0) - spu_extract(setup
.vmid
->data
[0], 0);
611 setup
.etop
.dy
= spu_extract(setup
.vmax
->data
[0], 1) - spu_extract(setup
.vmid
->data
[0], 1);
614 * Compute triangle's area. Use 1/area to compute partial
615 * derivatives of attributes later.
617 * The area will be the same as prim->det, but the sign may be
618 * different depending on how the vertices get sorted above.
620 * To determine whether the primitive is front or back facing we
621 * use the prim->det value because its sign is correct.
624 const float area
= (setup
.emaj
.dx
* setup
.ebot
.dy
-
625 setup
.ebot
.dx
* setup
.emaj
.dy
);
627 setup
.oneoverarea
= 1.0f
/ area
;
629 _mesa_printf("%s one-over-area %f area %f det %f\n",
630 __FUNCTION__, setup.oneoverarea, area, prim->det );
635 /* We need to know if this is a front or back-facing triangle for:
636 * - the GLSL gl_FrontFacing fragment attribute (bool)
637 * - two-sided stencil test
639 setup
.quad
.facing
= (prim
->det
> 0.0) ^ (setup
.softpipe
->rasterizer
->front_winding
== PIPE_WINDING_CW
);
647 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
648 * The value comes from vertex->data[slot].
649 * The result will be put into setup.coef[slot].a0.
650 * \param slot which attribute slot
653 const_coeff(uint slot
)
655 setup
.coef
[slot
].dadx
.v
= (vector
float) {0.0, 0.0, 0.0, 0.0};
656 setup
.coef
[slot
].dady
.v
= (vector
float) {0.0, 0.0, 0.0, 0.0};
657 setup
.coef
[slot
].a0
.v
= setup
.vprovoke
->data
[slot
];
662 * Compute a0, dadx and dady for a linearly interpolated coefficient,
666 tri_linear_coeff(uint slot
, uint firstComp
, uint lastComp
)
669 const float *vmin_d
= (float *) &setup
.vmin
->data
[slot
];
670 const float *vmid_d
= (float *) &setup
.vmid
->data
[slot
];
671 const float *vmax_d
= (float *) &setup
.vmax
->data
[slot
];
672 const float x
= spu_extract(setup
.vmin
->data
[0], 0) - 0.5f
;
673 const float y
= spu_extract(setup
.vmin
->data
[0], 1) - 0.5f
;
675 for (i
= firstComp
; i
< lastComp
; i
++) {
676 float botda
= vmid_d
[i
] - vmin_d
[i
];
677 float majda
= vmax_d
[i
] - vmin_d
[i
];
678 float a
= setup
.ebot
.dy
* majda
- botda
* setup
.emaj
.dy
;
679 float b
= setup
.emaj
.dx
* botda
- majda
* setup
.ebot
.dx
;
681 ASSERT(slot
< PIPE_MAX_SHADER_INPUTS
);
683 setup
.coef
[slot
].dadx
.f
[i
] = a
* setup
.oneoverarea
;
684 setup
.coef
[slot
].dady
.f
[i
] = b
* setup
.oneoverarea
;
686 /* calculate a0 as the value which would be sampled for the
687 * fragment at (0,0), taking into account that we want to sample at
688 * pixel centers, in other words (0.5, 0.5).
690 * this is neat but unfortunately not a good way to do things for
691 * triangles with very large values of dadx or dady as it will
692 * result in the subtraction and re-addition from a0 of a very
693 * large number, which means we'll end up losing a lot of the
694 * fractional bits and precision from a0. the way to fix this is
695 * to define a0 as the sample at a pixel center somewhere near vmin
696 * instead - i'll switch to this later.
698 setup
.coef
[slot
].a0
.f
[i
] = (vmin_d
[i
] -
699 (setup
.coef
[slot
].dadx
.f
[i
] * x
+
700 setup
.coef
[slot
].dady
.f
[i
] * y
));
704 _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
706 setup.coef[slot].a0[i],
707 setup.coef[slot].dadx.f[i],
708 setup.coef[slot].dady.f[i]);
714 * As above, but interp setup all four vector components.
717 tri_linear_coeff4(uint slot
)
719 const vector
float vmin_d
= setup
.vmin
->data
[slot
];
720 const vector
float vmid_d
= setup
.vmid
->data
[slot
];
721 const vector
float vmax_d
= setup
.vmax
->data
[slot
];
722 const vector
float xxxx
= spu_splats(spu_extract(setup
.vmin
->data
[0], 0) - 0.5f
);
723 const vector
float yyyy
= spu_splats(spu_extract(setup
.vmin
->data
[0], 1) - 0.5f
);
725 vector
float botda
= vmid_d
- vmin_d
;
726 vector
float majda
= vmax_d
- vmin_d
;
728 vector
float a
= spu_sub(spu_mul(spu_splats(setup
.ebot
.dy
), majda
),
729 spu_mul(botda
, spu_splats(setup
.emaj
.dy
)));
730 vector
float b
= spu_sub(spu_mul(spu_splats(setup
.emaj
.dx
), botda
),
731 spu_mul(majda
, spu_splats(setup
.ebot
.dx
)));
733 setup
.coef
[slot
].dadx
.v
= spu_mul(a
, spu_splats(setup
.oneoverarea
));
734 setup
.coef
[slot
].dady
.v
= spu_mul(b
, spu_splats(setup
.oneoverarea
));
736 vector
float tempx
= spu_mul(setup
.coef
[slot
].dadx
.v
, xxxx
);
737 vector
float tempy
= spu_mul(setup
.coef
[slot
].dady
.v
, yyyy
);
739 setup
.coef
[slot
].a0
.v
= spu_sub(vmin_d
, spu_add(tempx
, tempy
));
746 * Compute a0, dadx and dady for a perspective-corrected interpolant,
748 * We basically multiply the vertex value by 1/w before computing
749 * the plane coefficients (a0, dadx, dady).
750 * Later, when we compute the value at a particular fragment position we'll
751 * divide the interpolated value by the interpolated W at that fragment.
753 static void tri_persp_coeff( unsigned slot
,
756 /* premultiply by 1/w:
758 float mina
= setup
.vmin
->data
[slot
][i
] * setup
.vmin
->data
[0][3];
759 float mida
= setup
.vmid
->data
[slot
][i
] * setup
.vmid
->data
[0][3];
760 float maxa
= setup
.vmax
->data
[slot
][i
] * setup
.vmax
->data
[0][3];
762 float botda
= mida
- mina
;
763 float majda
= maxa
- mina
;
764 float a
= setup
.ebot
.dy
* majda
- botda
* setup
.emaj
.dy
;
765 float b
= setup
.emaj
.dx
* botda
- majda
* setup
.ebot
.dx
;
768 printf("tri persp %d,%d: %f %f %f\n", slot, i,
769 setup.vmin->data[slot][i],
770 setup.vmid->data[slot][i],
771 setup.vmax->data[slot][i]
775 assert(slot
< PIPE_MAX_SHADER_INPUTS
);
778 setup
.coef
[slot
].dadx
.f
[i
] = a
* setup
.oneoverarea
;
779 setup
.coef
[slot
].dady
.f
[i
] = b
* setup
.oneoverarea
;
780 setup
.coef
[slot
].a0
.f
[i
] = (mina
-
781 (setup
.coef
[slot
].dadx
.f
[i
] * (setup
.vmin
->data
[0][0] - 0.5f
) +
782 setup
.coef
[slot
].dady
.f
[i
] * (setup
.vmin
->data
[0][1] - 0.5f
)));
788 * Compute the setup.coef[] array dadx, dady, a0 values.
789 * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
791 static void setup_tri_coefficients(void)
796 for (i
= 0; i
< spu
.vertex_info
.num_attribs
; i
++) {
797 switch (spu
.vertex_info
.interp_mode
[i
]) {
801 /*tri_linear_coeff(i, 2, 3);*/
802 /* XXX interp W if PERSPECTIVE... */
803 tri_linear_coeff4(i
);
805 case INTERP_CONSTANT
:
809 tri_linear_coeff4(i
);
811 case INTERP_PERSPECTIVE
:
812 tri_linear_coeff4(i
); /* temporary */
819 ASSERT(spu
.vertex_info
.interp_mode
[0] == INTERP_POS
);
820 ASSERT(spu
.vertex_info
.interp_mode
[1] == INTERP_LINEAR
||
821 spu
.vertex_info
.interp_mode
[1] == INTERP_CONSTANT
);
822 tri_linear_coeff(0, 2, 3); /* slot 0, z */
823 tri_linear_coeff(1, 0, 4); /* slot 1, color */
828 static void setup_tri_edges(void)
830 float vmin_x
= spu_extract(setup
.vmin
->data
[0], 0) + 0.5f
;
831 float vmid_x
= spu_extract(setup
.vmid
->data
[0], 0) + 0.5f
;
833 float vmin_y
= spu_extract(setup
.vmin
->data
[0], 1) - 0.5f
;
834 float vmid_y
= spu_extract(setup
.vmid
->data
[0], 1) - 0.5f
;
835 float vmax_y
= spu_extract(setup
.vmax
->data
[0], 1) - 0.5f
;
837 setup
.emaj
.sy
= CEILF(vmin_y
);
838 setup
.emaj
.lines
= (int) CEILF(vmax_y
- setup
.emaj
.sy
);
839 setup
.emaj
.dxdy
= setup
.emaj
.dx
/ setup
.emaj
.dy
;
840 setup
.emaj
.sx
= vmin_x
+ (setup
.emaj
.sy
- vmin_y
) * setup
.emaj
.dxdy
;
842 setup
.etop
.sy
= CEILF(vmid_y
);
843 setup
.etop
.lines
= (int) CEILF(vmax_y
- setup
.etop
.sy
);
844 setup
.etop
.dxdy
= setup
.etop
.dx
/ setup
.etop
.dy
;
845 setup
.etop
.sx
= vmid_x
+ (setup
.etop
.sy
- vmid_y
) * setup
.etop
.dxdy
;
847 setup
.ebot
.sy
= CEILF(vmin_y
);
848 setup
.ebot
.lines
= (int) CEILF(vmid_y
- setup
.ebot
.sy
);
849 setup
.ebot
.dxdy
= setup
.ebot
.dx
/ setup
.ebot
.dy
;
850 setup
.ebot
.sx
= vmin_x
+ (setup
.ebot
.sy
- vmin_y
) * setup
.ebot
.dxdy
;
855 * Render the upper or lower half of a triangle.
856 * Scissoring/cliprect is applied here too.
858 static void subtriangle( struct edge
*eleft
,
862 const int minx
= setup
.cliprect_minx
;
863 const int maxx
= setup
.cliprect_maxx
;
864 const int miny
= setup
.cliprect_miny
;
865 const int maxy
= setup
.cliprect_maxy
;
866 int y
, start_y
, finish_y
;
867 int sy
= (int)eleft
->sy
;
869 ASSERT((int)eleft
->sy
== (int) eright
->sy
);
871 /* clip top/bottom */
873 finish_y
= sy
+ lines
;
885 _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
888 for (y
= start_y
; y
< finish_y
; y
++) {
890 /* avoid accumulating adds as floats don't have the precision to
891 * accurately iterate large triangle edges that way. luckily we
892 * can just multiply these days.
894 * this is all drowned out by the attribute interpolation anyway.
896 int left
= (int)(eleft
->sx
+ y
* eleft
->dxdy
);
897 int right
= (int)(eright
->sx
+ y
* eright
->dxdy
);
899 /* clip left/right */
907 if (block(_y
) != setup
.span
.y
) {
909 setup
.span
.y
= block(_y
);
912 setup
.span
.left
[_y
&1] = left
;
913 setup
.span
.right
[_y
&1] = right
;
914 setup
.span
.y_flags
|= 1<<(_y
&1);
919 /* save the values so that emaj can be restarted:
921 eleft
->sx
+= lines
* eleft
->dxdy
;
922 eright
->sx
+= lines
* eright
->dxdy
;
929 * Draw triangle into tile at (tx, ty) (tile coords)
930 * The tile data should have already been fetched.
933 tri_draw(const float *v0
, const float *v1
, const float *v2
, uint tx
, uint ty
)
938 /* set clipping bounds to tile bounds */
939 setup
.cliprect_minx
= tx
* TILE_SIZE
;
940 setup
.cliprect_miny
= ty
* TILE_SIZE
;
941 setup
.cliprect_maxx
= (tx
+ 1) * TILE_SIZE
;
942 setup
.cliprect_maxy
= (ty
+ 1) * TILE_SIZE
;
944 if (!setup_sort_vertices((struct vertex_header
*) v0
,
945 (struct vertex_header
*) v1
,
946 (struct vertex_header
*) v2
)) {
947 return FALSE
; /* totally clipped */
950 setup_tri_coefficients();
954 setup
.span
.y_flags
= 0;
955 setup
.span
.right
[0] = 0;
956 setup
.span
.right
[1] = 0;
957 /* setup.span.z_mode = tri_z_mode( setup.ctx ); */
959 /* init_constant_attribs( setup ); */
961 if (setup
.oneoverarea
< 0.0) {
964 subtriangle( &setup
.emaj
, &setup
.ebot
, setup
.ebot
.lines
);
965 subtriangle( &setup
.emaj
, &setup
.etop
, setup
.etop
.lines
);
970 subtriangle( &setup
.ebot
, &setup
.emaj
, setup
.ebot
.lines
);
971 subtriangle( &setup
.etop
, &setup
.emaj
, setup
.etop
.lines
);