cell: minor texture improvements
[mesa.git] / src / gallium / drivers / cell / spu / spu_tri.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * Triangle rendering within a tile.
30 */
31
32 #include <transpose_matrix4x4.h>
33 #include "pipe/p_compiler.h"
34 #include "pipe/p_format.h"
35 #include "pipe/p_util.h"
36 #include "spu_colorpack.h"
37 #include "spu_main.h"
38 #include "spu_texture.h"
39 #include "spu_tile.h"
40 #include "spu_tri.h"
41 #include "spu_per_fragment_op.h"
42
43
/**
 * Per-quad fragment mask.
 * Masks are uint[4] vectors with each element being 0 or 0xffffffff;
 * the element index is one of the QUAD_* positions.
 */
typedef vector unsigned int mask_t;
46
/**
 * Four floats that can be accessed either as one SIMD vector (v) or as
 * individual scalar components (f[0..3]); both members alias the same storage.
 */
typedef union
{
   vector float v;
   float f[4];
} float4;
52
53
/**
 * Simplified types taken from other parts of Gallium
 *
 * A vertex is a run of 4-float attribute vectors.  data[] is declared with
 * one element but is indexed by attribute slot throughout this file
 * (data[slot]).  Slot 0 is read as the position: elements 0 and 1 are used
 * as window X and Y, element 2 is interpolated as Z.
 */
struct vertex_header {
   vector float data[1];
};
60
61
62
/* XXX fix this
 *
 * Cheap stand-in for a float ceiling: add just under 1.0, truncate through
 * int, convert back to float.  Known limits of this form:
 *  - the (int) cast truncates toward zero, so the result can be too large
 *    for negative arguments;
 *  - a fractional part smaller than 0.00001 is rounded down, not up;
 *  - the argument must fit in an int.
 */
#undef CEILF
#define CEILF(X) ((float) (int) ((X) + 0.99999))
66
67
/* Index of each fragment inside a 2x2 quad.  eval_coeff() fills its result
 * array in this order: top-left is the fragment at (x, y), top-right is one
 * step in X, bottom-left one step in Y, bottom-right one step in both.
 */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
/* One-bit-per-fragment form of the positions above. */
#define MASK_TOP_LEFT     (1 << QUAD_TOP_LEFT)
#define MASK_TOP_RIGHT    (1 << QUAD_TOP_RIGHT)
#define MASK_BOTTOM_LEFT  (1 << QUAD_BOTTOM_LEFT)
#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
#define MASK_ALL          0xf
77
78
/* Set to 1 to compile in print_vertex() and the per-triangle vertex dump
 * in setup_sort_vertices().
 */
#define DEBUG_VERTS 0
80
/**
 * Triangle edge info
 *
 * dx/dy are filled in by setup_sort_vertices(); dxdy, sx, sy and lines are
 * filled in by setup_tri_edges() and then advanced by subtriangle() as the
 * edge is walked.
 */
struct edge {
   float dx;		/**< X(v1) - X(v0), used only during setup */
   float dy;		/**< Y(v1) - Y(v0), used only during setup */
   float dxdy;		/**< dx/dy */
   float sx, sy;	/**< first sample point coord */
   int lines;		/**< number of lines on this edge */
};
91
92
/**
 * Plane-equation coefficients for one vertex attribute (all four
 * components).  The attribute at window position (x, y) is evaluated as
 * a0 + x * dadx + y * dady.
 */
struct interp_coef
{
   float4 a0;     /**< attribute value at the origin of the plane equation */
   float4 dadx;   /**< change in attribute per unit step in X */
   float4 dady;   /**< change in attribute per unit step in Y */
};
99
100
/**
 * Triangle setup info (derived from draw_stage).
 * Also used for line drawing (taking some liberties).
 *
 * Holds everything computed for the triangle currently being rasterized
 * into one tile: the Y-sorted vertices, the three edges, the attribute
 * plane equations and the pair of scanlines currently being accumulated.
 */
struct setup_stage {

   /* Vertices are just an array of floats making up each attribute in
    * turn.  Currently fixed at 4 floats, but should change in time.
    * Codegen will help cope with this.
    */
   const struct vertex_header *vmax;      /**< vertex with the largest Y */
   const struct vertex_header *vmid;      /**< vertex with the middle Y */
   const struct vertex_header *vmin;      /**< vertex with the smallest Y */
   const struct vertex_header *vprovoke;  /**< source of constant-interpolated attributes */

   struct edge ebot;   /**< edge vmin -> vmid */
   struct edge etop;   /**< edge vmid -> vmax */
   struct edge emaj;   /**< edge vmin -> vmax */

   float oneoverarea;  /**< 1 / (signed area term computed from emaj and ebot) */

   uint tx, ty;        /**< tile coordinates passed to tri_draw() */

   /* Tile bounds in window pixels; tri_draw() sets max = min + TILE_SIZE. */
   int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;

#if 0
   struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
#else
   /* Plane equations, indexed by attribute slot. */
   struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
#endif

#if 0
   struct quad_header quad; 
#endif

   /* The two scanlines (one quad row) currently being accumulated;
    * flushed by flush_spans().
    */
   struct {
      int left[2];   /**< [0] = row0, [1] = row1 */
      int right[2];
      int y;         /**< even Y of the quad row (see block()) */
      unsigned y_flags; /**< bit 0: row0 written, bit 1: row1 written */
      unsigned mask;     /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
   } span;
};
144
145
146
/* The single setup state used by every routine in this file. */
static struct setup_stage setup;
148
149
150
151
#if 0
/**
 * Basically a cast wrapper.
 *
 * Compiled out: the active code uses the file-scope 'setup' object
 * directly instead of a draw_stage pointer.
 */
static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
{
   return (struct setup_stage *)stage;
}
#endif
161
#if 0
/**
 * Clip setup.quad against the scissor/surface bounds.
 *
 * Compiled out.  It reads setup.quad and setup.softpipe, neither of which
 * is an active member of struct setup_stage in this file, and it accesses
 * its pointer parameter with '.', so it would not build if enabled as-is.
 */
static INLINE void
quad_clip(struct setup_stage *setup)
{
   const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect;
   const int minx = (int) cliprect->minx;
   const int maxx = (int) cliprect->maxx;
   const int miny = (int) cliprect->miny;
   const int maxy = (int) cliprect->maxy;

   if (setup.quad.x0 >= maxx ||
       setup.quad.y0 >= maxy ||
       setup.quad.x0 + 1 < minx ||
       setup.quad.y0 + 1 < miny) {
      /* totally clipped */
      setup.quad.mask = 0x0;
      return;
   }
   /* Partially clipped: drop the column or row that falls outside. */
   if (setup.quad.x0 < minx)
      setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
   if (setup.quad.y0 < miny)
      setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
   if (setup.quad.x0 == maxx - 1)
      setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
   if (setup.quad.y0 == maxy - 1)
      setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
}
#endif
193
#if 0
/**
 * Emit a quad (pass to next stage) with clipping.
 *
 * Compiled out; depends on quad_clip() and on the softpipe quad pipeline,
 * both of which are disabled in this file.
 */
static INLINE void
clip_emit_quad(struct setup_stage *setup)
{
   quad_clip(setup);
   if (setup.quad.mask) {
      struct softpipe_context *sp = setup.softpipe;
      sp->quad.first->run(sp->quad.first, &setup.quad);
   }
}
#endif
208
209 /**
210 * Evaluate attribute coefficients (plane equations) to compute
211 * attribute values for the four fragments in a quad.
212 * Eg: four colors will be compute.
213 */
214 static INLINE void
215 eval_coeff(uint slot, float x, float y, vector float result[4])
216 {
217 switch (spu.vertex_info.interp_mode[slot]) {
218 case INTERP_CONSTANT:
219 result[QUAD_TOP_LEFT] =
220 result[QUAD_TOP_RIGHT] =
221 result[QUAD_BOTTOM_LEFT] =
222 result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v;
223 break;
224
225 case INTERP_LINEAR:
226 /* fall-through, for now */
227 default:
228 {
229 register vector float dadx = setup.coef[slot].dadx.v;
230 register vector float dady = setup.coef[slot].dady.v;
231 register vector float topLeft
232 = spu_add(setup.coef[slot].a0.v,
233 spu_add(spu_mul(spu_splats(x), dadx),
234 spu_mul(spu_splats(y), dady)));
235
236 result[QUAD_TOP_LEFT] = topLeft;
237 result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx);
238 result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady);
239 result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady);
240 }
241 }
242 }
243
244
245 static INLINE vector float
246 eval_z(float x, float y)
247 {
248 const uint slot = 0;
249 const float dzdx = setup.coef[slot].dadx.f[2];
250 const float dzdy = setup.coef[slot].dady.f[2];
251 const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy;
252 const vector float topLeftv = spu_splats(topLeft);
253 const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy };
254 return spu_add(topLeftv, derivs);
255 }
256
257
258 static INLINE mask_t
259 do_depth_test(int x, int y, mask_t quadmask)
260 {
261 float4 zvals;
262 mask_t mask;
263
264 if (spu.fb.depth_format == PIPE_FORMAT_NONE)
265 return quadmask;
266
267 zvals.v = eval_z((float) x, (float) y);
268
269 mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx,
270 y - setup.cliprect_miny,
271 (qword) quadmask,
272 (qword) zvals.v,
273 (qword) spu_splats((unsigned char) 0x0ffu),
274 (qword) spu_splats((unsigned int) 0x01u));
275
276 if (spu_extract(spu_orx(mask), 0))
277 spu.cur_ztile_status = TILE_STATUS_DIRTY;
278
279 return mask;
280 }
281
282
283 /**
284 * Emit a quad (pass to next stage). No clipping is done.
285 * Note: about 1/5 to 1/7 of the time, mask is zero and this function
286 * should be skipped. But adding the test for that slows things down
287 * overall.
288 */
289 static INLINE void
290 emit_quad( int x, int y, mask_t mask )
291 {
292 #if 0
293 struct softpipe_context *sp = setup.softpipe;
294 setup.quad.x0 = x;
295 setup.quad.y0 = y;
296 setup.quad.mask = mask;
297 sp->quad.first->run(sp->quad.first, &setup.quad);
298 #else
299
300 if (spu.read_depth) {
301 mask = do_depth_test(x, y, mask);
302 }
303
304 /* If any bits in mask are set... */
305 if (spu_extract(spu_orx(mask), 0)) {
306 const int ix = x - setup.cliprect_minx;
307 const int iy = y - setup.cliprect_miny;
308 vector float colors[4];
309
310 spu.cur_ctile_status = TILE_STATUS_DIRTY;
311
312 if (spu.texture[0].start) {
313 /* texture mapping */
314 const uint unit = 0;
315 vector float texcoords[4];
316 eval_coeff(2, (float) x, (float) y, texcoords);
317
318 if (spu_extract(mask, 0))
319 colors[0] = spu.sample_texture[unit](unit, texcoords[0]);
320 if (spu_extract(mask, 1))
321 colors[1] = spu.sample_texture[unit](unit, texcoords[1]);
322 if (spu_extract(mask, 2))
323 colors[2] = spu.sample_texture[unit](unit, texcoords[2]);
324 if (spu_extract(mask, 3))
325 colors[3] = spu.sample_texture[unit](unit, texcoords[3]);
326
327
328 if (spu.texture[1].start) {
329 /* multi-texture mapping */
330 const uint unit = 1;
331 vector float colors1[4];
332
333 eval_coeff(2, (float) x, (float) y, texcoords);
334
335 if (spu_extract(mask, 0))
336 colors1[0] = spu.sample_texture[unit](unit, texcoords[0]);
337 if (spu_extract(mask, 1))
338 colors1[1] = spu.sample_texture[unit](unit, texcoords[1]);
339 if (spu_extract(mask, 2))
340 colors1[2] = spu.sample_texture[unit](unit, texcoords[2]);
341 if (spu_extract(mask, 3))
342 colors1[3] = spu.sample_texture[unit](unit, texcoords[3]);
343
344 /* hack: modulate first texture by second */
345 colors[0] = spu_mul(colors[0], colors1[0]);
346 colors[1] = spu_mul(colors[1], colors1[1]);
347 colors[2] = spu_mul(colors[2], colors1[2]);
348 colors[3] = spu_mul(colors[3], colors1[3]);
349 }
350
351 }
352 else {
353 /* simple shading */
354 eval_coeff(1, (float) x, (float) y, colors);
355 }
356
357
358 /* Convert fragment data from AoS to SoA format.
359 */
360 qword soa_frag[4];
361 _transpose_matrix4x4((vec_float4 *) soa_frag, colors);
362
363 /* Read the current framebuffer values.
364 */
365 const qword pix[4] = {
366 (qword) spu_splats(spu.ctile.ui[iy+0][ix+0]),
367 (qword) spu_splats(spu.ctile.ui[iy+0][ix+1]),
368 (qword) spu_splats(spu.ctile.ui[iy+1][ix+0]),
369 (qword) spu_splats(spu.ctile.ui[iy+1][ix+1]),
370 };
371
372 qword soa_pix[4];
373
374 if (spu.read_fb) {
375 /* Convert pixel data from AoS to SoA format.
376 */
377 vec_float4 aos_pix[4] = {
378 spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]),
379 spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]),
380 spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]),
381 spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]),
382 };
383
384 _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix);
385 }
386
387
388 struct spu_blend_results result =
389 (*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3],
390 soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3],
391 spu.const_blend_color[0], spu.const_blend_color[1],
392 spu.const_blend_color[2], spu.const_blend_color[3]);
393
394
395 /* Convert final pixel data from SoA to AoS format.
396 */
397 result = (*spu.logicop)(pix[0], pix[1], pix[2], pix[3],
398 result.r, result.g, result.b, result.a,
399 (qword) mask);
400
401 spu.ctile.ui[iy+0][ix+0] = spu_extract((vec_uint4) result.r, 0);
402 spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0);
403 spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0);
404 spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0);
405 }
406 #endif
407 }
408
409
/**
 * Round an X or Y coordinate down to the even coordinate of the 2x2
 * block/quad that contains it.
 */
static INLINE int block( int x )
{
   /* dropping the low bit == subtracting it */
   const int odd = x & 1;
   return x - odd;
}
418
419
420 /**
421 * Compute mask which indicates which pixels in the 2x2 quad are actually inside
422 * the triangle's bounds.
423 * The mask is a uint4 vector and each element will be 0 or 0xffffffff.
424 */
425 static INLINE mask_t calculate_mask( int x )
426 {
427 /* This is a little tricky.
428 * Use & instead of && to avoid branches.
429 * Use negation to convert true/false to ~0/0 values.
430 */
431 mask_t mask;
432 mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0);
433 mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1);
434 mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2);
435 mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3);
436 return mask;
437 }
438
439
440 /**
441 * Render a horizontal span of quads
442 */
443 static void flush_spans( void )
444 {
445 int minleft, maxright;
446 int x;
447
448 switch (setup.span.y_flags) {
449 case 0x3:
450 /* both odd and even lines written (both quad rows) */
451 minleft = MIN2(setup.span.left[0], setup.span.left[1]);
452 maxright = MAX2(setup.span.right[0], setup.span.right[1]);
453 break;
454
455 case 0x1:
456 /* only even line written (quad top row) */
457 minleft = setup.span.left[0];
458 maxright = setup.span.right[0];
459 break;
460
461 case 0x2:
462 /* only odd line written (quad bottom row) */
463 minleft = setup.span.left[1];
464 maxright = setup.span.right[1];
465 break;
466
467 default:
468 return;
469 }
470
471
472 /* OK, we're very likely to need the tile data now.
473 * clear or finish waiting if needed.
474 */
475 if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
476 /* wait for mfc_get() to complete */
477 //printf("SPU: %u: waiting for ctile\n", spu.init.id);
478 wait_on_mask(1 << TAG_READ_TILE_COLOR);
479 spu.cur_ctile_status = TILE_STATUS_CLEAN;
480 }
481 else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
482 //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
483 clear_c_tile(&spu.ctile);
484 spu.cur_ctile_status = TILE_STATUS_DIRTY;
485 }
486 ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
487
488 if (spu.read_depth) {
489 if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
490 /* wait for mfc_get() to complete */
491 //printf("SPU: %u: waiting for ztile\n", spu.init.id);
492 wait_on_mask(1 << TAG_READ_TILE_Z);
493 spu.cur_ztile_status = TILE_STATUS_CLEAN;
494 }
495 else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
496 //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
497 clear_z_tile(&spu.ztile);
498 spu.cur_ztile_status = TILE_STATUS_DIRTY;
499 }
500 ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
501 }
502
503 /* XXX this loop could be moved into the above switch cases and
504 * calculate_mask() could be simplified a bit...
505 */
506 for (x = block(minleft); x <= block(maxright); x += 2) {
507 #if 1
508 emit_quad( x, setup.span.y, calculate_mask( x ) );
509 #endif
510 }
511
512 setup.span.y = 0;
513 setup.span.y_flags = 0;
514 setup.span.right[0] = 0;
515 setup.span.right[1] = 0;
516 }
517
#if DEBUG_VERTS
/**
 * Debug helper: dump every attribute of a vertex to stderr.
 *
 * The attribute count comes from spu.vertex_info (the setup.quad member the
 * old code used is compiled out), and vector elements are read with
 * spu_extract() as elsewhere in this file.
 */
static void print_vertex(const struct vertex_header *v)
{
   uint i;
   fprintf(stderr, "Vertex: (%p)\n", (const void *) v);
   for (i = 0; i < spu.vertex_info.num_attribs; i++) {
      fprintf(stderr, "  %u: %f %f %f %f\n", i,
              spu_extract(v->data[i], 0), spu_extract(v->data[i], 1),
              spu_extract(v->data[i], 2), spu_extract(v->data[i], 3));
   }
}
#endif
529
530
531 static boolean setup_sort_vertices(const struct vertex_header *v0,
532 const struct vertex_header *v1,
533 const struct vertex_header *v2)
534 {
535
536 #if DEBUG_VERTS
537 fprintf(stderr, "Triangle:\n");
538 print_vertex(v0);
539 print_vertex(v1);
540 print_vertex(v2);
541 #endif
542
543 setup.vprovoke = v2;
544
545 /* determine bottom to top order of vertices */
546 {
547 float y0 = spu_extract(v0->data[0], 1);
548 float y1 = spu_extract(v1->data[0], 1);
549 float y2 = spu_extract(v2->data[0], 1);
550 if (y0 <= y1) {
551 if (y1 <= y2) {
552 /* y0<=y1<=y2 */
553 setup.vmin = v0;
554 setup.vmid = v1;
555 setup.vmax = v2;
556 }
557 else if (y2 <= y0) {
558 /* y2<=y0<=y1 */
559 setup.vmin = v2;
560 setup.vmid = v0;
561 setup.vmax = v1;
562 }
563 else {
564 /* y0<=y2<=y1 */
565 setup.vmin = v0;
566 setup.vmid = v2;
567 setup.vmax = v1;
568 }
569 }
570 else {
571 if (y0 <= y2) {
572 /* y1<=y0<=y2 */
573 setup.vmin = v1;
574 setup.vmid = v0;
575 setup.vmax = v2;
576 }
577 else if (y2 <= y1) {
578 /* y2<=y1<=y0 */
579 setup.vmin = v2;
580 setup.vmid = v1;
581 setup.vmax = v0;
582 }
583 else {
584 /* y1<=y2<=y0 */
585 setup.vmin = v1;
586 setup.vmid = v2;
587 setup.vmax = v0;
588 }
589 }
590 }
591
592 /* Check if triangle is completely outside the tile bounds */
593 if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy)
594 return FALSE;
595 if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny)
596 return FALSE;
597 if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx &&
598 spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx &&
599 spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx)
600 return FALSE;
601 if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx &&
602 spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx &&
603 spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx)
604 return FALSE;
605
606 setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
607 setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
608 setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
609 setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
610 setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0);
611 setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1);
612
613 /*
614 * Compute triangle's area. Use 1/area to compute partial
615 * derivatives of attributes later.
616 *
617 * The area will be the same as prim->det, but the sign may be
618 * different depending on how the vertices get sorted above.
619 *
620 * To determine whether the primitive is front or back facing we
621 * use the prim->det value because its sign is correct.
622 */
623 {
624 const float area = (setup.emaj.dx * setup.ebot.dy -
625 setup.ebot.dx * setup.emaj.dy);
626
627 setup.oneoverarea = 1.0f / area;
628 /*
629 _mesa_printf("%s one-over-area %f area %f det %f\n",
630 __FUNCTION__, setup.oneoverarea, area, prim->det );
631 */
632 }
633
634 #if 0
635 /* We need to know if this is a front or back-facing triangle for:
636 * - the GLSL gl_FrontFacing fragment attribute (bool)
637 * - two-sided stencil test
638 */
639 setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
640 #endif
641
642 return TRUE;
643 }
644
645
646 /**
647 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
648 * The value value comes from vertex->data[slot].
649 * The result will be put into setup.coef[slot].a0.
650 * \param slot which attribute slot
651 */
652 static INLINE void
653 const_coeff(uint slot)
654 {
655 setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0};
656 setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0};
657 setup.coef[slot].a0.v = setup.vprovoke->data[slot];
658 }
659
660
661 /**
662 * Compute a0, dadx and dady for a linearly interpolated coefficient,
663 * for a triangle.
664 */
665 static INLINE void
666 tri_linear_coeff(uint slot, uint firstComp, uint lastComp)
667 {
668 uint i;
669 const float *vmin_d = (float *) &setup.vmin->data[slot];
670 const float *vmid_d = (float *) &setup.vmid->data[slot];
671 const float *vmax_d = (float *) &setup.vmax->data[slot];
672 const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f;
673 const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
674
675 for (i = firstComp; i < lastComp; i++) {
676 float botda = vmid_d[i] - vmin_d[i];
677 float majda = vmax_d[i] - vmin_d[i];
678 float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
679 float b = setup.emaj.dx * botda - majda * setup.ebot.dx;
680
681 ASSERT(slot < PIPE_MAX_SHADER_INPUTS);
682
683 setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
684 setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
685
686 /* calculate a0 as the value which would be sampled for the
687 * fragment at (0,0), taking into account that we want to sample at
688 * pixel centers, in other words (0.5, 0.5).
689 *
690 * this is neat but unfortunately not a good way to do things for
691 * triangles with very large values of dadx or dady as it will
692 * result in the subtraction and re-addition from a0 of a very
693 * large number, which means we'll end up loosing a lot of the
694 * fractional bits and precision from a0. the way to fix this is
695 * to define a0 as the sample at a pixel center somewhere near vmin
696 * instead - i'll switch to this later.
697 */
698 setup.coef[slot].a0.f[i] = (vmin_d[i] -
699 (setup.coef[slot].dadx.f[i] * x +
700 setup.coef[slot].dady.f[i] * y));
701 }
702
703 /*
704 _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
705 slot, "xyzw"[i],
706 setup.coef[slot].a0[i],
707 setup.coef[slot].dadx.f[i],
708 setup.coef[slot].dady.f[i]);
709 */
710 }
711
712
713 /**
714 * As above, but interp setup all four vector components.
715 */
716 static INLINE void
717 tri_linear_coeff4(uint slot)
718 {
719 const vector float vmin_d = setup.vmin->data[slot];
720 const vector float vmid_d = setup.vmid->data[slot];
721 const vector float vmax_d = setup.vmax->data[slot];
722 const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
723 const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
724
725 vector float botda = vmid_d - vmin_d;
726 vector float majda = vmax_d - vmin_d;
727
728 vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
729 spu_mul(botda, spu_splats(setup.emaj.dy)));
730 vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
731 spu_mul(majda, spu_splats(setup.ebot.dx)));
732
733 setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea));
734 setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea));
735
736 vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx);
737 vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy);
738
739 setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy));
740 }
741
742
743
#if 0
/**
 * Compute a0, dadx and dady for a perspective-corrected interpolant,
 * for a triangle.
 * We basically multiply the vertex value by 1/w before computing
 * the plane coefficients (a0, dadx, dady).
 * Later, when we compute the value at a particular fragment position we'll
 * divide the interpolated value by the interpolated W at that fragment.
 *
 * Compiled out: setup_tri_coefficients() currently handles
 * INTERP_PERSPECTIVE with tri_linear_coeff4().  This version subscripts
 * vector data directly (data[slot][i]) rather than using spu_extract().
 *
 * \param slot  attribute slot
 * \param i     component index, 0..3
 */
static void tri_persp_coeff( unsigned slot,
                             unsigned i )
{
   /* premultiply by 1/w:
    */
   float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3];
   float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3];
   float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3];

   float botda = mida - mina;
   float majda = maxa - mina;
   float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
   float b = setup.emaj.dx * botda - majda * setup.ebot.dx;
   
   /*
   printf("tri persp %d,%d: %f %f %f\n", slot, i,
          setup.vmin->data[slot][i],
          setup.vmid->data[slot][i],
          setup.vmax->data[slot][i]
          );
   */

   assert(slot < PIPE_MAX_SHADER_INPUTS);
   assert(i <= 3);

   setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
   setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
   /* a0: value at vmin minus the contribution of vmin's half-pixel-shifted position */
   setup.coef[slot].a0.f[i] = (mina - 
			    (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) + 
			     setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f)));
}
#endif
785
786
787 /**
788 * Compute the setup.coef[] array dadx, dady, a0 values.
789 * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
790 */
791 static void setup_tri_coefficients(void)
792 {
793 #if 1
794 uint i;
795
796 for (i = 0; i < spu.vertex_info.num_attribs; i++) {
797 switch (spu.vertex_info.interp_mode[i]) {
798 case INTERP_NONE:
799 break;
800 case INTERP_POS:
801 /*tri_linear_coeff(i, 2, 3);*/
802 /* XXX interp W if PERSPECTIVE... */
803 tri_linear_coeff4(i);
804 break;
805 case INTERP_CONSTANT:
806 const_coeff(i);
807 break;
808 case INTERP_LINEAR:
809 tri_linear_coeff4(i);
810 break;
811 case INTERP_PERSPECTIVE:
812 tri_linear_coeff4(i); /* temporary */
813 break;
814 default:
815 ASSERT(0);
816 }
817 }
818 #else
819 ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS);
820 ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR ||
821 spu.vertex_info.interp_mode[1] == INTERP_CONSTANT);
822 tri_linear_coeff(0, 2, 3); /* slot 0, z */
823 tri_linear_coeff(1, 0, 4); /* slot 1, color */
824 #endif
825 }
826
827
828 static void setup_tri_edges(void)
829 {
830 float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
831 float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;
832
833 float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
834 float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f;
835 float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f;
836
837 setup.emaj.sy = CEILF(vmin_y);
838 setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
839 setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
840 setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;
841
842 setup.etop.sy = CEILF(vmid_y);
843 setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
844 setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
845 setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;
846
847 setup.ebot.sy = CEILF(vmin_y);
848 setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
849 setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
850 setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
851 }
852
853
854 /**
855 * Render the upper or lower half of a triangle.
856 * Scissoring/cliprect is applied here too.
857 */
858 static void subtriangle( struct edge *eleft,
859 struct edge *eright,
860 unsigned lines )
861 {
862 const int minx = setup.cliprect_minx;
863 const int maxx = setup.cliprect_maxx;
864 const int miny = setup.cliprect_miny;
865 const int maxy = setup.cliprect_maxy;
866 int y, start_y, finish_y;
867 int sy = (int)eleft->sy;
868
869 ASSERT((int)eleft->sy == (int) eright->sy);
870
871 /* clip top/bottom */
872 start_y = sy;
873 finish_y = sy + lines;
874
875 if (start_y < miny)
876 start_y = miny;
877
878 if (finish_y > maxy)
879 finish_y = maxy;
880
881 start_y -= sy;
882 finish_y -= sy;
883
884 /*
885 _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
886 */
887
888 for (y = start_y; y < finish_y; y++) {
889
890 /* avoid accumulating adds as floats don't have the precision to
891 * accurately iterate large triangle edges that way. luckily we
892 * can just multiply these days.
893 *
894 * this is all drowned out by the attribute interpolation anyway.
895 */
896 int left = (int)(eleft->sx + y * eleft->dxdy);
897 int right = (int)(eright->sx + y * eright->dxdy);
898
899 /* clip left/right */
900 if (left < minx)
901 left = minx;
902 if (right > maxx)
903 right = maxx;
904
905 if (left < right) {
906 int _y = sy + y;
907 if (block(_y) != setup.span.y) {
908 flush_spans();
909 setup.span.y = block(_y);
910 }
911
912 setup.span.left[_y&1] = left;
913 setup.span.right[_y&1] = right;
914 setup.span.y_flags |= 1<<(_y&1);
915 }
916 }
917
918
919 /* save the values so that emaj can be restarted:
920 */
921 eleft->sx += lines * eleft->dxdy;
922 eright->sx += lines * eright->dxdy;
923 eleft->sy += lines;
924 eright->sy += lines;
925 }
926
927
928 /**
929 * Draw triangle into tile at (tx, ty) (tile coords)
930 * The tile data should have already been fetched.
931 */
932 boolean
933 tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty)
934 {
935 setup.tx = tx;
936 setup.ty = ty;
937
938 /* set clipping bounds to tile bounds */
939 setup.cliprect_minx = tx * TILE_SIZE;
940 setup.cliprect_miny = ty * TILE_SIZE;
941 setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
942 setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
943
944 if (!setup_sort_vertices((struct vertex_header *) v0,
945 (struct vertex_header *) v1,
946 (struct vertex_header *) v2)) {
947 return FALSE; /* totally clipped */
948 }
949
950 setup_tri_coefficients();
951 setup_tri_edges();
952
953 setup.span.y = 0;
954 setup.span.y_flags = 0;
955 setup.span.right[0] = 0;
956 setup.span.right[1] = 0;
957 /* setup.span.z_mode = tri_z_mode( setup.ctx ); */
958
959 /* init_constant_attribs( setup ); */
960
961 if (setup.oneoverarea < 0.0) {
962 /* emaj on left:
963 */
964 subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
965 subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
966 }
967 else {
968 /* emaj on right:
969 */
970 subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
971 subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
972 }
973
974 flush_spans();
975
976 return TRUE;
977 }