1 /* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */
2 /*
3 * Copyright 2000 Gareth Hughes
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
21 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /*
26 * Authors:
27 * Gareth Hughes <gareth@valinux.com>
28 * Leif Delgass <ldelgass@retinalburn.net>
29 * José Fonseca <j_r_fonseca@yahoo.co.uk>
30 */
31
32 #include "glheader.h"
33 #include "mtypes.h"
34 #include "colormac.h"
35 #include "macros.h"
36
37 #include "swrast/swrast.h"
38 #include "swrast_setup/swrast_setup.h"
39 #include "tnl/tnl.h"
40 #include "tnl/t_context.h"
41 #include "tnl/t_pipeline.h"
42
43 #include "mach64_tris.h"
44 #include "mach64_state.h"
45 #include "mach64_context.h"
46 #include "mach64_vb.h"
47 #include "mach64_ioctl.h"
48
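/* Map core GL primitive types (GL_POINTS .. GL_POLYGON) to the
 * corresponding mach64 hardware primitive codes.
 */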
49 static const GLuint hw_prim[GL_POLYGON+1] = {
50 MACH64_PRIM_POINTS,
51 MACH64_PRIM_LINES,
52 MACH64_PRIM_LINE_LOOP,
53 MACH64_PRIM_LINE_STRIP,
54 MACH64_PRIM_TRIANGLES,
55 MACH64_PRIM_TRIANGLE_STRIP,
56 MACH64_PRIM_TRIANGLE_FAN,
57 MACH64_PRIM_QUADS,
58 MACH64_PRIM_QUAD_STRIP,
59 MACH64_PRIM_POLYGON,
60 };
61
62 static void mach64RasterPrimitive( GLcontext *ctx, GLuint hwprim );
63 static void mach64RenderPrimitive( GLcontext *ctx, GLenum prim );
64
65
66 /* FIXME: Remove this when native template is finished. */
67 #define MACH64_PRINT_BUFFER 0
68
69 /***********************************************************************
70 * Emit primitives as inline vertices *
71 ***********************************************************************/
72
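/* Copy one vertex into the DMA buffer as a register-write packet.
 * The vertex data lives in the last 'vertsize' dwords of the 10-dword
 * vertex; when vertsize > 7 the secondary texcoords are emitted first
 * as a separate 3-dword write.  The main write ends at
 * MACH64_VERTEX_n_X_Y, and 'm' adds one to its dword count so the
 * caller of COPY_VERTEX_OOA can append the one-over-area value
 * immediately afterwards.
 */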
73 #if defined(USE_X86_ASM)
74 #define DO_COPY_VERTEX( vb, vertsize, v, n, m ) \
75 do { \
76 register const CARD32 *__p __asm__( "esi" ) = (CARD32 *)v + 10 - vertsize; \
77 register int __s __asm__( "ecx" ) = vertsize; \
78 if ( vertsize > 7 ) { \
79 *vb++ = (2 << 16) | ADRINDEX( MACH64_VERTEX_##n##_SECONDARY_S ); \
80 __asm__ __volatile__( "movsl ; movsl ; movsl" \
81 : "=D" (vb), "=S" (__p) \
82 : "0" (vb), "1" (__p) ); \
83 __s -= 3; \
84 } \
85 *vb++ = ((__s - 1 + m) << 16) | \
86 (ADRINDEX( MACH64_VERTEX_##n##_X_Y ) - (__s - 1) ); \
87 __asm__ __volatile__( "rep ; movsl" \
88 : "=%c" (__s), "=D" (vb), "=S" (__p) \
89 : "0" (__s), "1" (vb), "2" (__p) ); \
90 } while (0)
91 #else
92 #define DO_COPY_VERTEX( vb, vertsize, v, n, m ) \
93 do { \
94 CARD32 *__p = (CARD32 *)v + 10 - vertsize; \
95 int __s = vertsize; \
96 if ( vertsize > 7 ) { \
97 LE32_OUT( vb++, (2 << 16) | \
98 ADRINDEX( MACH64_VERTEX_##n##_SECONDARY_S ) ); \
99 *vb++ = *__p++; \
100 *vb++ = *__p++; \
101 *vb++ = *__p++; \
102 __s -= 3; \
103 } \
104 LE32_OUT( vb++, ((__s - 1 + m) << 16) | \
105 (ADRINDEX( MACH64_VERTEX_##n##_X_Y ) - (__s - 1)) ); \
106 while ( __s-- ) { \
107 *vb++ = *__p++; \
108 } \
109 } while (0)
110 #endif
111
112 #define COPY_VERTEX( vb, vertsize, v, n ) DO_COPY_VERTEX( vb, vertsize, v, n, 0 )
113 #define COPY_VERTEX_OOA( vb, vertsize, v, n ) DO_COPY_VERTEX( vb, vertsize, v, n, 1 )
114
115
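/* Emit a quad as hardware vertices.  The signed area of (v0, v1, v3)
 * is used for backface culling and to derive the one-over-area value
 * written after each triangle; the quad is drawn as two triangles,
 * with v2 replacing hardware vertex 1 for the second one.
 */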
116 static __inline void mach64_draw_quad( mach64ContextPtr mmesa,
117 mach64VertexPtr v0,
118 mach64VertexPtr v1,
119 mach64VertexPtr v2,
120 mach64VertexPtr v3 )
121 {
122 #if MACH64_NATIVE_VTXFMT
123 GLcontext *ctx = mmesa->glCtx;
124 const GLuint vertsize = mmesa->vertex_size;
125 GLint a;
126 GLfloat ooa;
127 GLuint xy;
128 const GLuint xyoffset = 9;
129 GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
130 unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2;
131 CARD32 *vb, *vbchk;
132
133 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
134 fprintf(stderr, "%s:\n", __FUNCTION__);
135 fprintf(stderr,"Vertex 1:\n");
136 mach64_print_vertex( ctx, v0 );
137 fprintf(stderr,"Vertex 2:\n");
138 mach64_print_vertex( ctx, v1 );
139 fprintf(stderr,"Vertex 3:\n");
140 mach64_print_vertex( ctx, v2 );
141 fprintf(stderr,"Vertex 4:\n");
142 mach64_print_vertex( ctx, v3 );
143 }
144
145 xy = LE32_IN( &v0->ui[xyoffset] );
146 xx[0] = (GLshort)( xy >> 16 );
147 yy[0] = (GLshort)( xy & 0xffff );
148
149 xy = LE32_IN( &v1->ui[xyoffset] );
150 xx[1] = (GLshort)( xy >> 16 );
151 yy[1] = (GLshort)( xy & 0xffff );
152
153 xy = LE32_IN( &v3->ui[xyoffset] );
154 xx[2] = (GLshort)( xy >> 16 );
155 yy[2] = (GLshort)( xy & 0xffff );
156
157 a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
158 (yy[0] - yy[2]) * (xx[1] - xx[2]);
159
160 if ( (mmesa->backface_sign &&
161 ((a < 0 && !signbit( mmesa->backface_sign )) ||
162 (a > 0 && signbit( mmesa->backface_sign )))) ) {
163 /* cull quad */
164 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
165 fprintf(stderr,"Quad culled\n");
166 return;
167 }
168
169 ooa = 16.0 / a;
170
171 vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
172 vbchk = vb + vbsiz;
173
174 COPY_VERTEX( vb, vertsize, v0, 1 );
175 COPY_VERTEX( vb, vertsize, v1, 2 );
176 COPY_VERTEX_OOA( vb, vertsize, v3, 3 );
177 LE32_OUT( vb++, *(CARD32 *)&ooa );
178
179 xy = LE32_IN( &v2->ui[xyoffset] );
180 xx[0] = (GLshort)( xy >> 16 );
181 yy[0] = (GLshort)( xy & 0xffff );
182
183 a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
184 (yy[0] - yy[2]) * (xx[1] - xx[2]);
185
186 ooa = 16.0 / a;
187
188 COPY_VERTEX_OOA( vb, vertsize, v2, 1 );
189 LE32_OUT( vb++, *(CARD32 *)&ooa );
190
191 assert( vb == vbchk );
192
193 #if MACH64_PRINT_BUFFER
194 {
195 int i;
196 fprintf(stderr, "quad:\n");
197 for (i = 0; i < vbsiz; i++)
198 fprintf(stderr, " %08lx\n", *(vb - vbsiz + i));
199 fprintf(stderr, "\n");
200 }
201 #endif
202 #else
203 GLuint vertsize = mmesa->vertex_size;
204 GLint coloridx;
205 GLfloat ooa;
206 GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
207 unsigned vbsiz =
208 ((
209 1 +
210 (vertsize > 6 ? 2 : 0) +
211 (vertsize > 4 ? 2 : 0) +
212 3 +
213 (mmesa->multitex ? 4 : 0)
214 ) * 4 + 4);
215 CARD32 *vb;
216 unsigned vbidx = 0;
217
218 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
219 fprintf(stderr, "%s:\n", __FUNCTION__);
220 fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n",
221 v0->v.x, v0->v.y, v0->v.z, v0->v.w, v0->v.u0, v0->v.v0, v0->v.u1, v0->v.v1);
222 fprintf(stderr,"Vertex 2: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n",
223 v1->v.x, v1->v.y, v1->v.z, v1->v.w, v1->v.u0, v1->v.v0, v1->v.u1, v1->v.v1);
224 fprintf(stderr,"Vertex 3: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n",
225 v2->v.x, v2->v.y, v2->v.z, v2->v.w, v2->v.u0, v2->v.v0, v2->v.u1, v2->v.v1);
226 fprintf(stderr,"Vertex 4: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n",
227 v3->v.x, v3->v.y, v3->v.z, v3->v.w, v3->v.u0, v3->v.v0, v3->v.u1, v3->v.v1);
228 }
229
230 #if MACH64_CLIENT_STATE_EMITS
231 /* Enable for interleaved client-side state emits */
232 LOCK_HARDWARE( mmesa );
233 if ( mmesa->dirty ) {
234 mach64EmitHwStateLocked( mmesa );
235 }
236 if ( mmesa->sarea->dirty ) {
237 mach64UploadHwStateLocked( mmesa );
238 }
239 UNLOCK_HARDWARE( mmesa );
240 #endif
241
242 xx[0] = (GLint)(v0->v.x * 4);
243 yy[0] = (GLint)(v0->v.y * 4);
244
245 xx[1] = (GLint)(v1->v.x * 4);
246 yy[1] = (GLint)(v1->v.y * 4);
247
248 xx[2] = (GLint)(v3->v.x * 4);
249 yy[2] = (GLint)(v3->v.y * 4);
250
251 ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
252 (yy[0] - yy[2]) * (xx[1] - xx[2]));
253
254 if ( ooa * mmesa->backface_sign < 0 ) {
255 /* cull quad */
256 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
257 fprintf(stderr,"Quad culled\n");
258 return;
259 }
260
261 vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
262
263 ooa = 1.0 / ooa;
264
265 coloridx = (vertsize > 4) ? 4: 3;
266
267 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
268 switch (vertsize) {
269 case 6:
270 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
271 break;
272 case 4:
273 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
274 break;
275 default: /* vertsize >= 8 */
276 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
277 break;
278 }
279 if (vertsize > 6) {
280 LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
281 LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
282 }
283 if (vertsize > 4) {
284 LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
285 LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
286 }
287 LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_1_Z */
288 vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_1_ARGB */
289 LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
290
291 if (mmesa->multitex) {
292 /* setup for 3 sequential reg writes */
293 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
294 LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
295 LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
296 LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
297 }
298
299 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
300 switch (vertsize) {
301 case 6:
302 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
303 break;
304 case 4:
305 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
306 break;
307 default: /* vertsize >= 8 */
308 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
309 break;
310 }
311 if (vertsize > 6) {
312 LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */
313 LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */
314 }
315 if (vertsize > 4) {
316 LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */
317 LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
318 }
319 LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) ); /* MACH64_VERTEX_2_Z */
320 vb[vbidx++] = v1->ui[coloridx]; /* MACH64_VERTEX_2_ARGB */
321 LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
322
323 if (mmesa->multitex) {
324 /* setup for 3 sequential reg writes */
325 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_SECONDARY_S) );
326 LE32_OUT( &vb[vbidx++], v1->ui[8] ); /* MACH64_VERTEX_2_SECONDARY_S */
327 LE32_OUT( &vb[vbidx++], v1->ui[9] ); /* MACH64_VERTEX_2_SECONDARY_T */
328 LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_SECONDARY_W */
329 }
330
331 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
332 switch (vertsize) {
333 case 6:
334 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
335 break;
336 case 4:
337 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
338 break;
339 default: /* vertsize >= 8 */
340 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
341 break;
342 }
343 if (vertsize > 6) {
344 LE32_OUT( &vb[vbidx++], v3->ui[6] ); /* MACH64_VERTEX_3_S */
345 LE32_OUT( &vb[vbidx++], v3->ui[7] ); /* MACH64_VERTEX_3_T */
346 }
347 if (vertsize > 4) {
348 LE32_OUT( &vb[vbidx++], v3->ui[3] ); /* MACH64_VERTEX_3_W */
349 LE32_OUT( &vb[vbidx++], v3->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
350 }
351 LE32_OUT( &vb[vbidx++], ((GLint)(v3->v.z) << 15) ); /* MACH64_VERTEX_3_Z */
352 vb[vbidx++] = v3->ui[coloridx]; /* MACH64_VERTEX_3_ARGB */
353 LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
354
355 if (mmesa->multitex) {
356 /* setup for 3 sequential reg writes */
357 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_SECONDARY_S) );
358 LE32_OUT( &vb[vbidx++], v3->ui[8] ); /* MACH64_VERTEX_3_SECONDARY_S */
359 LE32_OUT( &vb[vbidx++], v3->ui[9] ); /* MACH64_VERTEX_3_SECONDARY_T */
360 LE32_OUT( &vb[vbidx++], v3->ui[3] ); /* MACH64_VERTEX_3_SECONDARY_W */
361 }
362
363 LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
364 LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
365
366 xx[0] = (GLint)(v2->v.x * 4);
367 yy[0] = (GLint)(v2->v.y * 4);
368
369 ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
370 (yy[0] - yy[2]) * (xx[1] - xx[2]));
371 ooa = 1.0 / ooa;
372
373 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
374 switch (vertsize) {
375 case 6:
376 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
377 break;
378 case 4:
379 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
380 break;
381 default: /* vertsize >= 8 */
382 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
383 break;
384 }
385 if (vertsize > 6) {
386 LE32_OUT( &vb[vbidx++], v2->ui[6] ); /* MACH64_VERTEX_1_S */
387 LE32_OUT( &vb[vbidx++], v2->ui[7] ); /* MACH64_VERTEX_1_T */
388 }
389 if (vertsize > 4) {
390 LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_1_W */
391 LE32_OUT( &vb[vbidx++], v2->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
392 }
393 LE32_OUT( &vb[vbidx++], ((GLint)(v2->v.z) << 15) ); /* MACH64_VERTEX_1_Z */
394 vb[vbidx++] = v2->ui[coloridx]; /* MACH64_VERTEX_1_ARGB */
395 LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
396
397 if (mmesa->multitex) {
398 /* setup for 3 sequential reg writes */
399 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
400 LE32_OUT( &vb[vbidx++], v2->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
401 LE32_OUT( &vb[vbidx++], v2->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
402 LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
403 }
404
405 LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
406 LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
407
408 assert(vbsiz == vbidx);
409
410 #if MACH64_PRINT_BUFFER
411 {
412 int i;
413 fprintf(stderr, "quad:\n");
414 for (i = 0; i < vbsiz; i++)
415 fprintf(stderr, " %08lx\n", *(vb + i));
416 fprintf(stderr, "\n");
417 }
418 #endif
419 #endif
420 }
421
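/* Emit a single triangle: cull it against the current backface sign,
 * then write the three vertices followed by the one-over-area value.
 */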
422 static __inline void mach64_draw_triangle( mach64ContextPtr mmesa,
423 mach64VertexPtr v0,
424 mach64VertexPtr v1,
425 mach64VertexPtr v2 )
426 {
427 #if MACH64_NATIVE_VTXFMT
428 GLcontext *ctx = mmesa->glCtx;
429 GLuint vertsize = mmesa->vertex_size;
430 GLint a;
431 GLfloat ooa;
432 GLuint xy;
433 const GLuint xyoffset = 9;
434 GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
435 unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 3 + 1;
436 CARD32 *vb, *vbchk;
437
438 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
439 fprintf(stderr, "%s:\n", __FUNCTION__);
440 fprintf(stderr,"Vertex 1:\n");
441 mach64_print_vertex( ctx, v0 );
442 fprintf(stderr,"Vertex 2:\n");
443 mach64_print_vertex( ctx, v1 );
444 fprintf(stderr,"Vertex 3:\n");
445 mach64_print_vertex( ctx, v2 );
446 }
447
448 xy = LE32_IN( &v0->ui[xyoffset] );
449 xx[0] = (GLshort)( xy >> 16 );
450 yy[0] = (GLshort)( xy & 0xffff );
451
452 xy = LE32_IN( &v1->ui[xyoffset] );
453 xx[1] = (GLshort)( xy >> 16 );
454 yy[1] = (GLshort)( xy & 0xffff );
455
456 xy = LE32_IN( &v2->ui[xyoffset] );
457 xx[2] = (GLshort)( xy >> 16 );
458 yy[2] = (GLshort)( xy & 0xffff );
459
460 a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
461 (yy[0] - yy[2]) * (xx[1] - xx[2]);
462
463 if ( mmesa->backface_sign &&
464 ((a < 0 && !signbit( mmesa->backface_sign )) ||
465 (a > 0 && signbit( mmesa->backface_sign ))) ) {
466 /* cull triangle */
467 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
468 fprintf(stderr,"Triangle culled\n");
469 return;
470 }
471
472 ooa = 16.0 / a;
473
474 vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
475 vbchk = vb + vbsiz;
476
477 COPY_VERTEX( vb, vertsize, v0, 1 );
478 COPY_VERTEX( vb, vertsize, v1, 2 );
479 COPY_VERTEX_OOA( vb, vertsize, v2, 3 );
480 LE32_OUT( vb++, *(CARD32 *)&ooa );
481
482 assert( vb == vbchk );
483
484 #if MACH64_PRINT_BUFFER
485 {
486 int i;
487 fprintf(stderr, "tri:\n");
488 for (i = 0; i < vbsiz; i++)
489 fprintf(stderr, " %08lx\n", *(vb - vbsiz + i));
490 fprintf(stderr, "\n");
491 }
492 #endif
493 #else
494 GLuint vertsize = mmesa->vertex_size;
495 GLint coloridx;
496 GLfloat ooa;
497 GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
498 unsigned vbsiz =
499 ((
500 1 +
501 (vertsize > 6 ? 2 : 0) +
502 (vertsize > 4 ? 2 : 0) +
503 3 +
504 (mmesa->multitex ? 4 : 0)
505 ) * 3 + 2);
506 CARD32 *vb;
507 unsigned vbidx = 0;
508
509 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
510 fprintf(stderr, "%s:\n", __FUNCTION__);
511 fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n",
512 v0->v.x, v0->v.y, v0->v.z, v0->v.w, v0->v.u0, v0->v.v0, v0->v.u1, v0->v.v1);
513 fprintf(stderr,"Vertex 2: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n",
514 v1->v.x, v1->v.y, v1->v.z, v1->v.w, v1->v.u0, v1->v.v0, v1->v.u1, v1->v.v1);
515 fprintf(stderr,"Vertex 3: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n",
516 v2->v.x, v2->v.y, v2->v.z, v2->v.w, v2->v.u0, v2->v.v0, v2->v.u1, v2->v.v1);
517 }
518
519 #if MACH64_CLIENT_STATE_EMITS
520 /* Enable for interleaved client-side state emits */
521 LOCK_HARDWARE( mmesa );
522 if ( mmesa->dirty ) {
523 mach64EmitHwStateLocked( mmesa );
524 }
525 if ( mmesa->sarea->dirty ) {
526 mach64UploadHwStateLocked( mmesa );
527 }
528 UNLOCK_HARDWARE( mmesa );
529 #endif
530
531 xx[0] = (GLint)(v0->v.x * 4);
532 yy[0] = (GLint)(v0->v.y * 4);
533
534 xx[1] = (GLint)(v1->v.x * 4);
535 yy[1] = (GLint)(v1->v.y * 4);
536
537 xx[2] = (GLint)(v2->v.x * 4);
538 yy[2] = (GLint)(v2->v.y * 4);
539
540 ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
541 (yy[0] - yy[2]) * (xx[1] - xx[2]));
542
543 if ( ooa * mmesa->backface_sign < 0 ) {
544 /* cull triangle */
545 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
546 fprintf(stderr,"Triangle culled\n");
547 return;
548 }
549
550 vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
551
552 ooa = 1.0 / ooa;
553
554 coloridx = (vertsize > 4) ? 4: 3;
555
556 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
557 switch (vertsize) {
558 case 6:
559 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
560 break;
561 case 4:
562 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
563 break;
564 default: /* vertsize >= 8 */
565 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
566 break;
567 }
568 if (vertsize > 6) {
569 LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
570 LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
571 }
572 if (vertsize > 4) {
573 LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
574 LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
575 }
576 LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_1_Z */
577 vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_1_ARGB */
578 LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
579
580 if (mmesa->multitex) {
581 /* setup for 3 sequential reg writes */
582 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
583 LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
584 LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
585 LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
586 }
587
588 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
589 switch (vertsize) {
590 case 6:
591 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
592 break;
593 case 4:
594 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
595 break;
596 default: /* vertsize >= 8 */
597 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
598 break;
599 }
600 if (vertsize > 6) {
601 LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */
602 LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */
603 }
604 if (vertsize > 4) {
605 LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */
606 LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
607 }
608 LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) ); /* MACH64_VERTEX_2_Z */
609 vb[vbidx++] = v1->ui[coloridx]; /* MACH64_VERTEX_2_ARGB */
610 LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
611
612 if (mmesa->multitex) {
613 /* setup for 3 sequential reg writes */
614 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_SECONDARY_S) );
615 LE32_OUT( &vb[vbidx++], v1->ui[8] ); /* MACH64_VERTEX_2_SECONDARY_S */
616 LE32_OUT( &vb[vbidx++], v1->ui[9] ); /* MACH64_VERTEX_2_SECONDARY_T */
617 LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_SECONDARY_W */
618 }
619
620 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
621 switch (vertsize) {
622 case 6:
623 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
624 break;
625 case 4:
626 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
627 break;
628 default: /* vertsize >= 8 */
629 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
630 break;
631 }
632 if (vertsize > 6) {
633 LE32_OUT( &vb[vbidx++], v2->ui[6] ); /* MACH64_VERTEX_3_S */
634 LE32_OUT( &vb[vbidx++], v2->ui[7] ); /* MACH64_VERTEX_3_T */
635 }
636 if (vertsize > 4) {
637 LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_3_W */
638 LE32_OUT( &vb[vbidx++], v2->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
639 }
640 LE32_OUT( &vb[vbidx++], ((GLint)(v2->v.z) << 15) ); /* MACH64_VERTEX_3_Z */
641 vb[vbidx++] = v2->ui[coloridx]; /* MACH64_VERTEX_3_ARGB */
642 LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
643
644 if (mmesa->multitex) {
645 /* setup for 3 sequential reg writes */
646 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_SECONDARY_S) );
647 LE32_OUT( &vb[vbidx++], v2->ui[8] ); /* MACH64_VERTEX_3_SECONDARY_S */
648 LE32_OUT( &vb[vbidx++], v2->ui[9] ); /* MACH64_VERTEX_3_SECONDARY_T */
649 LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_3_SECONDARY_W */
650 }
651
652 LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
653 LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
654
655 assert(vbsiz == vbidx);
656
657 #if MACH64_PRINT_BUFFER
658 {
659 int i;
660 fprintf(stderr, "tri:\n");
661 for (i = 0; i < vbsiz; ++i)
662 fprintf(stderr, " %08lx\n", *(vb + i));
663 fprintf(stderr, "\n");
664 }
665 #endif
666 #endif
667 }
668
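/* Emit a line as a screen-aligned quad (two triangles), widening the
 * endpoints perpendicular to the line's major direction by the line
 * width.
 */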
669 static __inline void mach64_draw_line( mach64ContextPtr mmesa,
670 mach64VertexPtr v0,
671 mach64VertexPtr v1 )
672 {
673 #if MACH64_NATIVE_VTXFMT
674 GLcontext *ctx = mmesa->glCtx;
675 const GLuint vertsize = mmesa->vertex_size;
676 GLint width = (GLint)(mmesa->glCtx->Line._Width * 2.0); /* 2 fractional bits for hardware */
677 GLfloat ooa;
678 GLuint *pxy0, *pxy1;
679 GLuint xy0old, xy0, xy1old, xy1;
680 const GLuint xyoffset = 9;
681 GLint x0, y0, x1, y1;
682 GLint dx, dy, ix, iy;
683 unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2;
684 CARD32 *vb, *vbchk;
685
686 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
687 fprintf(stderr, "%s:\n", __FUNCTION__);
688 fprintf(stderr,"Vertex 1:\n");
689 mach64_print_vertex( ctx, v0 );
690 fprintf(stderr,"Vertex 2:\n");
691 mach64_print_vertex( ctx, v1 );
692 }
693
694 if( !width )
695 width = 1; /* round to the nearest supported width */
696
697 pxy0 = &v0->ui[xyoffset];
698 xy0old = *pxy0;
699 xy0 = LE32_IN( &xy0old );
700 x0 = (GLshort)( xy0 >> 16 );
701 y0 = (GLshort)( xy0 & 0xffff );
702
703 pxy1 = &v1->ui[xyoffset];
704 xy1old = *pxy1;
705 xy1 = LE32_IN( &xy1old );
706 x1 = (GLshort)( xy1 >> 16 );
707 y1 = (GLshort)( xy1 & 0xffff );
708
709 if ( (dx = x1 - x0) < 0 ) {
710 dx = -dx;
711 }
712 if ( (dy = y1 - y0) < 0 ) {
713 dy = -dy;
714 }
715
716 /* adjust vertices depending on line direction */
717 if ( dx > dy ) {
718 ix = 0;
719 iy = width;
720 ooa = 8.0 / ((x1 - x0) * width);
721 } else {
722 ix = width;
723 iy = 0;
724 ooa = 8.0 / ((y0 - y1) * width);
725 }
726
727 vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
728 vbchk = vb + vbsiz;
729
730 LE32_OUT( pxy0, (( x0 - ix ) << 16) | (( y0 - iy ) & 0xffff) );
731 COPY_VERTEX( vb, vertsize, v0, 1 );
732 LE32_OUT( pxy1, (( x1 - ix ) << 16) | (( y1 - iy ) & 0xffff) );
733 COPY_VERTEX( vb, vertsize, v1, 2 );
734 LE32_OUT( pxy0, (( x0 + ix ) << 16) | (( y0 + iy ) & 0xffff) );
735 COPY_VERTEX_OOA( vb, vertsize, v0, 3 );
736 LE32_OUT( vb++, *(CARD32 *)&ooa );
737
738 ooa = -ooa;
739
740 LE32_OUT( pxy1, (( x1 + ix ) << 16) | (( y1 + iy ) & 0xffff) );
741 COPY_VERTEX_OOA( vb, vertsize, v1, 1 );
742 LE32_OUT( vb++, *(CARD32 *)&ooa );
743
744 *pxy0 = xy0old;
745 *pxy1 = xy1old;
746 #else /* !MACH64_NATIVE_VTXFMT */
747 GLuint vertsize = mmesa->vertex_size;
748 GLint coloridx;
749 float width = 1.0; /* Only support 1 pix lines now */
750 GLfloat ooa;
751 GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
752 unsigned vbsiz =
753 ((
754 1 +
755 (vertsize > 6 ? 2 : 0) +
756 (vertsize > 4 ? 2 : 0) +
757 3 +
758 (mmesa->multitex ? 4 : 0)
759 ) * 4 + 4);
760 CARD32 *vb;
761 unsigned vbidx = 0;
762
763 GLfloat hw, dx, dy, ix, iy;
764 GLfloat x0 = v0->v.x;
765 GLfloat y0 = v0->v.y;
766 GLfloat x1 = v1->v.x;
767 GLfloat y1 = v1->v.y;
768
769 #if MACH64_CLIENT_STATE_EMITS
770 /* Enable for interleaved client-side state emits */
771 LOCK_HARDWARE( mmesa );
772 if ( mmesa->dirty ) {
773 mach64EmitHwStateLocked( mmesa );
774 }
775 if ( mmesa->sarea->dirty ) {
776 mach64UploadHwStateLocked( mmesa );
777 }
778 UNLOCK_HARDWARE( mmesa );
779 #endif
780
781 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
782 fprintf(stderr, "%s:\n", __FUNCTION__);
783 fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n",
784 v0->v.x, v0->v.y, v0->v.z, v0->v.w);
785 fprintf(stderr,"Vertex 2: x: %.2f, y: %.2f, z: %.2f, w: %f\n",
786 v1->v.x, v1->v.y, v1->v.z, v1->v.w);
787 }
788
789 hw = 0.5F * width;
790 if (hw > 0.1F && hw < 0.5F) {
791 hw = 0.5F;
792 }
793
794 /* adjust vertices depending on line direction */
795 dx = v0->v.x - v1->v.x;
796 dy = v0->v.y - v1->v.y;
797 if (dx * dx > dy * dy) {
798 /* X-major line */
799 ix = 0.0F;
800 iy = hw;
801 if (x1 < x0) {
802 x0 += 0.5F;
803 x1 += 0.5F;
804 }
805 y0 -= 0.5F;
806 y1 -= 0.5F;
807 }
808 else {
809 /* Y-major line */
810 ix = hw;
811 iy = 0.0F;
812 if (y1 > y0) {
813 y0 -= 0.5F;
814 y1 -= 0.5F;
815 }
816 x0 += 0.5F;
817 x1 += 0.5F;
818 }
819
820 xx[0] = (GLint)((x0 - ix) * 4);
821 yy[0] = (GLint)((y0 - iy) * 4);
822
823 xx[1] = (GLint)((x1 - ix) * 4);
824 yy[1] = (GLint)((y1 - iy) * 4);
825
826 xx[2] = (GLint)((x0 + ix) * 4);
827 yy[2] = (GLint)((y0 + iy) * 4);
828
829 ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
830 (yy[0] - yy[2]) * (xx[1] - xx[2]));
831
832 if ( ooa * mmesa->backface_sign < 0 ) {
833 /* cull line */
834 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
835 fprintf(stderr,"Line culled\n");
836 return;
837 }
838
839 vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
840
841 ooa = 1.0 / ooa;
842
843 coloridx = (vertsize > 4) ? 4: 3;
844
845 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
846 switch (vertsize) {
847 case 6:
848 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
849 break;
850 case 4:
851 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
852 break;
853 default: /* vertsize >= 8 */
854 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
855 break;
856 }
857 if (vertsize > 6) {
858 LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
859 LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
860 }
861 if (vertsize > 4) {
862 LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
863 LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
864 }
865 LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_1_Z */
866 vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_1_ARGB */
867 LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
868
869 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
870 switch (vertsize) {
871 case 6:
872 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
873 break;
874 case 4:
875 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
876 break;
877 default: /* vertsize >= 8 */
878 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
879 break;
880 }
881 if (vertsize > 6) {
882 LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */
883 LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */
884 }
885 if (vertsize > 4) {
886 LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */
887 LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
888 }
889 LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) ); /* MACH64_VERTEX_2_Z */
890 vb[vbidx++] = v1->ui[coloridx]; /* MACH64_VERTEX_2_ARGB */
891 LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
892
893 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
894 switch (vertsize) {
895 case 6:
896 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
897 break;
898 case 4:
899 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
900 break;
901 default: /* vertsize >= 8 */
902 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
903 break;
904 }
905 if (vertsize > 6) {
906 LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_3_S */
907 LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_3_T */
908 }
909 if (vertsize > 4) {
910 LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_3_W */
911 LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
912 }
913 LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_3_Z */
914 vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_3_ARGB */
915 LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
916
917 LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
918 LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
919
920 xx[0] = (GLint)((x1 + ix) * 4);
921 yy[0] = (GLint)((y1 + iy) * 4);
922
923 ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
924 (yy[0] - yy[2]) * (xx[1] - xx[2]));
925 ooa = 1.0 / ooa;
926
927 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
928 switch (vertsize) {
929 case 6:
930 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
931 break;
932 case 4:
933 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
934 break;
935 default: /* vertsize >= 8 */
936 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
937 break;
938 }
939 if (vertsize > 6) {
940 LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_1_S */
941 LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_1_T */
942 }
943 if (vertsize > 4) {
944 LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_1_W */
945 LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
946 }
947 LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) ); /* MACH64_VERTEX_1_Z */
948 vb[vbidx++] = v1->ui[coloridx]; /* MACH64_VERTEX_1_ARGB */
949 LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
950
951 LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
952 LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
953
954 assert(vbsiz == vbidx);
955 #endif
956 }
957
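/* Emit a point as a screen-aligned square (two triangles) centered on
 * the vertex.
 */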
958 static __inline void mach64_draw_point( mach64ContextPtr mmesa,
959 mach64VertexPtr v0 )
960 {
961 #if MACH64_NATIVE_VTXFMT
962 GLcontext *ctx = mmesa->glCtx;
963 const GLuint vertsize = mmesa->vertex_size;
964 GLint sz = (GLint)(mmesa->glCtx->Point._Size * 2.0); /* 2 fractional bits for hardware */
965 GLfloat ooa;
966 GLuint *pxy;
967 GLuint xyold, xy;
968 const GLuint xyoffset = 9;
969 GLint x, y;
970 unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2;
971 CARD32 *vb, *vbchk;
972
973 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
974 fprintf(stderr, "%s:\n", __FUNCTION__);
975 fprintf(stderr,"Vertex 1:\n");
976 mach64_print_vertex( ctx, v0 );
977 }
978
979 if( !sz )
980 sz = 1; /* round to the nearest supported size */
981
982 pxy = &v0->ui[xyoffset];
983 xyold = *pxy;
984 xy = LE32_IN( &xyold );
985 x = (GLshort)( xy >> 16 );
986 y = (GLshort)( xy & 0xffff );
987
988 ooa = 4.0 / (sz * sz);
989
990 vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
991 vbchk = vb + vbsiz;
992
993 LE32_OUT( pxy, (( x - sz ) << 16) | (( y - sz ) & 0xffff) );
994 COPY_VERTEX( vb, vertsize, v0, 1 );
995 LE32_OUT( pxy, (( x + sz ) << 16) | (( y - sz ) & 0xffff) );
996 COPY_VERTEX( vb, vertsize, v0, 2 );
997 LE32_OUT( pxy, (( x - sz ) << 16) | (( y + sz ) & 0xffff) );
998 COPY_VERTEX_OOA( vb, vertsize, v0, 3 );
999 LE32_OUT( vb++, *(CARD32 *)&ooa );
1000
1001 ooa = -ooa;
1002
1003 LE32_OUT( pxy, (( x + sz ) << 16) | (( y + sz ) & 0xffff) );
1004 COPY_VERTEX_OOA( vb, vertsize, v0, 1 );
1005 LE32_OUT( vb++, *(CARD32 *)&ooa );
1006
1007 *pxy = xyold;
1008 #else /* !MACH64_NATIVE_VTXFMT */
1009 GLuint vertsize = mmesa->vertex_size;
1010 GLint coloridx;
1011 float sz = 1.0; /* Only support 1 pix points now */
1012 GLfloat ooa;
1013 GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
1014 unsigned vbsiz =
1015 ((
1016 1 +
1017 (vertsize > 6 ? 2 : 0) +
1018 (vertsize > 4 ? 2 : 0) +
1019 3 +
1020 (mmesa->multitex ? 4 : 0)
1021 ) * 4 + 4);
1022 CARD32 *vb;
1023 unsigned vbidx = 0;
1024
1025 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
1026 fprintf(stderr, "%s:\n", __FUNCTION__);
1027 fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n",
1028 v0->v.x, v0->v.y, v0->v.z, v0->v.w);
1029 }
1030
1031 #if MACH64_CLIENT_STATE_EMITS
1032 /* Enable for interleaved client-side state emits */
1033 LOCK_HARDWARE( mmesa );
1034 if ( mmesa->dirty ) {
1035 mach64EmitHwStateLocked( mmesa );
1036 }
1037 if ( mmesa->sarea->dirty ) {
1038 mach64UploadHwStateLocked( mmesa );
1039 }
1040 UNLOCK_HARDWARE( mmesa );
1041 #endif
1042
1043 xx[0] = (GLint)((v0->v.x - sz) * 4);
1044 yy[0] = (GLint)((v0->v.y - sz) * 4);
1045
1046 xx[1] = (GLint)((v0->v.x + sz) * 4);
1047 yy[1] = (GLint)((v0->v.y - sz) * 4);
1048
1049 xx[2] = (GLint)((v0->v.x - sz) * 4);
1050 yy[2] = (GLint)((v0->v.y + sz) * 4);
1051
1052 ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
1053 (yy[0] - yy[2]) * (xx[1] - xx[2]));
1054
1055 if ( ooa * mmesa->backface_sign < 0 ) {
1056 /* cull point */
1057 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
1058 fprintf(stderr,"Point culled\n");
1059 return;
1060 }
1061
1062 vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
1063
1064 ooa = 1.0 / ooa;
1065
1066 coloridx = (vertsize > 4) ? 4: 3;
1067
1068 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
1069 switch (vertsize) {
1070 case 6:
1071 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
1072 break;
1073 case 4:
1074 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
1075 break;
1076 default: /* vertsize >= 8 */
1077 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
1078 break;
1079 }
1080 if (vertsize > 6) {
1081 LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
1082 LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
1083 }
1084 if (vertsize > 4) {
1085 LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
1086 LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
1087 }
1088 LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_1_Z */
1089 vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_1_ARGB */
1090 LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
1091
1092 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
1093 switch (vertsize) {
1094 case 6:
1095 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
1096 break;
1097 case 4:
1098 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
1099 break;
1100 default: /* vertsize >= 8 */
1101 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
1102 break;
1103 }
1104 if (vertsize > 6) {
1105 LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_2_S */
1106 LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_2_T */
1107 }
1108 if (vertsize > 4) {
1109 LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_2_W */
1110 LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
1111 }
1112 LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_2_Z */
1113 vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_2_ARGB */
1114 LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
1115
1116 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
1117 switch (vertsize) {
1118 case 6:
1119 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
1120 break;
1121 case 4:
1122 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
1123 break;
1124 default: /* vertsize >= 8 */
1125 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
1126 break;
1127 }
1128 if (vertsize > 6) {
1129 LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_3_S */
1130 LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_3_T */
1131 }
1132 if (vertsize > 4) {
1133 LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_3_W */
1134 LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
1135 }
1136 LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_3_Z */
1137 vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_3_ARGB */
1138 LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
1139
1140 LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
1141 LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
1142
1143 xx[0] = (GLint)((v0->v.x + sz) * 4);
1144 yy[0] = (GLint)((v0->v.y + sz) * 4);
1145
1146 ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
1147 (yy[0] - yy[2]) * (xx[1] - xx[2]));
1148 ooa = 1.0 / ooa;
1149
1150 /* setup for 3,5, or 7 sequential reg writes based on vertex format */
1151 switch (vertsize) {
1152 case 6:
1153 LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
1154 break;
1155 case 4:
1156 LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
1157 break;
1158 default: /* vertsize >= 8 */
1159 LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
1160 break;
1161 }
1162 if (vertsize > 6) {
1163 LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
1164 LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
1165 }
1166 if (vertsize > 4) {
1167 LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
1168 LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
1169 }
1170 LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_1_Z */
1171 vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_1_ARGB */
1172 LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
1173
1174 LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
1175 LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
1176
1177 assert(vbsiz == vbidx);
1178 #endif
1179 }
1180
1181 /***********************************************************************
1182 * Macros for t_dd_tritmp.h to draw basic primitives *
1183 ***********************************************************************/
1184
1185 #define TRI( a, b, c ) \
1186 do { \
1187 if (DO_FALLBACK) \
1188 mmesa->draw_tri( mmesa, a, b, c ); \
1189 else \
1190 mach64_draw_triangle( mmesa, a, b, c ); \
1191 } while (0)
1192
1193 #define QUAD( a, b, c, d ) \
1194 do { \
1195 if (DO_FALLBACK) { \
1196 mmesa->draw_tri( mmesa, a, b, d ); \
1197 mmesa->draw_tri( mmesa, b, c, d ); \
1198 } else \
1199 mach64_draw_quad( mmesa, a, b, c, d ); \
1200 } while (0)
1201
1202 #define LINE( v0, v1 ) \
1203 do { \
1204 if (DO_FALLBACK) \
1205 mmesa->draw_line( mmesa, v0, v1 ); \
1206 else \
1207 mach64_draw_line( mmesa, v0, v1 ); \
1208 } while (0)
1209
1210 #define POINT( v0 ) \
1211 do { \
1212 if (DO_FALLBACK) \
1213 mmesa->draw_point( mmesa, v0 ); \
1214 else \
1215 mach64_draw_point( mmesa, v0 ); \
1216 } while (0)
1217
1218
1219 /***********************************************************************
1220 * Build render functions from dd templates *
1221 ***********************************************************************/
1222
1223 #define MACH64_OFFSET_BIT 0x01
1224 #define MACH64_TWOSIDE_BIT 0x02
1225 #define MACH64_UNFILLED_BIT 0x04
1226 #define MACH64_FALLBACK_BIT 0x08
1227 #define MACH64_MAX_TRIFUNC 0x10
1228
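/* Table of rasterization functions generated from the t_dd_tritmp.h
 * templates below, indexed by a bitmask of the MACH64_*_BIT flags.
 */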
1229 static struct {
1230 points_func points;
1231 line_func line;
1232 triangle_func triangle;
1233 quad_func quad;
1234 } rast_tab[MACH64_MAX_TRIFUNC];
1235
1236
1237 #define DO_FALLBACK (IND & MACH64_FALLBACK_BIT)
1238 #define DO_OFFSET (IND & MACH64_OFFSET_BIT)
1239 #define DO_UNFILLED (IND & MACH64_UNFILLED_BIT)
1240 #define DO_TWOSIDE (IND & MACH64_TWOSIDE_BIT)
1241 #define DO_FLAT 0
1242 #define DO_TRI 1
1243 #define DO_QUAD 1
1244 #define DO_LINE 1
1245 #define DO_POINTS 1
1246 #define DO_FULL_QUAD 1
1247
1248 #define HAVE_RGBA 1
1249 #define HAVE_SPEC 1
1250 #define HAVE_BACK_COLORS 0
1251 #define HAVE_HW_FLATSHADE 1
1252 #define VERTEX mach64Vertex
1253 #define TAB rast_tab
1254
1255 #if MACH64_NATIVE_VTXFMT
1256
1257 /* #define DEPTH_SCALE 65536.0 */
1258 #define DEPTH_SCALE 1
1259 #define UNFILLED_TRI unfilled_tri
1260 #define UNFILLED_QUAD unfilled_quad
1261 #define VERT_X(_v) ((GLfloat)(GLshort)(LE32_IN( &(_v)->ui[xyoffset] ) & 0xffff) / 4.0)
1262 #define VERT_Y(_v) ((GLfloat)(GLshort)(LE32_IN( &(_v)->ui[xyoffset] ) >> 16) / 4.0)
1263 #define VERT_Z(_v) ((GLfloat) LE32_IN( &(_v)->ui[zoffset] ))
1264 #define INSANE_VERTICES
1265 #define VERT_SET_Z(_v,val) LE32_OUT( &(_v)->ui[zoffset], (GLuint)(val) )
1266 #define VERT_Z_ADD(_v,val) LE32_OUT( &(_v)->ui[zoffset], LE32_IN( &(_v)->ui[zoffset] ) + (GLuint)(val) )
1267 #define AREA_IS_CCW( a ) ((a) < 0)
1268 #define GET_VERTEX(e) (mmesa->verts + ((e) * mmesa->vertex_size * sizeof(int)))
1269
1270 #define MACH64_COLOR( dst, src ) \
1271 do { \
1272 UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]); \
1273 UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]); \
1274 UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]); \
1275 UNCLAMPED_FLOAT_TO_UBYTE(dst[3], src[3]); \
1276 } while (0)
1277
1278 #define MACH64_SPEC( dst, src ) \
1279 do { \
1280 UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]); \
1281 UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]); \
1282 UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]); \
1283 } while (0)
1284
1285 #define VERT_SET_RGBA( v, c ) MACH64_COLOR( v->ub4[coloroffset], c )
1286 #define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
1287 #define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset]
1288 #define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
1289
1290 #define VERT_SET_SPEC( v, c ) if (havespec) MACH64_SPEC( v->ub4[specoffset], c )
1291 #define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V( v0->ub4[specoffset], v1->ub4[specoffset] )
1292 #define VERT_SAVE_SPEC( idx ) if (havespec) spec[idx] = v[idx]->ui[specoffset]
1293 #define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[specoffset] = spec[idx]
1294
1295 #define LOCAL_VARS(n) \
1296 mach64ContextPtr mmesa = MACH64_CONTEXT(ctx); \
1297 GLuint color[n], spec[n]; \
1298 GLuint vertex_size = mmesa->vertex_size; \
1299 const GLuint xyoffset = 9; \
1300 const GLuint coloroffset = 8; \
1301 const GLuint zoffset = 7; \
1302 const GLuint specoffset = 6; \
1303 GLboolean havespec = vertex_size >= 4 ? 1 : 0; \
1304 (void) color; (void) spec; (void) vertex_size; \
1305 (void) xyoffset; (void) coloroffset; (void) zoffset; \
1306 (void) specoffset; (void) havespec;
1307
1308 #else
1309
1310 #define DEPTH_SCALE 1.0
1311 #define UNFILLED_TRI unfilled_tri
1312 #define UNFILLED_QUAD unfilled_quad
1313 #define VERT_X(_v) _v->v.x
1314 #define VERT_Y(_v) _v->v.y
1315 #define VERT_Z(_v) _v->v.z
1316 #define AREA_IS_CCW( a ) (a > 0)
1317 #define GET_VERTEX(e) (mmesa->verts + ((e) * mmesa->vertex_size * sizeof(int)))
1318
1319 #define MACH64_COLOR( dst, src ) \
1320 do { \
1321 UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]); \
1322 UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]); \
1323 UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]); \
1324 UNCLAMPED_FLOAT_TO_UBYTE(dst[3], src[3]); \
1325 } while (0)
1326
1327 #define MACH64_SPEC( dst, src ) \
1328 do { \
1329 UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]); \
1330 UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]); \
1331 UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]); \
1332 } while (0)
1333
1334 #define VERT_SET_RGBA( v, c ) MACH64_COLOR( v->ub4[coloroffset], c )
1335 #define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
1336 #define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset]
1337 #define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
1338
1339 #define VERT_SET_SPEC( v, c ) if (havespec) MACH64_SPEC( v->ub4[5], c )
1340 #define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V(v0->ub4[5], v1->ub4[5])
1341 #define VERT_SAVE_SPEC( idx ) if (havespec) spec[idx] = v[idx]->ui[5]
1342 #define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = spec[idx]
1343
1344 #define LOCAL_VARS(n) \
1345 mach64ContextPtr mmesa = MACH64_CONTEXT(ctx); \
1346 GLuint color[n], spec[n]; \
1347 GLuint coloroffset = (mmesa->vertex_size == 4 ? 3 : 4); \
1348 GLboolean havespec = (mmesa->vertex_size == 4 ? 0 : 1); \
1349 (void) color; (void) spec; (void) coloroffset; (void) havespec;
1350
1351 #endif
1352
1353 /***********************************************************************
1354 * Helpers for rendering unfilled primitives *
1355 ***********************************************************************/
1356
1357 #define RASTERIZE(x) if (mmesa->hw_primitive != hw_prim[x]) \
1358 mach64RasterPrimitive( ctx, hw_prim[x] )
1359 #define RENDER_PRIMITIVE mmesa->render_primitive
1360 #define IND MACH64_FALLBACK_BIT
1361 #define TAG(x) x
1362 #include "tnl_dd/t_dd_unfilled.h"
1363 #undef IND
1364
1365
1366 /***********************************************************************
1367 * Generate GL render functions *
1368 ***********************************************************************/
1369
1370
1371 #define IND (0)
1372 #define TAG(x) x
1373 #include "tnl_dd/t_dd_tritmp.h"
1374
1375 #define IND (MACH64_OFFSET_BIT)
1376 #define TAG(x) x##_offset
1377 #include "tnl_dd/t_dd_tritmp.h"
1378
1379 #define IND (MACH64_TWOSIDE_BIT)
1380 #define TAG(x) x##_twoside
1381 #include "tnl_dd/t_dd_tritmp.h"
1382
1383 #define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT)
1384 #define TAG(x) x##_twoside_offset
1385 #include "tnl_dd/t_dd_tritmp.h"
1386
1387 #define IND (MACH64_UNFILLED_BIT)
1388 #define TAG(x) x##_unfilled
1389 #include "tnl_dd/t_dd_tritmp.h"
1390
1391 #define IND (MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT)
1392 #define TAG(x) x##_offset_unfilled
1393 #include "tnl_dd/t_dd_tritmp.h"
1394
1395 #define IND (MACH64_TWOSIDE_BIT|MACH64_UNFILLED_BIT)
1396 #define TAG(x) x##_twoside_unfilled
1397 #include "tnl_dd/t_dd_tritmp.h"
1398
1399 #define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT)
1400 #define TAG(x) x##_twoside_offset_unfilled
1401 #include "tnl_dd/t_dd_tritmp.h"
1402
1403 #define IND (MACH64_FALLBACK_BIT)
1404 #define TAG(x) x##_fallback
1405 #include "tnl_dd/t_dd_tritmp.h"
1406
1407 #define IND (MACH64_OFFSET_BIT|MACH64_FALLBACK_BIT)
1408 #define TAG(x) x##_offset_fallback
1409 #include "tnl_dd/t_dd_tritmp.h"
1410
1411 #define IND (MACH64_TWOSIDE_BIT|MACH64_FALLBACK_BIT)
1412 #define TAG(x) x##_twoside_fallback
1413 #include "tnl_dd/t_dd_tritmp.h"
1414
1415 #define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT|MACH64_FALLBACK_BIT)
1416 #define TAG(x) x##_twoside_offset_fallback
1417 #include "tnl_dd/t_dd_tritmp.h"
1418
1419 #define IND (MACH64_UNFILLED_BIT|MACH64_FALLBACK_BIT)
1420 #define TAG(x) x##_unfilled_fallback
1421 #include "tnl_dd/t_dd_tritmp.h"
1422
1423 #define IND (MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT|MACH64_FALLBACK_BIT)
1424 #define TAG(x) x##_offset_unfilled_fallback
1425 #include "tnl_dd/t_dd_tritmp.h"
1426
1427 #define IND (MACH64_TWOSIDE_BIT|MACH64_UNFILLED_BIT|MACH64_FALLBACK_BIT)
1428 #define TAG(x) x##_twoside_unfilled_fallback
1429 #include "tnl_dd/t_dd_tritmp.h"
1430
1431 #define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT| \
1432 MACH64_FALLBACK_BIT)
1433 #define TAG(x) x##_twoside_offset_unfilled_fallback
1434 #include "tnl_dd/t_dd_tritmp.h"
1435
1436
1437 static void init_rast_tab( void )
1438 {
1439 init();
1440 init_offset();
1441 init_twoside();
1442 init_twoside_offset();
1443 init_unfilled();
1444 init_offset_unfilled();
1445 init_twoside_unfilled();
1446 init_twoside_offset_unfilled();
1447 init_fallback();
1448 init_offset_fallback();
1449 init_twoside_fallback();
1450 init_twoside_offset_fallback();
1451 init_unfilled_fallback();
1452 init_offset_unfilled_fallback();
1453 init_twoside_unfilled_fallback();
1454 init_twoside_offset_unfilled_fallback();
1455 }
1456
1457
1458 /***********************************************************************
1459 * Rasterization fallback helpers *
1460 ***********************************************************************/
1461
1462
1463 /* This code is hit only when a mix of accelerated and unaccelerated
1464 * primitives are being drawn, and only for the unaccelerated
1465 * primitives.
1466 */
1467 static void
1468 mach64_fallback_tri( mach64ContextPtr mmesa,
1469 mach64Vertex *v0,
1470 mach64Vertex *v1,
1471 mach64Vertex *v2 )
1472 {
1473 GLcontext *ctx = mmesa->glCtx;
1474 SWvertex v[3];
1475 mach64_translate_vertex( ctx, v0, &v[0] );
1476 mach64_translate_vertex( ctx, v1, &v[1] );
1477 mach64_translate_vertex( ctx, v2, &v[2] );
1478 _swrast_Triangle( ctx, &v[0], &v[1], &v[2] );
1479 }
1480
1481
1482 static void
1483 mach64_fallback_line( mach64ContextPtr mmesa,
1484 mach64Vertex *v0,
1485 mach64Vertex *v1 )
1486 {
1487 GLcontext *ctx = mmesa->glCtx;
1488 SWvertex v[2];
1489 mach64_translate_vertex( ctx, v0, &v[0] );
1490 mach64_translate_vertex( ctx, v1, &v[1] );
1491 _swrast_Line( ctx, &v[0], &v[1] );
1492 }
1493
1494
1495 static void
1496 mach64_fallback_point( mach64ContextPtr mmesa,
1497 mach64Vertex *v0 )
1498 {
1499 GLcontext *ctx = mmesa->glCtx;
1500 SWvertex v[1];
1501 mach64_translate_vertex( ctx, v0, &v[0] );
1502 _swrast_Point( ctx, &v[0] );
1503 }
1504
1505
1506
1507 /**********************************************************************/
1508 /* Render unclipped begin/end objects */
1509 /**********************************************************************/
1510
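/* These macros parametrize tnl/t_vb_rendertmp.h to generate the
 * mach64_render_tab_verts / mach64_render_tab_elts entry points that
 * emit primitives straight from the driver's vertex buffer.
 */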
1511 #define VERT(x) (mach64Vertex *)(mach64verts + ((x) * vertsize * sizeof(int)))
1512 #define RENDER_POINTS( start, count ) \
1513 for ( ; start < count ; start++) \
1514 mach64_draw_point( mmesa, VERT(start) )
1515 #define RENDER_LINE( v0, v1 ) \
1516 mach64_draw_line( mmesa, VERT(v0), VERT(v1) )
1517 #define RENDER_TRI( v0, v1, v2 ) \
1518 mach64_draw_triangle( mmesa, VERT(v0), VERT(v1), VERT(v2) )
1519 #define RENDER_QUAD( v0, v1, v2, v3 ) \
1520 mach64_draw_quad( mmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
1521 #define INIT(x) do { \
1522 if (0) fprintf(stderr, "%s\n", __FUNCTION__); \
1523 mach64RenderPrimitive( ctx, x ); \
1524 } while (0)
1525 #undef LOCAL_VARS
1526 #define LOCAL_VARS \
1527 mach64ContextPtr mmesa = MACH64_CONTEXT(ctx); \
1528 const GLuint vertsize = mmesa->vertex_size; \
1529 const char *mach64verts = (char *)mmesa->verts; \
1530 const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
1531 (void) elt;
1532 #define RESET_STIPPLE
1533 #define RESET_OCCLUSION
1534 #define PRESERVE_VB_DEFS
1535 #define ELT(x) (x)
1536 #define TAG(x) mach64_##x##_verts
1537 #include "tnl/t_vb_rendertmp.h"
1538 #undef ELT
1539 #undef TAG
1540 #define TAG(x) mach64_##x##_elts
1541 #define ELT(x) elt[x]
1542 #include "tnl/t_vb_rendertmp.h"
1543
1544
1545 /**********************************************************************/
1546 /* Render clipped primitives */
1547 /**********************************************************************/
1548
1549 static void mach64RenderClippedPoly( GLcontext *ctx, const GLuint *elts,
1550 GLuint n )
1551 {
1552 mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
1553 TNLcontext *tnl = TNL_CONTEXT(ctx);
1554 struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
1555 GLuint prim = mmesa->render_primitive;
1556
1557 /* Render the new vertices as an unclipped polygon.
1558 */
1559 {
1560 GLuint *tmp = VB->Elts;
1561 VB->Elts = (GLuint *)elts;
1562 tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n, PRIM_BEGIN|PRIM_END );
1563 VB->Elts = tmp;
1564 }
1565
1566 /* Restore the render primitive
1567 */
1568 if (prim != GL_POLYGON)
1569 tnl->Driver.Render.PrimitiveNotify( ctx, prim );
1570
1571 }
1572
1573 static void mach64RenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj )
1574 {
1575 TNLcontext *tnl = TNL_CONTEXT(ctx);
1576 tnl->Driver.Render.Line( ctx, ii, jj );
1577 }
1578
1579 #if MACH64_NATIVE_VTXFMT
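/* Emit a clipped polygon directly to the DMA buffer as a fan:
 * elts[0] stays in hardware vertex 3 while the remaining elements
 * alternately replace vertices 1 and 2, each followed by a new
 * one-over-area value.  Backface culling is done on the first
 * triangle only.
 */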
1580 static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
1581 GLuint n )
1582 {
1583 mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
1584 const GLuint vertsize = mmesa->vertex_size;
1585 GLint a;
1586 GLfloat ooa;
1587 GLuint xy;
1588 const GLuint xyoffset = 9;
1589 GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
1590 unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * n + (n-2);
1591 CARD32 *vb, *vbchk;
1592 GLubyte *mach64verts = (GLubyte *)mmesa->verts;
1593 mach64VertexPtr v0, v1, v2;
1594 int i;
1595
1596 v0 = (mach64VertexPtr)VERT(elts[1]);
1597 v1 = (mach64VertexPtr)VERT(elts[2]);
1598 v2 = (mach64VertexPtr)VERT(elts[0]);
1599
1600 xy = LE32_IN( &v0->ui[xyoffset] );
1601 xx[0] = (GLshort)( xy >> 16 );
1602 yy[0] = (GLshort)( xy & 0xffff );
1603
1604 xy = LE32_IN( &v1->ui[xyoffset] );
1605 xx[1] = (GLshort)( xy >> 16 );
1606 yy[1] = (GLshort)( xy & 0xffff );
1607
1608 xy = LE32_IN( &v2->ui[xyoffset] );
1609 xx[2] = (GLshort)( xy >> 16 );
1610 yy[2] = (GLshort)( xy & 0xffff );
1611
1612 a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
1613 (yy[0] - yy[2]) * (xx[1] - xx[2]);
1614
1615 if ( (mmesa->backface_sign &&
1616 ((a < 0 && !signbit( mmesa->backface_sign )) ||
1617 (a > 0 && signbit( mmesa->backface_sign )))) ) {
1618 /* cull polygon */
1619 if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
1620 fprintf(stderr,"Polygon culled\n");
1621 return;
1622 }
1623
1624 ooa = 16.0 / a;
1625
1626 vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
1627 vbchk = vb + vbsiz;
1628
1629 COPY_VERTEX( vb, vertsize, v0, 1 );
1630 COPY_VERTEX( vb, vertsize, v1, 2 );
1631 COPY_VERTEX_OOA( vb, vertsize, v2, 3 );
1632 LE32_OUT( vb++, *(CARD32 *)&ooa );
1633
1634 i = 3;
1635 while (1) {
1636 if (i >= n)
1637 break;
1638 v0 = (mach64VertexPtr)VERT(elts[i]);
1639 i++;
1640
1641 xy = LE32_IN( &v0->ui[xyoffset] );
1642 xx[0] = (GLshort)( xy >> 16 );
1643 yy[0] = (GLshort)( xy & 0xffff );
1644
1645 a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
1646 (yy[0] - yy[2]) * (xx[1] - xx[2]);
1647 ooa = 16.0 / a;
1648
1649 COPY_VERTEX_OOA( vb, vertsize, v0, 1 );
1650 LE32_OUT( vb++, *(CARD32 *)&ooa );
1651
1652 if (i >= n)
1653 break;
1654 v1 = (mach64VertexPtr)VERT(elts[i]);
1655 i++;
1656
1657 xy = LE32_IN( &v1->ui[xyoffset] );
1658 xx[1] = (GLshort)( xy >> 16 );
1659 yy[1] = (GLshort)( xy & 0xffff );
1660
1661 a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
1662 (yy[0] - yy[2]) * (xx[1] - xx[2]);
1663 ooa = 16.0 / a;
1664
1665 COPY_VERTEX_OOA( vb, vertsize, v1, 2 );
1666 LE32_OUT( vb++, *(CARD32 *)&ooa );
1667 }
1668
1669 assert( vb == vbchk );
1670 }
1671 #else
1672 static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
1673 GLuint n )
1674 {
1675 mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
1676 GLubyte *mach64verts = (GLubyte *)mmesa->verts;
1677 const GLuint *start = (const GLuint *)VERT(elts[0]);
1678 int i;
1679
1680 for (i = 2 ; i < n ; i++) {
1681 mach64_draw_triangle( mmesa,
1682 VERT(elts[i-1]),
1683 VERT(elts[i]),
1684 (mach64VertexPtr) start
1685 );
1686 }
1687 }
1688 #endif /* MACH64_NATIVE_VTXFMT */
1689
1690 /**********************************************************************/
1691 /* Choose render functions */
1692 /**********************************************************************/
1693
1694 #define _MACH64_NEW_RENDER_STATE (_DD_NEW_POINT_SMOOTH | \
1695 _DD_NEW_LINE_SMOOTH | \
1696 _DD_NEW_LINE_STIPPLE | \
1697 _DD_NEW_TRI_SMOOTH | \
1698 _DD_NEW_TRI_STIPPLE | \
1699 _NEW_POLYGONSTIPPLE | \
1700 _DD_NEW_TRI_UNFILLED | \
1701 _DD_NEW_TRI_LIGHT_TWOSIDE | \
1702 _DD_NEW_TRI_OFFSET) \
1703
1704 #define POINT_FALLBACK (DD_POINT_SMOOTH)
1705 #define LINE_FALLBACK (DD_LINE_SMOOTH|DD_LINE_STIPPLE)
1706 #define TRI_FALLBACK (DD_TRI_SMOOTH|DD_TRI_STIPPLE)
1707 #define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK)
1708 #define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED)
1709
1710
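/* Pick the rast_tab entry matching the current _TriangleCaps flags,
 * install swrast fallbacks for primitives the hardware can't handle
 * (smooth points, smooth/stippled lines and triangles), and hook in
 * the fast mach64 render paths when no flags are set.
 */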
1711 static void mach64ChooseRenderState(GLcontext *ctx)
1712 {
1713 mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
1714 GLuint flags = ctx->_TriangleCaps;
1715 GLuint index = 0;
1716
1717 if (flags & (ANY_RASTER_FLAGS|ANY_FALLBACK_FLAGS)) {
1718 mmesa->draw_point = mach64_draw_point;
1719 mmesa->draw_line = mach64_draw_line;
1720 mmesa->draw_tri = mach64_draw_triangle;
1721
1722 if (flags & ANY_RASTER_FLAGS) {
1723 if (flags & DD_TRI_LIGHT_TWOSIDE) index |= MACH64_TWOSIDE_BIT;
1724 if (flags & DD_TRI_OFFSET) index |= MACH64_OFFSET_BIT;
1725 if (flags & DD_TRI_UNFILLED) index |= MACH64_UNFILLED_BIT;
1726 }
1727
1728 /* Hook in fallbacks for specific primitives.
1729 */
1730 if (flags & (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK)) {
1731 if (flags & POINT_FALLBACK) mmesa->draw_point = mach64_fallback_point;
1732 if (flags & LINE_FALLBACK) mmesa->draw_line = mach64_fallback_line;
1733 if (flags & TRI_FALLBACK) mmesa->draw_tri = mach64_fallback_tri;
1734 index |= MACH64_FALLBACK_BIT;
1735 }
1736 }
1737
1738 if (index != mmesa->RenderIndex) {
1739 TNLcontext *tnl = TNL_CONTEXT(ctx);
1740 tnl->Driver.Render.Points = rast_tab[index].points;
1741 tnl->Driver.Render.Line = rast_tab[index].line;
1742 tnl->Driver.Render.Triangle = rast_tab[index].triangle;
1743 tnl->Driver.Render.Quad = rast_tab[index].quad;
1744
1745 if (index == 0) {
1746 tnl->Driver.Render.PrimTabVerts = mach64_render_tab_verts;
1747 tnl->Driver.Render.PrimTabElts = mach64_render_tab_elts;
1748 tnl->Driver.Render.ClippedLine = rast_tab[index].line;
1749 tnl->Driver.Render.ClippedPolygon = mach64FastRenderClippedPoly;
1750 } else {
1751 tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
1752 tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
1753 tnl->Driver.Render.ClippedLine = mach64RenderClippedLine;
1754 tnl->Driver.Render.ClippedPolygon = mach64RenderClippedPoly;
1755 }
1756
1757 mmesa->RenderIndex = index;
1758 }
1759 }
1760
1761 /**********************************************************************/
1762 /* Validate state at pipeline start */
1763 /**********************************************************************/
1764
1765 static void mach64RunPipeline( GLcontext *ctx )
1766 {
1767 mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
1768
1769 if (mmesa->new_state)
1770 mach64DDUpdateHWState( ctx );
1771
1772 if (!mmesa->Fallback && mmesa->NewGLState) {
1773 if (mmesa->NewGLState & _MACH64_NEW_VERTEX_STATE)
1774 mach64ChooseVertexState( ctx );
1775
1776 if (mmesa->NewGLState & _MACH64_NEW_RENDER_STATE)
1777 mach64ChooseRenderState( ctx );
1778
1779 mmesa->NewGLState = 0;
1780 }
1781
1782 _tnl_run_pipeline( ctx );
1783 }
1784
1785 /**********************************************************************/
1786 /* High level hooks for t_vb_render.c */
1787 /**********************************************************************/
1788
1789 /* This is called when Mesa switches between rendering triangle
1790 * primitives (such as GL_POLYGON, GL_QUADS, GL_TRIANGLE_STRIP, etc),
1791 * and lines, points and bitmaps.
1792 */
1793
1794 static void mach64RasterPrimitive( GLcontext *ctx, GLuint hwprim )
1795 {
1796 mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
1797
1798 mmesa->new_state |= MACH64_NEW_CONTEXT;
1799 mmesa->dirty |= MACH64_UPLOAD_CONTEXT;
1800
1801 if (mmesa->hw_primitive != hwprim) {
1802 FLUSH_BATCH( mmesa );
1803 mmesa->hw_primitive = hwprim;
1804 }
1805 }
1806
1807 static void mach64RenderPrimitive( GLcontext *ctx, GLenum prim )
1808 {
1809 mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
1810 GLuint hw = hw_prim[prim];
1811
1812 mmesa->render_primitive = prim;
1813
1814 if (prim >= GL_TRIANGLES && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
1815 return;
1816
1817 mach64RasterPrimitive( ctx, hw );
1818 }
1819
1820
1821 static void mach64RenderStart( GLcontext *ctx )
1822 {
1823 /* Check for projective texturing. Make sure all texcoord
1824 * pointers point to something. (fix in mesa?)
1825 */
1826 mach64CheckTexSizes( ctx );
1827 }
1828
1829 static void mach64RenderFinish( GLcontext *ctx )
1830 {
1831 if (MACH64_CONTEXT(ctx)->RenderIndex & MACH64_FALLBACK_BIT)
1832 _swrast_flush( ctx );
1833 }
1834
1835
1836 /**********************************************************************/
1837 /* Transition to/from hardware rasterization. */
1838 /**********************************************************************/
1839
1840 void mach64Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
1841 {
1842 TNLcontext *tnl = TNL_CONTEXT(ctx);
1843 mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
1844 GLuint oldfallback = mmesa->Fallback;
1845
1846 if (mode) {
1847 if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
1848 fprintf(stderr,"Set Fallback: %d\n", bit);
1849 mmesa->Fallback |= bit;
1850 if (oldfallback == 0) {
1851 FLUSH_BATCH( mmesa );
1852 _swsetup_Wakeup( ctx );
1853 mmesa->RenderIndex = ~0;
1854 }
1855 }
1856 else {
1857 if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
1858 fprintf(stderr,"Clear Fallback: %d\n", bit);
1859 mmesa->Fallback &= ~bit;
1860 if (oldfallback == bit) {
1861 _swrast_flush( ctx );
1862 tnl->Driver.Render.Start = mach64RenderStart;
1863 tnl->Driver.Render.PrimitiveNotify = mach64RenderPrimitive;
1864 tnl->Driver.Render.Finish = mach64RenderFinish;
1865 tnl->Driver.Render.BuildVertices = mach64BuildVertices;
1866 mmesa->NewGLState |= (_MACH64_NEW_RENDER_STATE|
1867 _MACH64_NEW_VERTEX_STATE);
1868 }
1869 }
1870 }
1871
1872 /**********************************************************************/
1873 /* Initialization. */
1874 /**********************************************************************/
1875
1876 void mach64InitTriFuncs( GLcontext *ctx )
1877 {
1878 TNLcontext *tnl = TNL_CONTEXT(ctx);
1879 static int firsttime = 1;
1880
1881 if (firsttime) {
1882 init_rast_tab();
1883 firsttime = 0;
1884 }
1885
1886 tnl->Driver.RunPipeline = mach64RunPipeline;
1887 tnl->Driver.Render.Start = mach64RenderStart;
1888 tnl->Driver.Render.Finish = mach64RenderFinish;
1889 tnl->Driver.Render.PrimitiveNotify = mach64RenderPrimitive;
1890 tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple;
1891 tnl->Driver.Render.BuildVertices = mach64BuildVertices;
1892
1893 /* mach64Fallback( ctx, 0x100000, 1 ); */
1894 }