Remove the tcl fallback for texture rectangle (by manipulating the texture matrix...
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_maos_arrays.c
1 /* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c,v 1.1 2002/10/30 12:51:55 alanh Exp $ */
2 /**************************************************************************
3
4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5 Tungsten Graphics Inc., Cedar Park, Texas.
6
7 All Rights Reserved.
8
9 Permission is hereby granted, free of charge, to any person obtaining
10 a copy of this software and associated documentation files (the
11 "Software"), to deal in the Software without restriction, including
12 without limitation the rights to use, copy, modify, merge, publish,
13 distribute, sublicense, and/or sell copies of the Software, and to
14 permit persons to whom the Software is furnished to do so, subject to
15 the following conditions:
16
17 The above copyright notice and this permission notice (including the
18 next paragraph) shall be included in all copies or substantial
19 portions of the Software.
20
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
29 **************************************************************************/
30
31 /*
32 * Authors:
33 * Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36 #include "glheader.h"
37 #include "imports.h"
38 #include "mtypes.h"
39 #include "macros.h"
40
41 #include "swrast_setup/swrast_setup.h"
42 #include "math/m_translate.h"
43 #include "tnl/tnl.h"
44 #include "tnl/t_context.h"
45
46 #include "radeon_context.h"
47 #include "radeon_ioctl.h"
48 #include "radeon_state.h"
49 #include "radeon_swtcl.h"
50 #include "radeon_maos.h"
51
52 #if 0
/* Usage:
 *   - from radeon_tcl_render
 *   - call radeonEmitArrays to ensure up-to-date arrays in dma
 *   - emit primitives (new type?) which reference the data
 *       -- need to use elts for lineloop, quads, quadstrip/flat
 *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
 *
 */
61 static void emit_ubyte_rgba3( GLcontext *ctx,
62 struct radeon_dma_region *rvb,
63 char *data,
64 int stride,
65 int count )
66 {
67 int i;
68 radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address);
69
70 if (RADEON_DEBUG & DEBUG_VERTS)
71 fprintf(stderr, "%s count %d stride %d out %p\n",
72 __FUNCTION__, count, stride, (void *)out);
73
74 for (i = 0; i < count; i++) {
75 out->red = *data;
76 out->green = *(data+1);
77 out->blue = *(data+2);
78 out->alpha = 0xFF;
79 out++;
80 data += stride;
81 }
82 }
83
84 static void emit_ubyte_rgba4( GLcontext *ctx,
85 struct radeon_dma_region *rvb,
86 char *data,
87 int stride,
88 int count )
89 {
90 int i;
91 int *out = (int *)(rvb->address + rvb->start);
92
93 if (RADEON_DEBUG & DEBUG_VERTS)
94 fprintf(stderr, "%s count %d stride %d\n",
95 __FUNCTION__, count, stride);
96
97 if (stride == 4)
98 COPY_DWORDS( out, data, count );
99 else
100 for (i = 0; i < count; i++) {
101 *out++ = LE32_TO_CPU(*(int *)data);
102 data += stride;
103 }
104 }
105
106
107 static void emit_ubyte_rgba( GLcontext *ctx,
108 struct radeon_dma_region *rvb,
109 char *data,
110 int size,
111 int stride,
112 int count )
113 {
114 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
115
116 if (RADEON_DEBUG & DEBUG_VERTS)
117 fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
118
119 assert (!rvb->buf);
120
121 if (stride == 0) {
122 radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
123 count = 1;
124 rvb->aos_start = GET_START(rvb);
125 rvb->aos_stride = 0;
126 rvb->aos_size = 1;
127 }
128 else {
129 radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */
130 rvb->aos_start = GET_START(rvb);
131 rvb->aos_stride = 1;
132 rvb->aos_size = 1;
133 }
134
135 /* Emit the data
136 */
137 switch (size) {
138 case 3:
139 emit_ubyte_rgba3( ctx, rvb, data, stride, count );
140 break;
141 case 4:
142 emit_ubyte_rgba4( ctx, rvb, data, stride, count );
143 break;
144 default:
145 assert(0);
146 exit(1);
147 break;
148 }
149 }
150 #endif
151
/* COPY_DWORDS( dst, src, nr ): copy `nr` dwords from `src` to `dst`.
 *
 * `dst` must be a modifiable pointer lvalue: the portable variant advances
 * it by `nr` elements, and the x86 variant lists it as an output operand of
 * the asm statement, so it is overwritten there as well.  `src` may be a
 * pointer of any type; the portable variant reads it as an array of int.
 */
#if defined(USE_X86_ASM)
/* The "memory" clobber tells the compiler that the string move reads and
 * writes memory that is not described by the register operands.
 */
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
   int __tmp;                                                           \
   __asm__ __volatile__( "rep ; movsl"                                  \
                         : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)      \
                         : "0" (nr),                                    \
                           "D" ((long)(dst)),                           \
                           "S" ((long)(src))                            \
                         : "memory" );                                  \
} while (0)
#else
/* Arguments are parenthesized and `src`/`nr` are evaluated exactly once, so
 * expressions such as `base + 4` or `a ? b : c` behave as the caller wrote
 * them.  The temporaries carry a trailing underscore to stay clear of
 * caller locals.
 */
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
   const int *copy_src_ = (const int *)(src);                           \
   const int copy_nr_ = (int)(nr);                                      \
   int copy_i_;                                                         \
   for ( copy_i_ = 0 ; copy_i_ < copy_nr_ ; copy_i_++ )                 \
      (dst)[copy_i_] = copy_src_[copy_i_];                              \
   (dst) += copy_nr_;                                                   \
} while (0)
#endif
171
172
173 static void emit_vec4( GLcontext *ctx,
174 struct radeon_dma_region *rvb,
175 char *data,
176 int stride,
177 int count )
178 {
179 int i;
180 int *out = (int *)(rvb->address + rvb->start);
181
182 if (RADEON_DEBUG & DEBUG_VERTS)
183 fprintf(stderr, "%s count %d stride %d\n",
184 __FUNCTION__, count, stride);
185
186 if (stride == 4)
187 COPY_DWORDS( out, data, count );
188 else
189 for (i = 0; i < count; i++) {
190 out[0] = *(int *)data;
191 out++;
192 data += stride;
193 }
194 }
195
196
197 static void emit_vec8( GLcontext *ctx,
198 struct radeon_dma_region *rvb,
199 char *data,
200 int stride,
201 int count )
202 {
203 int i;
204 int *out = (int *)(rvb->address + rvb->start);
205
206 if (RADEON_DEBUG & DEBUG_VERTS)
207 fprintf(stderr, "%s count %d stride %d\n",
208 __FUNCTION__, count, stride);
209
210 if (stride == 8)
211 COPY_DWORDS( out, data, count*2 );
212 else
213 for (i = 0; i < count; i++) {
214 out[0] = *(int *)data;
215 out[1] = *(int *)(data+4);
216 out += 2;
217 data += stride;
218 }
219 }
220
221 static void emit_vec12( GLcontext *ctx,
222 struct radeon_dma_region *rvb,
223 char *data,
224 int stride,
225 int count )
226 {
227 int i;
228 int *out = (int *)(rvb->address + rvb->start);
229
230 if (RADEON_DEBUG & DEBUG_VERTS)
231 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
232 __FUNCTION__, count, stride, (void *)out, (void *)data);
233
234 if (stride == 12)
235 COPY_DWORDS( out, data, count*3 );
236 else
237 for (i = 0; i < count; i++) {
238 out[0] = *(int *)data;
239 out[1] = *(int *)(data+4);
240 out[2] = *(int *)(data+8);
241 out += 3;
242 data += stride;
243 }
244 }
245
246 static void emit_vec16( GLcontext *ctx,
247 struct radeon_dma_region *rvb,
248 char *data,
249 int stride,
250 int count )
251 {
252 int i;
253 int *out = (int *)(rvb->address + rvb->start);
254
255 if (RADEON_DEBUG & DEBUG_VERTS)
256 fprintf(stderr, "%s count %d stride %d\n",
257 __FUNCTION__, count, stride);
258
259 if (stride == 16)
260 COPY_DWORDS( out, data, count*4 );
261 else
262 for (i = 0; i < count; i++) {
263 out[0] = *(int *)data;
264 out[1] = *(int *)(data+4);
265 out[2] = *(int *)(data+8);
266 out[3] = *(int *)(data+12);
267 out += 4;
268 data += stride;
269 }
270 }
271
272
273 static void emit_vector( GLcontext *ctx,
274 struct radeon_dma_region *rvb,
275 char *data,
276 int size,
277 int stride,
278 int count )
279 {
280 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
281
282 if (RADEON_DEBUG & DEBUG_VERTS)
283 fprintf(stderr, "%s count %d size %d stride %d\n",
284 __FUNCTION__, count, size, stride);
285
286 assert (!rvb->buf);
287
288 if (stride == 0) {
289 radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
290 count = 1;
291 rvb->aos_start = GET_START(rvb);
292 rvb->aos_stride = 0;
293 rvb->aos_size = size;
294 }
295 else {
296 radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? */
297 rvb->aos_start = GET_START(rvb);
298 rvb->aos_stride = size;
299 rvb->aos_size = size;
300 }
301
302 /* Emit the data
303 */
304 switch (size) {
305 case 1:
306 emit_vec4( ctx, rvb, data, stride, count );
307 break;
308 case 2:
309 emit_vec8( ctx, rvb, data, stride, count );
310 break;
311 case 3:
312 emit_vec12( ctx, rvb, data, stride, count );
313 break;
314 case 4:
315 emit_vec16( ctx, rvb, data, stride, count );
316 break;
317 default:
318 assert(0);
319 exit(1);
320 break;
321 }
322
323 }
324
325
326
327 static void emit_s0_vec( GLcontext *ctx,
328 struct radeon_dma_region *rvb,
329 char *data,
330 int stride,
331 int count )
332 {
333 int i;
334 int *out = (int *)(rvb->address + rvb->start);
335
336 if (RADEON_DEBUG & DEBUG_VERTS)
337 fprintf(stderr, "%s count %d stride %d\n",
338 __FUNCTION__, count, stride);
339
340 for (i = 0; i < count; i++) {
341 out[0] = *(int *)data;
342 out[1] = 0;
343 out += 2;
344 data += stride;
345 }
346 }
347
348 static void emit_stq_vec( GLcontext *ctx,
349 struct radeon_dma_region *rvb,
350 char *data,
351 int stride,
352 int count )
353 {
354 int i;
355 int *out = (int *)(rvb->address + rvb->start);
356
357 if (RADEON_DEBUG & DEBUG_VERTS)
358 fprintf(stderr, "%s count %d stride %d\n",
359 __FUNCTION__, count, stride);
360
361 for (i = 0; i < count; i++) {
362 out[0] = *(int *)data;
363 out[1] = *(int *)(data+4);
364 out[2] = *(int *)(data+12);
365 out += 3;
366 data += stride;
367 }
368 }
369
370
371
372
373 static void emit_tex_vector( GLcontext *ctx,
374 struct radeon_dma_region *rvb,
375 char *data,
376 int size,
377 int stride,
378 int count )
379 {
380 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
381 int emitsize;
382
383 if (RADEON_DEBUG & DEBUG_VERTS)
384 fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
385
386 assert (!rvb->buf);
387
388 switch (size) {
389 case 4: emitsize = 3; break;
390 case 3: emitsize = 3; break;
391 default: emitsize = 2; break;
392 }
393
394
395 if (stride == 0) {
396 radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
397 count = 1;
398 rvb->aos_start = GET_START(rvb);
399 rvb->aos_stride = 0;
400 rvb->aos_size = emitsize;
401 }
402 else {
403 radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
404 rvb->aos_start = GET_START(rvb);
405 rvb->aos_stride = emitsize;
406 rvb->aos_size = emitsize;
407 }
408
409
410 /* Emit the data
411 */
412 switch (size) {
413 case 1:
414 emit_s0_vec( ctx, rvb, data, stride, count );
415 break;
416 case 2:
417 emit_vec8( ctx, rvb, data, stride, count );
418 break;
419 case 3:
420 emit_vec12( ctx, rvb, data, stride, count );
421 break;
422 case 4:
423 emit_stq_vec( ctx, rvb, data, stride, count );
424 break;
425 default:
426 assert(0);
427 exit(1);
428 break;
429 }
430 }
431
432
433
434
435 /* Emit any changed arrays to new GART memory, re-emit a packet to
436 * update the arrays.
437 */
438 void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
439 {
440 radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
441 struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
442 struct radeon_dma_region **component = rmesa->tcl.aos_components;
443 GLuint nr = 0;
444 GLuint vfmt = 0;
445 GLuint count = VB->Count;
446 GLuint vtx;
447
448 #if 0
449 if (RADEON_DEBUG & DEBUG_VERTS)
450 _tnl_print_vert_flags( __FUNCTION__, inputs );
451 #endif
452
453 if (1) {
454 if (!rmesa->tcl.obj.buf)
455 emit_vector( ctx,
456 &rmesa->tcl.obj,
457 (char *)VB->ObjPtr->data,
458 VB->ObjPtr->size,
459 VB->ObjPtr->stride,
460 count);
461
462 switch( VB->ObjPtr->size ) {
463 case 4: vfmt |= RADEON_CP_VC_FRMT_W0;
464 case 3: vfmt |= RADEON_CP_VC_FRMT_Z;
465 case 2: vfmt |= RADEON_CP_VC_FRMT_XY;
466 default:
467 break;
468 }
469 component[nr++] = &rmesa->tcl.obj;
470 }
471
472
473 if (inputs & VERT_BIT_NORMAL) {
474 if (!rmesa->tcl.norm.buf)
475 emit_vector( ctx,
476 &(rmesa->tcl.norm),
477 (char *)VB->NormalPtr->data,
478 3,
479 VB->NormalPtr->stride,
480 count);
481
482 vfmt |= RADEON_CP_VC_FRMT_N0;
483 component[nr++] = &rmesa->tcl.norm;
484 }
485
486 if (inputs & VERT_BIT_COLOR0) {
487 int emitsize;
488 if (VB->ColorPtr[0]->size == 4 &&
489 (VB->ColorPtr[0]->stride != 0 ||
490 VB->ColorPtr[0]->data[0][3] != 1.0)) {
491 vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA;
492 emitsize = 4;
493 }
494
495 else {
496 vfmt |= RADEON_CP_VC_FRMT_FPCOLOR;
497 emitsize = 3;
498 }
499
500 if (!rmesa->tcl.rgba.buf)
501 emit_vector( ctx,
502 &(rmesa->tcl.rgba),
503 (char *)VB->ColorPtr[0]->data,
504 emitsize,
505 VB->ColorPtr[0]->stride,
506 count);
507
508
509 component[nr++] = &rmesa->tcl.rgba;
510 }
511
512
513 if (inputs & VERT_BIT_COLOR1) {
514 if (!rmesa->tcl.spec.buf) {
515
516 emit_vector( ctx,
517 &rmesa->tcl.spec,
518 (char *)VB->SecondaryColorPtr[0]->data,
519 3,
520 VB->SecondaryColorPtr[0]->stride,
521 count);
522 }
523
524 vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
525 component[nr++] = &rmesa->tcl.spec;
526 }
527
528 vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
529 ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1));
530
531 if (inputs & VERT_BIT_TEX0) {
532 if (!rmesa->tcl.tex[0].buf)
533 emit_tex_vector( ctx,
534 &(rmesa->tcl.tex[0]),
535 (char *)VB->TexCoordPtr[0]->data,
536 VB->TexCoordPtr[0]->size,
537 VB->TexCoordPtr[0]->stride,
538 count );
539
540 vfmt |= RADEON_CP_VC_FRMT_ST0;
541 /* assume we need the 3rd coord if texgen is active for r/q OR at least 3
542 coords are submitted. This may not be 100% correct */
543 if ( (VB->TexCoordPtr[0]->size >= 3) {
544 vtx |= RADEON_TCL_VTX_Q0;
545 vfmt |= RADEON_CP_VC_FRMT_Q0;
546 }
547 if ( (ctx->Texture.Unit[0].TexGenEnabled & (R_BIT | Q_BIT)) )
548 vtx |= RADEON_TCL_VTX_Q0;
549 else if (VB->TexCoordPtr[0]->size >= 3) {
550 GLuint swaptexmatcol = (VB->TexCoordPtr[0]->size - 3);
551 if ((rmesa->NeedTexMatrix & 1) &&
552 (swaptexmatcol != (rmesa->TexMatColSwap & 1)))
553 radeonUploadTexMatrix( rmesa, rmesa->tmpmat[0].m, 0, swaptexmatcol ) ;
554 }
555 component[nr++] = &rmesa->tcl.tex[0];
556 }
557
558 if (inputs & VERT_BIT_TEX1) {
559 if (!rmesa->tcl.tex[1].buf)
560 emit_tex_vector( ctx,
561 &(rmesa->tcl.tex[1]),
562 (char *)VB->TexCoordPtr[1]->data,
563 VB->TexCoordPtr[1]->size,
564 VB->TexCoordPtr[1]->stride,
565 count );
566
567 vfmt |= RADEON_CP_VC_FRMT_ST1;
568 if ( (VB->TexCoordPtr[1]->size >= 3) {
569 vtx |= RADEON_TCL_VTX_Q1;
570 vfmt |= RADEON_CP_VC_FRMT_Q1;
571 }
572 if ( (ctx->Texture.Unit[1].TexGenEnabled & (R_BIT | Q_BIT)) )
573 vtx |= RADEON_TCL_VTX_Q1;
574 else if (VB->TexCoordPtr[1]->size >= 3) {
575 GLuint swaptexmatcol = (VB->TexCoordPtr[1]->size - 3);
576 if (((rmesa->NeedTexMatrix >> 1) & 1) &&
577 (swaptexmatcol != ((rmesa->TexMatColSwap >> 1) & 1)))
578 radeonUploadTexMatrix( rmesa, rmesa->tmpmat[1].m, 1, swaptexmatcol ) ;
579 }
580 component[nr++] = &rmesa->tcl.tex[1];
581 }
582
583 if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
584 RADEON_STATECHANGE( rmesa, tcl );
585 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
586 }
587
588 rmesa->tcl.nr_aos_components = nr;
589 rmesa->tcl.vertex_format = vfmt;
590 }
591
592
593 void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
594 {
595 radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
596
597 #if 0
598 if (RADEON_DEBUG & DEBUG_VERTS)
599 _tnl_print_vert_flags( __FUNCTION__, newinputs );
600 #endif
601
602 if (newinputs & VERT_BIT_POS)
603 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
604
605 if (newinputs & VERT_BIT_NORMAL)
606 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
607
608 if (newinputs & VERT_BIT_COLOR0)
609 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
610
611 if (newinputs & VERT_BIT_COLOR1)
612 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
613
614 if (newinputs & VERT_BIT_TEX0)
615 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[0], __FUNCTION__ );
616
617 if (newinputs & VERT_BIT_TEX1)
618 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[1], __FUNCTION__ );
619 }