patch to import Jon Smirl's work from Bitkeeper
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_maos_arrays.c
1 /* $XFree86$ */
2 /**************************************************************************
3
4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5 Tungsten Graphics Inc., Cedar Park, Texas.
6
7 All Rights Reserved.
8
9 Permission is hereby granted, free of charge, to any person obtaining
10 a copy of this software and associated documentation files (the
11 "Software"), to deal in the Software without restriction, including
12 without limitation the rights to use, copy, modify, merge, publish,
13 distribute, sublicense, and/or sell copies of the Software, and to
14 permit persons to whom the Software is furnished to do so, subject to
15 the following conditions:
16
17 The above copyright notice and this permission notice (including the
18 next paragraph) shall be included in all copies or substantial
19 portions of the Software.
20
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
29 **************************************************************************/
30
31 /*
32 * Authors:
33 * Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36 #include "glheader.h"
37 #include "imports.h"
38 #include "mtypes.h"
39 #include "mmath.h"
40 #include "macros.h"
41
42 #include "swrast_setup/swrast_setup.h"
43 #include "math/m_translate.h"
44 #include "tnl/tnl.h"
45 #include "tnl/t_context.h"
46 #include "tnl/t_imm_debug.h"
47
48 #include "radeon_context.h"
49 #include "radeon_ioctl.h"
50 #include "radeon_state.h"
51 #include "radeon_swtcl.h"
52 #include "radeon_maos.h"
53
/* Usage:
 *   - from radeon_tcl_render
 *   - call radeonEmitArrays to ensure up-to-date arrays in dma
 *   - emit primitives (new type?) which reference the data
 *       -- need to use elts for lineloop, quads, quadstrip/flat
 *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
 *
 */
62 static void emit_ubyte_rgba3( GLcontext *ctx,
63 struct radeon_dma_region *rvb,
64 char *data,
65 int stride,
66 int count )
67 {
68 int i;
69 radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address);
70
71 if (RADEON_DEBUG & DEBUG_VERTS)
72 fprintf(stderr, "%s count %d stride %d out %p\n",
73 __FUNCTION__, count, stride, out);
74
75 for (i = 0; i < count; i++) {
76 out->red = *data;
77 out->green = *(data+1);
78 out->blue = *(data+2);
79 out->alpha = 0xFF;
80 out++;
81 data += stride;
82 }
83 }
84
85
/* COPY_DWORDS( dst, src, nr )
 *
 * Copy `nr` 32-bit words from `src` to `dst`, then leave `dst` pointing
 * just past the last word written.  `dst` must be a modifiable pointer
 * lvalue to a 32-bit type; `src` may be any pointer expression.
 *
 * Both variants evaluate `nr` and `src` exactly once, so arbitrary
 * expressions (e.g. `count*2`, `n & 3`) are safe as arguments.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
   int copy_dwords_cnt_;        /* receives the clobbered count reg */  \
   const void *copy_dwords_src_; /* receives the advanced source */     \
   __asm__ __volatile__( "rep ; movsl"                                  \
                         : "=c" (copy_dwords_cnt_),                     \
                           "=D" (dst),                                  \
                           "=S" (copy_dwords_src_)                      \
                         : "0" (nr),                                    \
                           "1" (dst),                                   \
                           "2" ((const void *)(src))                    \
                         : "memory" ); /* the copy writes through dst */ \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
   const int copy_dwords_n_ = (nr);                                     \
   const int *copy_dwords_src_ = (const int *)(src);                    \
   int copy_dwords_i_;                                                  \
   for ( copy_dwords_i_ = 0 ;                                           \
         copy_dwords_i_ < copy_dwords_n_ ;                              \
         copy_dwords_i_++ )                                             \
      (dst)[copy_dwords_i_] = copy_dwords_src_[copy_dwords_i_];         \
   (dst) += copy_dwords_n_;                                             \
} while (0)
#endif
105
106
107
108 static void emit_ubyte_rgba4( GLcontext *ctx,
109 struct radeon_dma_region *rvb,
110 char *data,
111 int stride,
112 int count )
113 {
114 int i;
115 int *out = (int *)(rvb->address + rvb->start);
116
117 if (RADEON_DEBUG & DEBUG_VERTS)
118 fprintf(stderr, "%s count %d stride %d\n",
119 __FUNCTION__, count, stride);
120
121 if (stride == 4)
122 COPY_DWORDS( out, data, count );
123 else
124 for (i = 0; i < count; i++) {
125 *out++ = LE32_TO_CPU(*(int *)data);
126 data += stride;
127 }
128 }
129
130
131 static void emit_ubyte_rgba( GLcontext *ctx,
132 struct radeon_dma_region *rvb,
133 char *data,
134 int size,
135 int stride,
136 int count )
137 {
138 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
139
140 if (RADEON_DEBUG & DEBUG_VERTS)
141 fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
142
143 assert (!rvb->buf);
144
145 if (stride == 0) {
146 radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
147 count = 1;
148 rvb->aos_start = GET_START(rvb);
149 rvb->aos_stride = 0;
150 rvb->aos_size = 1;
151 }
152 else {
153 radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */
154 rvb->aos_start = GET_START(rvb);
155 rvb->aos_stride = 1;
156 rvb->aos_size = 1;
157 }
158
159 /* Emit the data
160 */
161 switch (size) {
162 case 3:
163 emit_ubyte_rgba3( ctx, rvb, data, stride, count );
164 break;
165 case 4:
166 emit_ubyte_rgba4( ctx, rvb, data, stride, count );
167 break;
168 default:
169 assert(0);
170 exit(1);
171 break;
172 }
173 }
174
175
176
177
178 static void emit_vec8( GLcontext *ctx,
179 struct radeon_dma_region *rvb,
180 char *data,
181 int stride,
182 int count )
183 {
184 int i;
185 int *out = (int *)(rvb->address + rvb->start);
186
187 if (RADEON_DEBUG & DEBUG_VERTS)
188 fprintf(stderr, "%s count %d stride %d\n",
189 __FUNCTION__, count, stride);
190
191 if (stride == 8)
192 COPY_DWORDS( out, data, count*2 );
193 else
194 for (i = 0; i < count; i++) {
195 out[0] = *(int *)data;
196 out[1] = *(int *)(data+4);
197 out += 2;
198 data += stride;
199 }
200 }
201
202 static void emit_vec12( GLcontext *ctx,
203 struct radeon_dma_region *rvb,
204 char *data,
205 int stride,
206 int count )
207 {
208 int i;
209 int *out = (int *)(rvb->address + rvb->start);
210
211 if (RADEON_DEBUG & DEBUG_VERTS)
212 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
213 __FUNCTION__, count, stride, out, data);
214
215 if (stride == 12)
216 COPY_DWORDS( out, data, count*3 );
217 else
218 for (i = 0; i < count; i++) {
219 out[0] = *(int *)data;
220 out[1] = *(int *)(data+4);
221 out[2] = *(int *)(data+8);
222 out += 3;
223 data += stride;
224 }
225 }
226
227 static void emit_vec16( GLcontext *ctx,
228 struct radeon_dma_region *rvb,
229 char *data,
230 int stride,
231 int count )
232 {
233 int i;
234 int *out = (int *)(rvb->address + rvb->start);
235
236 if (RADEON_DEBUG & DEBUG_VERTS)
237 fprintf(stderr, "%s count %d stride %d\n",
238 __FUNCTION__, count, stride);
239
240 if (stride == 16)
241 COPY_DWORDS( out, data, count*4 );
242 else
243 for (i = 0; i < count; i++) {
244 out[0] = *(int *)data;
245 out[1] = *(int *)(data+4);
246 out[2] = *(int *)(data+8);
247 out[3] = *(int *)(data+12);
248 out += 4;
249 data += stride;
250 }
251 }
252
253
254 static void emit_vector( GLcontext *ctx,
255 struct radeon_dma_region *rvb,
256 char *data,
257 int size,
258 int stride,
259 int count )
260 {
261 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
262
263 if (RADEON_DEBUG & DEBUG_VERTS)
264 fprintf(stderr, "%s count %d size %d stride %d\n",
265 __FUNCTION__, count, size, stride);
266
267 assert (!rvb->buf);
268
269 if (stride == 0) {
270 radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
271 count = 1;
272 rvb->aos_start = GET_START(rvb);
273 rvb->aos_stride = 0;
274 rvb->aos_size = size;
275 }
276 else {
277 radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? */
278 rvb->aos_start = GET_START(rvb);
279 rvb->aos_stride = size;
280 rvb->aos_size = size;
281 }
282
283 /* Emit the data
284 */
285 switch (size) {
286 case 2:
287 emit_vec8( ctx, rvb, data, stride, count );
288 break;
289 case 3:
290 emit_vec12( ctx, rvb, data, stride, count );
291 break;
292 case 4:
293 emit_vec16( ctx, rvb, data, stride, count );
294 break;
295 default:
296 assert(0);
297 exit(1);
298 break;
299 }
300
301 }
302
303
304
305 static void emit_s0_vec( GLcontext *ctx,
306 struct radeon_dma_region *rvb,
307 char *data,
308 int stride,
309 int count )
310 {
311 int i;
312 int *out = (int *)(rvb->address + rvb->start);
313
314 if (RADEON_DEBUG & DEBUG_VERTS)
315 fprintf(stderr, "%s count %d stride %d\n",
316 __FUNCTION__, count, stride);
317
318 for (i = 0; i < count; i++) {
319 out[0] = *(int *)data;
320 out[1] = 0;
321 out += 2;
322 data += stride;
323 }
324 }
325
326 static void emit_stq_vec( GLcontext *ctx,
327 struct radeon_dma_region *rvb,
328 char *data,
329 int stride,
330 int count )
331 {
332 int i;
333 int *out = (int *)(rvb->address + rvb->start);
334
335 if (RADEON_DEBUG & DEBUG_VERTS)
336 fprintf(stderr, "%s count %d stride %d\n",
337 __FUNCTION__, count, stride);
338
339 for (i = 0; i < count; i++) {
340 out[0] = *(int *)data;
341 out[1] = *(int *)(data+4);
342 out[2] = *(int *)(data+12);
343 out += 3;
344 data += stride;
345 }
346 }
347
348
349
350
351 static void emit_tex_vector( GLcontext *ctx,
352 struct radeon_dma_region *rvb,
353 char *data,
354 int size,
355 int stride,
356 int count )
357 {
358 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
359 int emitsize;
360
361 if (RADEON_DEBUG & DEBUG_VERTS)
362 fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
363
364 assert (!rvb->buf);
365
366 switch (size) {
367 case 4: emitsize = 3; break;
368 default: emitsize = 2; break;
369 }
370
371
372 if (stride == 0) {
373 radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
374 count = 1;
375 rvb->aos_start = GET_START(rvb);
376 rvb->aos_stride = 0;
377 rvb->aos_size = emitsize;
378 }
379 else {
380 radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
381 rvb->aos_start = GET_START(rvb);
382 rvb->aos_stride = emitsize;
383 rvb->aos_size = emitsize;
384 }
385
386
387 /* Emit the data
388 */
389 switch (size) {
390 case 1:
391 emit_s0_vec( ctx, rvb, data, stride, count );
392 break;
393 case 2:
394 emit_vec8( ctx, rvb, data, stride, count );
395 break;
396 case 3:
397 emit_vec8( ctx, rvb, data, stride, count );
398 break;
399 case 4:
400 emit_stq_vec( ctx, rvb, data, stride, count );
401 break;
402 default:
403 assert(0);
404 exit(1);
405 break;
406 }
407 }
408
409
410
411
412 /* Emit any changed arrays to new agp memory, re-emit a packet to
413 * update the arrays.
414 */
415 void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
416 {
417 radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
418 struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
419 struct radeon_dma_region **component = rmesa->tcl.aos_components;
420 GLuint nr = 0;
421 GLuint vfmt = 0;
422 GLuint count = VB->Count;
423 GLuint vtx;
424
425 if (RADEON_DEBUG & DEBUG_VERTS)
426 _tnl_print_vert_flags( __FUNCTION__, inputs );
427
428 if (1) {
429 if (!rmesa->tcl.obj.buf)
430 emit_vector( ctx,
431 &rmesa->tcl.obj,
432 (char *)VB->ObjPtr->data,
433 VB->ObjPtr->size,
434 VB->ObjPtr->stride,
435 count);
436
437 switch( VB->ObjPtr->size ) {
438 case 4: vfmt |= RADEON_CP_VC_FRMT_W0;
439 case 3: vfmt |= RADEON_CP_VC_FRMT_Z;
440 case 2: vfmt |= RADEON_CP_VC_FRMT_XY;
441 default:
442 }
443 component[nr++] = &rmesa->tcl.obj;
444 }
445
446
447 if (inputs & VERT_BIT_NORMAL) {
448 if (!rmesa->tcl.norm.buf)
449 emit_vector( ctx,
450 &(rmesa->tcl.norm),
451 (char *)VB->NormalPtr->data,
452 3,
453 VB->NormalPtr->stride,
454 count);
455
456 vfmt |= RADEON_CP_VC_FRMT_N0;
457 component[nr++] = &rmesa->tcl.norm;
458 }
459
460 if (inputs & VERT_BIT_COLOR0) {
461 if (VB->ColorPtr[0]->Type == GL_UNSIGNED_BYTE) {
462 if (!rmesa->tcl.rgba.buf)
463 emit_ubyte_rgba( ctx,
464 &rmesa->tcl.rgba,
465 (char *)VB->ColorPtr[0]->Ptr,
466 VB->ColorPtr[0]->Size,
467 VB->ColorPtr[0]->StrideB,
468 count);
469
470 vfmt |= RADEON_CP_VC_FRMT_PKCOLOR;
471 }
472 else {
473 int emitsize;
474
475 if (VB->ColorPtr[0]->Size == 4 &&
476 (VB->ColorPtr[0]->StrideB != 0 ||
477 ((GLfloat *)VB->ColorPtr[0]->Ptr)[3] != 1.0)) {
478 vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA;
479 emitsize = 4;
480 }
481 else {
482 vfmt |= RADEON_CP_VC_FRMT_FPCOLOR;
483 emitsize = 3;
484 }
485
486 if (!rmesa->tcl.rgba.buf)
487 emit_vector( ctx,
488 &(rmesa->tcl.rgba),
489 (char *)VB->ColorPtr[0]->Ptr,
490 emitsize,
491 VB->ColorPtr[0]->StrideB,
492 count);
493 }
494
495 component[nr++] = &rmesa->tcl.rgba;
496 }
497
498
499 if (inputs & VERT_BIT_COLOR1) {
500 if (!rmesa->tcl.spec.buf) {
501 if (VB->SecondaryColorPtr[0]->Type != GL_UNSIGNED_BYTE)
502 radeon_import_float_spec_colors( ctx );
503
504 emit_ubyte_rgba( ctx,
505 &rmesa->tcl.spec,
506 (char *)VB->SecondaryColorPtr[0]->Ptr,
507 3,
508 VB->SecondaryColorPtr[0]->StrideB,
509 count);
510 }
511
512 vfmt |= RADEON_CP_VC_FRMT_PKSPEC;
513 component[nr++] = &rmesa->tcl.spec;
514 }
515
516 vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
517 ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1));
518
519 if (inputs & VERT_BIT_TEX0) {
520 if (!rmesa->tcl.tex[0].buf)
521 emit_tex_vector( ctx,
522 &(rmesa->tcl.tex[0]),
523 (char *)VB->TexCoordPtr[0]->data,
524 VB->TexCoordPtr[0]->size,
525 VB->TexCoordPtr[0]->stride,
526 count );
527
528 switch( VB->TexCoordPtr[0]->size ) {
529 case 4:
530 vtx |= RADEON_TCL_VTX_Q0;
531 vfmt |= RADEON_CP_VC_FRMT_Q0;
532 default:
533 vfmt |= RADEON_CP_VC_FRMT_ST0;
534 }
535 component[nr++] = &rmesa->tcl.tex[0];
536 }
537
538 if (inputs & VERT_BIT_TEX1) {
539 if (!rmesa->tcl.tex[1].buf)
540 emit_tex_vector( ctx,
541 &(rmesa->tcl.tex[1]),
542 (char *)VB->TexCoordPtr[1]->data,
543 VB->TexCoordPtr[1]->size,
544 VB->TexCoordPtr[1]->stride,
545 count );
546
547 switch( VB->TexCoordPtr[1]->size ) {
548 case 4:
549 vtx |= RADEON_TCL_VTX_Q1;
550 vfmt |= RADEON_CP_VC_FRMT_Q1;
551 default:
552 vfmt |= RADEON_CP_VC_FRMT_ST1;
553 }
554 component[nr++] = &rmesa->tcl.tex[1];
555 }
556
557 if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
558 RADEON_STATECHANGE( rmesa, tcl );
559 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
560 }
561
562 rmesa->tcl.nr_aos_components = nr;
563 rmesa->tcl.vertex_format = vfmt;
564 }
565
566
567 void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
568 {
569 radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
570
571 if (RADEON_DEBUG & DEBUG_VERTS)
572 _tnl_print_vert_flags( __FUNCTION__, newinputs );
573
574 if (newinputs & VERT_BIT_POS)
575 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
576
577 if (newinputs & VERT_BIT_NORMAL)
578 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
579
580 if (newinputs & VERT_BIT_COLOR0)
581 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
582
583 if (newinputs & VERT_BIT_COLOR1)
584 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
585
586 if (newinputs & VERT_BIT_TEX0)
587 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[0], __FUNCTION__ );
588
589 if (newinputs & VERT_BIT_TEX1)
590 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[1], __FUNCTION__ );
591 }