Support for idx buffers. Leaving it on by default as it doesn't seem to cause any...
[mesa.git] / src / mesa / drivers / dri / r300 / r300_maos.c
1 /* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_maos_arrays.c,v 1.3 2003/02/23 23:59:01 dawes Exp $ */
2 /*
3 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
4
5 The Weather Channel (TM) funded Tungsten Graphics to develop the
6 initial release of the Radeon 8500 driver under the XFree86 license.
7 This notice must be preserved.
8
9 Permission is hereby granted, free of charge, to any person obtaining
10 a copy of this software and associated documentation files (the
11 "Software"), to deal in the Software without restriction, including
12 without limitation the rights to use, copy, modify, merge, publish,
13 distribute, sublicense, and/or sell copies of the Software, and to
14 permit persons to whom the Software is furnished to do so, subject to
15 the following conditions:
16
17 The above copyright notice and this permission notice (including the
18 next paragraph) shall be included in all copies or substantial
19 portions of the Software.
20
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
29 **************************************************************************/
30
31 /*
32 * Authors:
33 * Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36 #include "glheader.h"
37 #include "mtypes.h"
38 #include "colormac.h"
39 #include "imports.h"
40 #include "macros.h"
41
42 #include "swrast_setup/swrast_setup.h"
43 #include "math/m_translate.h"
44 #include "tnl/tnl.h"
45 #include "tnl/t_context.h"
46
47 #include "r300_context.h"
48 #include "radeon_ioctl.h"
49 #include "r300_state.h"
50 #include "r300_maos.h"
51 #include "r300_ioctl.h"
52
53 #define DEBUG_ALL DEBUG_VERTS
54
/*
 * COPY_DWORDS(dst, src, nr)
 *
 * Copy `nr` 32-bit words from `src` to `dst` and leave `dst` pointing just
 * past the last word written.  `dst` must be a modifiable int-pointer
 * lvalue; `src` may be any pointer type.
 *
 * The portable fallback parenthesizes its arguments and evaluates `src`
 * and `nr` exactly once, so expressions such as `ptr + 4` or `a ? b : c`
 * are safe to pass.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	const int *copy_dwords_src_ = (const int *)(src);		\
	int copy_dwords_nr_ = (nr);					\
	int copy_dwords_i_;						\
	for ( copy_dwords_i_ = 0 ;					\
	      copy_dwords_i_ < copy_dwords_nr_ ;			\
	      copy_dwords_i_++ )					\
		(dst)[copy_dwords_i_] = copy_dwords_src_[copy_dwords_i_]; \
	(dst) += copy_dwords_nr_;					\
} while (0)
#endif
74
75 static void emit_vec4(GLcontext * ctx,
76 struct r300_dma_region *rvb,
77 char *data, int stride, int count)
78 {
79 int i;
80 int *out = (int *)(rvb->address + rvb->start);
81
82 if (RADEON_DEBUG & DEBUG_VERTS)
83 fprintf(stderr, "%s count %d stride %d\n",
84 __FUNCTION__, count, stride);
85
86 if (stride == 4)
87 COPY_DWORDS(out, data, count);
88 else
89 for (i = 0; i < count; i++) {
90 out[0] = *(int *)data;
91 out++;
92 data += stride;
93 }
94 }
95
96 static void emit_vec8(GLcontext * ctx,
97 struct r300_dma_region *rvb,
98 char *data, int stride, int count)
99 {
100 int i;
101 int *out = (int *)(rvb->address + rvb->start);
102
103 if (RADEON_DEBUG & DEBUG_VERTS)
104 fprintf(stderr, "%s count %d stride %d\n",
105 __FUNCTION__, count, stride);
106
107 if (stride == 8)
108 COPY_DWORDS(out, data, count * 2);
109 else
110 for (i = 0; i < count; i++) {
111 out[0] = *(int *)data;
112 out[1] = *(int *)(data + 4);
113 out += 2;
114 data += stride;
115 }
116 }
117
118 static void emit_vec12(GLcontext * ctx,
119 struct r300_dma_region *rvb,
120 char *data, int stride, int count)
121 {
122 int i;
123 int *out = (int *)(rvb->address + rvb->start);
124
125 if (RADEON_DEBUG & DEBUG_VERTS)
126 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
127 __FUNCTION__, count, stride, (void *)out, (void *)data);
128
129 if (stride == 12)
130 COPY_DWORDS(out, data, count * 3);
131 else
132 for (i = 0; i < count; i++) {
133 out[0] = *(int *)data;
134 out[1] = *(int *)(data + 4);
135 out[2] = *(int *)(data + 8);
136 out += 3;
137 data += stride;
138 }
139 }
140
141 static void emit_vec16(GLcontext * ctx,
142 struct r300_dma_region *rvb,
143 char *data, int stride, int count)
144 {
145 int i;
146 int *out = (int *)(rvb->address + rvb->start);
147
148 if (RADEON_DEBUG & DEBUG_VERTS)
149 fprintf(stderr, "%s count %d stride %d\n",
150 __FUNCTION__, count, stride);
151
152 if (stride == 16)
153 COPY_DWORDS(out, data, count * 4);
154 else
155 for (i = 0; i < count; i++) {
156 out[0] = *(int *)data;
157 out[1] = *(int *)(data + 4);
158 out[2] = *(int *)(data + 8);
159 out[3] = *(int *)(data + 12);
160 out += 4;
161 data += stride;
162 }
163 }
164
165 static void emit_vector(GLcontext * ctx,
166 struct r300_dma_region *rvb,
167 char *data, int size, int stride, int count)
168 {
169 r300ContextPtr rmesa = R300_CONTEXT(ctx);
170
171 if (RADEON_DEBUG & DEBUG_VERTS)
172 fprintf(stderr, "%s count %d size %d stride %d\n",
173 __FUNCTION__, count, size, stride);
174
175 assert(!rvb->buf);
176
177 if (stride == 0) {
178 r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
179 count = 1;
180 rvb->aos_offset = GET_START(rvb);
181 rvb->aos_stride = 0;
182 rvb->aos_size = size;
183 } else {
184 r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); /* alignment? */
185 rvb->aos_offset = GET_START(rvb);
186 rvb->aos_stride = size;
187 rvb->aos_size = size;
188 }
189
190 /* Emit the data
191 */
192 switch (size) {
193 case 1:
194 emit_vec4(ctx, rvb, data, stride, count);
195 break;
196 case 2:
197 emit_vec8(ctx, rvb, data, stride, count);
198 break;
199 case 3:
200 emit_vec12(ctx, rvb, data, stride, count);
201 break;
202 case 4:
203 emit_vec16(ctx, rvb, data, stride, count);
204 break;
205 default:
206 assert(0);
207 exit(1);
208 break;
209 }
210
211 }
212
213 void emit_elts(GLcontext * ctx, GLuint *elts, int oec, int ec)
214 {
215 r300ContextPtr rmesa = R300_CONTEXT(ctx);
216 radeonScreenPtr rsp=rmesa->radeon.radeonScreen;
217 unsigned short int *hw_elts;
218 int i;
219 int inc_found=0;
220 int dec_found=0;
221
222 hw_elts=malloc(ec*sizeof(unsigned short int));
223
224 for(i=0; i < oec; i++)
225 hw_elts[i]=(unsigned short int)elts[i];
226
227 /* Work around magic_1 problem by filling rest of the data with last idx */
228 for(; i < ec; i++)
229 hw_elts[i]=(unsigned short int)elts[oec-1];
230
231 memcpy(rsp->gartTextures.map, hw_elts, ec*sizeof(unsigned short int));
232 //memset(((char *)rsp->gartTextures.map)+ec*sizeof(unsigned short int), 0, 1024);
233 /*emit_vector(ctx, &rmesa->state.elt_ao,
234 (char *)hw_elts,
235 2,
236 2, ec);*/
237 /*
238 // some debug code...
239 inc_found=1;
240 for(i=1; i < oec; i++)
241 if(hw_elts[i-1] != hw_elts[i]+1){
242 inc_found=0;
243 break;
244 }
245
246 dec_found=1;
247 for(i=1; i < oec; i++)
248 if(hw_elts[i-1] != hw_elts[i]-1){
249 dec_found=0;
250 }
251
252 fprintf(stderr, "elts:");
253 for(i=0; i < oec; i++)
254 fprintf(stderr, "%d\n", hw_elts[i]);
255 fprintf(stderr, "\n");
256
257 if(inc_found==0 && dec_found==0){
258 fprintf(stderr, "error found\n");
259 exit(-1);
260 }
261 */
262 }
263
264 /* Emit vertex data to GART memory (unless immediate mode)
265 * Route inputs to the vertex processor
266 */
267 void r300EmitArrays(GLcontext * ctx, GLboolean immd)
268 {
269 r300ContextPtr rmesa = R300_CONTEXT(ctx);
270 r300ContextPtr r300 = rmesa;
271 struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
272 GLuint nr = 0;
273 GLuint count = VB->Count;
274 GLuint dw,mask;
275 GLuint vic_1 = 0; /* R300_VAP_INPUT_CNTL_1 */
276 GLuint aa_vap_reg = 0; /* VAP register assignment */
277 GLuint i;
278 GLuint inputs = 0;
279
280 #define CONFIGURE_AOS(f, v, sz, cn) { \
281 if (RADEON_DEBUG & DEBUG_STATE) \
282 fprintf(stderr, "Enabling "#v "\n"); \
283 if (++nr >= R300_MAX_AOS_ARRAYS) { \
284 fprintf(stderr, "Aieee! AOS array count exceeded!\n"); \
285 exit(-1); \
286 } \
287 \
288 if (rmesa->current_vp == NULL) \
289 rmesa->state.aos[nr-1].aos_reg = aa_vap_reg++; \
290 rmesa->state.aos[nr-1].aos_format = f; \
291 if (immd) { \
292 rmesa->state.aos[nr-1].aos_size = 4; \
293 rmesa->state.aos[nr-1].aos_stride = 4; \
294 rmesa->state.aos[nr-1].aos_offset = 0; \
295 } else { \
296 emit_vector(ctx, \
297 &rmesa->state.aos[nr-1], \
298 v->data, \
299 sz, \
300 v->stride, \
301 cn); \
302 } \
303 }
304
305 if (rmesa->current_vp != NULL) {
306 if (rmesa->current_vp->inputs[VERT_ATTRIB_POS] != -1) {
307 inputs |= _TNL_BIT_POS;
308 rmesa->state.aos[nr++].aos_reg = rmesa->current_vp->inputs[VERT_ATTRIB_POS];
309 }
310 if (rmesa->current_vp->inputs[VERT_ATTRIB_NORMAL] != -1) {
311 inputs |= _TNL_BIT_NORMAL;
312 rmesa->state.aos[nr++].aos_reg = rmesa->current_vp->inputs[VERT_ATTRIB_NORMAL];
313 }
314 if (rmesa->current_vp->inputs[VERT_ATTRIB_COLOR0] != -1) {
315 inputs |= _TNL_BIT_COLOR0;
316 rmesa->state.aos[nr++].aos_reg = rmesa->current_vp->inputs[VERT_ATTRIB_COLOR0];
317 }
318 if (rmesa->current_vp->inputs[VERT_ATTRIB_COLOR1] != -1) {
319 inputs |= _TNL_BIT_COLOR1;
320 rmesa->state.aos[nr++].aos_reg = rmesa->current_vp->inputs[VERT_ATTRIB_COLOR1];
321 }
322 if (rmesa->current_vp->inputs[VERT_ATTRIB_FOG] != -1) {
323 inputs |= _TNL_BIT_FOG;
324 rmesa->state.aos[nr++].aos_reg = rmesa->current_vp->inputs[VERT_ATTRIB_FOG];
325 }
326 for (i=0;i<ctx->Const.MaxTextureUnits;i++) {
327 if (rmesa->current_vp->inputs[VERT_ATTRIB_TEX0+i] != -1)
328 inputs |= _TNL_BIT_TEX0<<i;
329 rmesa->state.aos[nr++].aos_reg = rmesa->current_vp->inputs[VERT_ATTRIB_TEX0+i];
330 }
331 nr = 0;
332 } else {
333 inputs = TNL_CONTEXT(ctx)->render_inputs;
334 }
335 rmesa->state.render_inputs = inputs;
336
337 if (inputs & _TNL_BIT_POS) {
338 CONFIGURE_AOS( AOS_FORMAT_FLOAT,
339 VB->ObjPtr,
340 immd ? 4 : VB->ObjPtr->size,
341 count);
342
343 vic_1 |= R300_INPUT_CNTL_POS;
344 }
345
346 if (inputs & _TNL_BIT_NORMAL) {
347 CONFIGURE_AOS( AOS_FORMAT_FLOAT,
348 VB->NormalPtr,
349 immd ? 4 : VB->NormalPtr->size,
350 count);
351
352 vic_1 |= R300_INPUT_CNTL_NORMAL;
353 }
354
355 if (inputs & _TNL_BIT_COLOR0) {
356 int emitsize;
357
358 if (!immd) {
359 if (VB->ColorPtr[0]->size == 4 &&
360 (VB->ColorPtr[0]->stride != 0 ||
361 VB->ColorPtr[0]->data[0][3] != 1.0)) {
362 emitsize = 4;
363 } else {
364 emitsize = 3;
365 }
366 }
367
368 CONFIGURE_AOS( AOS_FORMAT_FLOAT_COLOR,
369 VB->ColorPtr[0],
370 immd ? 4 : emitsize,
371 count);
372
373 vic_1 |= R300_INPUT_CNTL_COLOR;
374 }
375
376 if (inputs & _TNL_BIT_COLOR1) {
377 CONFIGURE_AOS( AOS_FORMAT_FLOAT_COLOR,
378 VB->SecondaryColorPtr[0],
379 immd ? 4 : VB->SecondaryColorPtr[0]->size,
380 count);
381 }
382
383 #if 0
384 if (inputs & _TNL_BIT_FOG) {
385 CONFIGURE_AOS( AOS_FORMAT_FLOAT,
386 VB->FogCoordPtr,
387 immd ? 4 : VB->FogCoordPtr->size,
388 count);
389 }
390 #endif
391
392 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
393 if (inputs & (_TNL_BIT_TEX0 << i)) {
394 CONFIGURE_AOS( AOS_FORMAT_FLOAT,
395 VB->TexCoordPtr[i],
396 immd ? 4 : VB->TexCoordPtr[i]->size,
397 count);
398
399 vic_1 |= R300_INPUT_CNTL_TC0 << i;
400 }
401 }
402
403 int cmd_reserved=0;
404 int cmd_written=0;
405 drm_radeon_cmd_header_t *cmd = NULL;
406
407 #define SHOW_INFO(n) do { \
408 if (RADEON_DEBUG & DEBUG_ALL) { \
409 fprintf(stderr, "RR[%d] - sz=%d, reg=%d, fmt=%d -- st=%d, of=0x%08x\n", \
410 n, \
411 r300->state.aos[n].aos_size, \
412 r300->state.aos[n].aos_reg, \
413 r300->state.aos[n].aos_format, \
414 r300->state.aos[n].aos_stride, \
415 r300->state.aos[n].aos_offset); \
416 } \
417 } while(0);
418
419 /* setup INPUT_ROUTE */
420 R300_STATECHANGE(r300, vir[0]);
421 for(i=0;i+1<nr;i+=2){
422 SHOW_INFO(i)
423 SHOW_INFO(i+1)
424 dw=(r300->state.aos[i].aos_size-1)
425 | ((r300->state.aos[i].aos_reg)<<8)
426 | (r300->state.aos[i].aos_format<<14)
427 | (((r300->state.aos[i+1].aos_size-1)
428 | ((r300->state.aos[i+1].aos_reg)<<8)
429 | (r300->state.aos[i+1].aos_format<<14))<<16);
430
431 if(i+2==nr){
432 dw|=(1<<(13+16));
433 }
434 r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(i>>1)]=dw;
435 }
436 if(nr & 1){
437 SHOW_INFO(nr-1)
438 dw=(r300->state.aos[nr-1].aos_size-1)
439 | (r300->state.aos[nr-1].aos_format<<14)
440 | ((r300->state.aos[nr-1].aos_reg)<<8)
441 | (1<<13);
442 r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw;
443 //fprintf(stderr, "vir0 dw=%08x\n", dw);
444 }
445 /* Set the rest of INPUT_ROUTE_0 to 0 */
446 //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[0].cmd[R300_VIR_CNTL_0+i]=(0x0);
447 ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->unchecked_state.count = (nr+1)>>1;
448
449
450 /* Mesa assumes that all missing components are from (0, 0, 0, 1) */
451 #define ALL_COMPONENTS ((R300_INPUT_ROUTE_SELECT_X<<R300_INPUT_ROUTE_X_SHIFT) \
452 | (R300_INPUT_ROUTE_SELECT_Y<<R300_INPUT_ROUTE_Y_SHIFT) \
453 | (R300_INPUT_ROUTE_SELECT_Z<<R300_INPUT_ROUTE_Z_SHIFT) \
454 | (R300_INPUT_ROUTE_SELECT_W<<R300_INPUT_ROUTE_W_SHIFT))
455
456 #define ALL_DEFAULT ((R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_X_SHIFT) \
457 | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Y_SHIFT) \
458 | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Z_SHIFT) \
459 | (R300_INPUT_ROUTE_SELECT_ONE<<R300_INPUT_ROUTE_W_SHIFT))
460
461 R300_STATECHANGE(r300, vir[1]);
462
463 for(i=0;i+1<nr;i+=2){
464 /* do i first.. */
465 mask=(1<<(r300->state.aos[i].aos_size*3))-1;
466 dw=(ALL_COMPONENTS & mask)
467 | (ALL_DEFAULT & ~mask)
468 | R300_INPUT_ROUTE_ENABLE;
469
470 /* i+1 */
471 mask=(1<<(r300->state.aos[i+1].aos_size*3))-1;
472 dw|=(
473 (ALL_COMPONENTS & mask)
474 | (ALL_DEFAULT & ~mask)
475 | R300_INPUT_ROUTE_ENABLE
476 )<<16;
477
478 r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(i>>1)]=dw;
479 }
480 if(nr & 1){
481 mask=(1<<(r300->state.aos[nr-1].aos_size*3))-1;
482 dw=(ALL_COMPONENTS & mask)
483 | (ALL_DEFAULT & ~mask)
484 | R300_INPUT_ROUTE_ENABLE;
485 r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw;
486 //fprintf(stderr, "vir1 dw=%08x\n", dw);
487 }
488 /* Set the rest of INPUT_ROUTE_1 to 0 */
489 //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[1].cmd[R300_VIR_CNTL_0+i]=0x0;
490 ((drm_r300_cmd_header_t*)r300->hw.vir[1].cmd)->unchecked_state.count = (nr+1)>>1;
491
492 /* Set up input_cntl */
493 /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
494 R300_STATECHANGE(r300, vic);
495 r300->hw.vic.cmd[R300_VIC_CNTL_0]=0x5555; /* Hard coded value, no idea what it means */
496 r300->hw.vic.cmd[R300_VIC_CNTL_1]=vic_1;
497
498 #if 0
499 r300->hw.vic.cmd[R300_VIC_CNTL_1]=0;
500
501 if(r300->state.render_inputs & _TNL_BIT_POS)
502 r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_POS;
503
504 if(r300->state.render_inputs & _TNL_BIT_NORMAL)
505 r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_NORMAL;
506
507 if(r300->state.render_inputs & _TNL_BIT_COLOR0)
508 r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_COLOR;
509
510 for(i=0;i < ctx->Const.MaxTextureUnits;i++)
511 if(r300->state.render_inputs & (_TNL_BIT_TEX0<<i))
512 r300->hw.vic.cmd[R300_VIC_CNTL_1]|=(R300_INPUT_CNTL_TC0<<i);
513 #endif
514
515 /* Stage 3: VAP output */
516 R300_STATECHANGE(r300, vof);
517 r300->hw.vof.cmd[R300_VOF_CNTL_0]=R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT
518 | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
519
520 r300->hw.vof.cmd[R300_VOF_CNTL_1]=0;
521 for(i=0;i < ctx->Const.MaxTextureUnits;i++)
522 if(r300->state.render_inputs & (_TNL_BIT_TEX0<<i))
523 r300->hw.vof.cmd[R300_VOF_CNTL_1]|=(4<<(3*i));
524
525 rmesa->state.aos_count = nr;
526 }
527
528 void r300ReleaseArrays(GLcontext * ctx)
529 {
530 r300ContextPtr rmesa = R300_CONTEXT(ctx);
531 int i;
532
533 //r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_ao, __FUNCTION__);
534 for (i=0;i<rmesa->state.aos_count;i++) {
535 r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
536 }
537 }