Fixes for bugs that were nailed down when comparing against software vertex shading.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_maos.c
1 /* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_maos_arrays.c,v 1.3 2003/02/23 23:59:01 dawes Exp $ */
2 /*
3 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
4
5 The Weather Channel (TM) funded Tungsten Graphics to develop the
6 initial release of the Radeon 8500 driver under the XFree86 license.
7 This notice must be preserved.
8
9 Permission is hereby granted, free of charge, to any person obtaining
10 a copy of this software and associated documentation files (the
11 "Software"), to deal in the Software without restriction, including
12 without limitation the rights to use, copy, modify, merge, publish,
13 distribute, sublicense, and/or sell copies of the Software, and to
14 permit persons to whom the Software is furnished to do so, subject to
15 the following conditions:
16
17 The above copyright notice and this permission notice (including the
18 next paragraph) shall be included in all copies or substantial
19 portions of the Software.
20
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
29 **************************************************************************/
30
31 /*
32 * Authors:
33 * Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36 #include "glheader.h"
37 #include "mtypes.h"
38 #include "colormac.h"
39 #include "imports.h"
40 #include "macros.h"
41
42 #include "swrast_setup/swrast_setup.h"
43 #include "math/m_translate.h"
44 #include "tnl/tnl.h"
45 #include "tnl/t_context.h"
46
47 #include "r300_context.h"
48 #include "radeon_ioctl.h"
49 #include "r300_state.h"
50 #include "r300_maos.h"
51 #include "r300_ioctl.h"
52
/* Debug-flag mask tested by SHOW_INFO in r300EmitArrays; defined as an alias for DEBUG_VERTS. */
53 #define DEBUG_ALL DEBUG_VERTS
54
55
/*
 * COPY_DWORDS(dst, src, nr)
 *
 * Copy `nr` dwords from `src` to `dst` and advance `dst` past the data
 * that was written.
 *
 *   dst - modifiable pointer lvalue; it is updated by the macro.  The
 *         portable variant indexes it as an array of int.
 *   src - pointer (of any type) to the source data; may be an expression.
 *   nr  - number of dwords to copy; may be an expression.
 *
 * The portable variant evaluates `src` and `nr` exactly once and fully
 * parenthesizes them, so arguments such as `p + 4` or `a ? b : c` behave
 * as the caller expects.  Its temporaries carry a `copy_..._` prefix so
 * that they cannot capture identifiers used in the arguments.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)(dst)),			\
			        "S" ((long)(src)) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	const int *copy_src_ = (const int *)(src);			\
	const int copy_nr_ = (nr);					\
	int copy_j_;							\
	for ( copy_j_ = 0 ; copy_j_ < copy_nr_ ; copy_j_++ )		\
		(dst)[copy_j_] = copy_src_[copy_j_];			\
	(dst) += copy_nr_;						\
} while (0)
#endif
75
76 static void emit_vec4(GLcontext * ctx,
77 struct r300_dma_region *rvb,
78 char *data, int stride, int count)
79 {
80 int i;
81 int *out = (int *)(rvb->address + rvb->start);
82
83 if (RADEON_DEBUG & DEBUG_VERTS)
84 fprintf(stderr, "%s count %d stride %d\n",
85 __FUNCTION__, count, stride);
86
87 if (stride == 4)
88 COPY_DWORDS(out, data, count);
89 else
90 for (i = 0; i < count; i++) {
91 out[0] = *(int *)data;
92 out++;
93 data += stride;
94 }
95 }
96
97 static void emit_vec8(GLcontext * ctx,
98 struct r300_dma_region *rvb,
99 char *data, int stride, int count)
100 {
101 int i;
102 int *out = (int *)(rvb->address + rvb->start);
103
104 if (RADEON_DEBUG & DEBUG_VERTS)
105 fprintf(stderr, "%s count %d stride %d\n",
106 __FUNCTION__, count, stride);
107
108 if (stride == 8)
109 COPY_DWORDS(out, data, count * 2);
110 else
111 for (i = 0; i < count; i++) {
112 out[0] = *(int *)data;
113 out[1] = *(int *)(data + 4);
114 out += 2;
115 data += stride;
116 }
117 }
118
119 static void emit_vec12(GLcontext * ctx,
120 struct r300_dma_region *rvb,
121 char *data, int stride, int count)
122 {
123 int i;
124 int *out = (int *)(rvb->address + rvb->start);
125
126 if (RADEON_DEBUG & DEBUG_VERTS)
127 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
128 __FUNCTION__, count, stride, (void *)out, (void *)data);
129
130 if (stride == 12)
131 COPY_DWORDS(out, data, count * 3);
132 else
133 for (i = 0; i < count; i++) {
134 out[0] = *(int *)data;
135 out[1] = *(int *)(data + 4);
136 out[2] = *(int *)(data + 8);
137 out += 3;
138 data += stride;
139 }
140 }
141
142 static void emit_vec16(GLcontext * ctx,
143 struct r300_dma_region *rvb,
144 char *data, int stride, int count)
145 {
146 int i;
147 int *out = (int *)(rvb->address + rvb->start);
148
149 if (RADEON_DEBUG & DEBUG_VERTS)
150 fprintf(stderr, "%s count %d stride %d\n",
151 __FUNCTION__, count, stride);
152
153 if (stride == 16)
154 COPY_DWORDS(out, data, count * 4);
155 else
156 for (i = 0; i < count; i++) {
157 out[0] = *(int *)data;
158 out[1] = *(int *)(data + 4);
159 out[2] = *(int *)(data + 8);
160 out[3] = *(int *)(data + 12);
161 out += 4;
162 data += stride;
163 }
164 }
165
166 static void emit_vector(GLcontext * ctx,
167 struct r300_dma_region *rvb,
168 char *data, int size, int stride, int count)
169 {
170 r300ContextPtr rmesa = R300_CONTEXT(ctx);
171
172 if (RADEON_DEBUG & DEBUG_VERTS)
173 fprintf(stderr, "%s count %d size %d stride %d\n",
174 __FUNCTION__, count, size, stride);
175
176 assert(!rvb->buf);
177
178 if (stride == 0) {
179 r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
180 count = 1;
181 rvb->aos_offset = GET_START(rvb);
182 rvb->aos_stride = 0;
183 rvb->aos_size = size;
184 } else {
185 r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); /* alignment? */
186 rvb->aos_offset = GET_START(rvb);
187 rvb->aos_stride = size;
188 rvb->aos_size = size;
189 }
190
191 /* Emit the data
192 */
193 switch (size) {
194 case 1:
195 emit_vec4(ctx, rvb, data, stride, count);
196 break;
197 case 2:
198 emit_vec8(ctx, rvb, data, stride, count);
199 break;
200 case 3:
201 emit_vec12(ctx, rvb, data, stride, count);
202 break;
203 case 4:
204 emit_vec16(ctx, rvb, data, stride, count);
205 break;
206 default:
207 assert(0);
208 exit(1);
209 break;
210 }
211
212 }
213
214 void r300EmitElts(GLcontext * ctx, GLuint *elts, unsigned long n_elts)
215 {
216 r300ContextPtr rmesa = R300_CONTEXT(ctx);
217 struct r300_dma_region *rvb=&rmesa->state.elt_dma;
218 unsigned short int *out;
219 int i;
220
221 r300AllocDmaRegion(rmesa, rvb, n_elts*sizeof(unsigned short int), 2);
222
223 out = (unsigned short int *)(rvb->address + rvb->start);
224
225 for(i=0; i < n_elts; i++)
226 out[i]=(unsigned short int)elts[i];
227 }
228
229 /* Emit vertex data to GART memory (unless immediate mode)
230 * Route inputs to the vertex processor
231 */
232 void r300EmitArrays(GLcontext * ctx, GLboolean immd)
233 {
234 r300ContextPtr rmesa = R300_CONTEXT(ctx);
235 r300ContextPtr r300 = rmesa;
236 struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
237 GLuint nr = 0;
238 GLuint count = VB->Count;
239 GLuint dw,mask;
240 GLuint vic_1 = 0; /* R300_VAP_INPUT_CNTL_1 */
241 GLuint aa_vap_reg = 0; /* VAP register assignment */
242 GLuint i;
243 GLuint inputs = 0, outputs = 0;
244
245
246 #define CONFIGURE_AOS(r, f, v, sz, cn) { \
247 if (RADEON_DEBUG & DEBUG_STATE) \
248 fprintf(stderr, "Enabling "#v "\n"); \
249 if (++nr >= R300_MAX_AOS_ARRAYS) { \
250 fprintf(stderr, "Aieee! AOS array count exceeded!\n"); \
251 exit(-1); \
252 } \
253 \
254 if (VERTPROG_ACTIVE(ctx) == GL_FALSE) \
255 rmesa->state.aos[nr-1].aos_reg = aa_vap_reg++; \
256 rmesa->state.aos[nr-1].aos_format = f; \
257 if (immd) { \
258 rmesa->state.aos[nr-1].aos_size = 4; \
259 rmesa->state.aos[nr-1].aos_stride = 4; \
260 rmesa->state.aos[nr-1].aos_offset = 0; \
261 } else { \
262 emit_vector(ctx, \
263 &rmesa->state.aos[nr-1], \
264 v->data, \
265 sz, \
266 v->stride, \
267 cn); \
268 rmesa->state.vap_reg.r=rmesa->state.aos[nr-1].aos_reg; \
269 } \
270 }
271
272 if (VERTPROG_ACTIVE(ctx)) {
273 if (rmesa->current_vp->inputs[VERT_ATTRIB_POS] != -1) {
274 inputs |= _TNL_BIT_POS;
275 rmesa->state.aos[nr++].aos_reg = rmesa->current_vp->inputs[VERT_ATTRIB_POS];
276 }
277 if (rmesa->current_vp->inputs[VERT_ATTRIB_NORMAL] != -1) {
278 inputs |= _TNL_BIT_NORMAL;
279 rmesa->state.aos[nr++].aos_reg = rmesa->current_vp->inputs[VERT_ATTRIB_NORMAL];
280 }
281 if (rmesa->current_vp->inputs[VERT_ATTRIB_COLOR0] != -1) {
282 inputs |= _TNL_BIT_COLOR0;
283 rmesa->state.aos[nr++].aos_reg = rmesa->current_vp->inputs[VERT_ATTRIB_COLOR0];
284 }
285 if (rmesa->current_vp->inputs[VERT_ATTRIB_COLOR1] != -1) {
286 inputs |= _TNL_BIT_COLOR1;
287 rmesa->state.aos[nr++].aos_reg = rmesa->current_vp->inputs[VERT_ATTRIB_COLOR1];
288 }
289 if (rmesa->current_vp->inputs[VERT_ATTRIB_FOG] != -1) {
290 inputs |= _TNL_BIT_FOG;
291 rmesa->state.aos[nr++].aos_reg = rmesa->current_vp->inputs[VERT_ATTRIB_FOG];
292 }
293 if(ctx->Const.MaxTextureUnits > 8) { /* Not sure if this can even happen... */
294 fprintf(stderr, "%s: Cant handle that many inputs\n", __FUNCTION__);
295 exit(-1);
296 }
297 for (i=0;i<ctx->Const.MaxTextureUnits;i++) {
298 if (rmesa->current_vp->inputs[VERT_ATTRIB_TEX0+i] != -1) {
299 inputs |= _TNL_BIT_TEX0<<i;
300 rmesa->state.aos[nr++].aos_reg = rmesa->current_vp->inputs[VERT_ATTRIB_TEX0+i];
301 }
302 }
303 nr = 0;
304 } else {
305 inputs = TNL_CONTEXT(ctx)->render_inputs;
306 /* Hack to see what would happen if we would enable tex units according to their enabled values.
307 Why arent we doing this?
308 As for vertex programs tex coords should be passed if program wants them as some programs might deliver
309 some other values to the program with them. Futher more some programs might generate output tex coords
310 without taking them as inputs. */
311 /*for (i=0;i<ctx->Const.MaxTextureUnits;i++)
312 if(ctx->Texture.Unit[i].Enabled == 0)
313 inputs &= ~ (_TNL_BIT_TEX0<<i);*/
314 }
315 rmesa->state.render_inputs = inputs;
316
317 if (inputs & _TNL_BIT_POS) {
318 CONFIGURE_AOS(i_coords, AOS_FORMAT_FLOAT,
319 VB->ObjPtr,
320 immd ? 4 : VB->ObjPtr->size,
321 count);
322
323 vic_1 |= R300_INPUT_CNTL_POS;
324 }
325
326 if (inputs & _TNL_BIT_NORMAL) {
327 CONFIGURE_AOS(i_normal, AOS_FORMAT_FLOAT,
328 VB->NormalPtr,
329 immd ? 4 : VB->NormalPtr->size,
330 count);
331
332 vic_1 |= R300_INPUT_CNTL_NORMAL;
333 }
334
335 if (inputs & _TNL_BIT_COLOR0) {
336 int emitsize=4;
337
338 if (!immd) {
339 if (VB->ColorPtr[0]->size == 4 &&
340 (VB->ColorPtr[0]->stride != 0 ||
341 VB->ColorPtr[0]->data[0][3] != 1.0)) {
342 emitsize = 4;
343 } else {
344 emitsize = 3;
345 }
346 }
347
348 CONFIGURE_AOS(i_color[0], AOS_FORMAT_FLOAT_COLOR,
349 VB->ColorPtr[0],
350 immd ? 4 : emitsize,
351 count);
352
353 vic_1 |= R300_INPUT_CNTL_COLOR;
354 }
355
356 if (inputs & _TNL_BIT_COLOR1) {
357 CONFIGURE_AOS(i_color[1], AOS_FORMAT_FLOAT_COLOR,
358 VB->SecondaryColorPtr[0],
359 immd ? 4 : VB->SecondaryColorPtr[0]->size,
360 count);
361 }
362
363 #if 0
364 if (inputs & _TNL_BIT_FOG) {
365 CONFIGURE_AOS( AOS_FORMAT_FLOAT,
366 VB->FogCoordPtr,
367 immd ? 4 : VB->FogCoordPtr->size,
368 count);
369 }
370 #endif
371
372 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
373 if (inputs & (_TNL_BIT_TEX0 << i)) {
374 CONFIGURE_AOS(i_tex[i], AOS_FORMAT_FLOAT,
375 VB->TexCoordPtr[i],
376 immd ? 4 : VB->TexCoordPtr[i]->size,
377 count);
378
379 vic_1 |= R300_INPUT_CNTL_TC0 << i;
380 }
381 }
382
383
384 int cmd_reserved=0;
385 int cmd_written=0;
386 drm_radeon_cmd_header_t *cmd = NULL;
387
388 #define SHOW_INFO(n) do { \
389 if (RADEON_DEBUG & DEBUG_ALL) { \
390 fprintf(stderr, "RR[%d] - sz=%d, reg=%d, fmt=%d -- st=%d, of=0x%08x\n", \
391 n, \
392 r300->state.aos[n].aos_size, \
393 r300->state.aos[n].aos_reg, \
394 r300->state.aos[n].aos_format, \
395 r300->state.aos[n].aos_stride, \
396 r300->state.aos[n].aos_offset); \
397 } \
398 } while(0);
399
400 /* setup INPUT_ROUTE */
401 R300_STATECHANGE(r300, vir[0]);
402 for(i=0;i+1<nr;i+=2){
403 SHOW_INFO(i)
404 SHOW_INFO(i+1)
405 dw=(r300->state.aos[i].aos_size-1)
406 | ((r300->state.aos[i].aos_reg)<<8)
407 | (r300->state.aos[i].aos_format<<14)
408 | (((r300->state.aos[i+1].aos_size-1)
409 | ((r300->state.aos[i+1].aos_reg)<<8)
410 | (r300->state.aos[i+1].aos_format<<14))<<16);
411
412 if(i+2==nr){
413 dw|=(1<<(13+16));
414 }
415 r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(i>>1)]=dw;
416 }
417 if(nr & 1){
418 SHOW_INFO(nr-1)
419 dw=(r300->state.aos[nr-1].aos_size-1)
420 | (r300->state.aos[nr-1].aos_format<<14)
421 | ((r300->state.aos[nr-1].aos_reg)<<8)
422 | (1<<13);
423 r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw;
424 //fprintf(stderr, "vir0 dw=%08x\n", dw);
425 }
426 /* Set the rest of INPUT_ROUTE_0 to 0 */
427 //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[0].cmd[R300_VIR_CNTL_0+i]=(0x0);
428 ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->unchecked_state.count = (nr+1)>>1;
429
430
431 /* Mesa assumes that all missing components are from (0, 0, 0, 1) */
432 #define ALL_COMPONENTS ((R300_INPUT_ROUTE_SELECT_X<<R300_INPUT_ROUTE_X_SHIFT) \
433 | (R300_INPUT_ROUTE_SELECT_Y<<R300_INPUT_ROUTE_Y_SHIFT) \
434 | (R300_INPUT_ROUTE_SELECT_Z<<R300_INPUT_ROUTE_Z_SHIFT) \
435 | (R300_INPUT_ROUTE_SELECT_W<<R300_INPUT_ROUTE_W_SHIFT))
436
437 #define ALL_DEFAULT ((R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_X_SHIFT) \
438 | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Y_SHIFT) \
439 | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Z_SHIFT) \
440 | (R300_INPUT_ROUTE_SELECT_ONE<<R300_INPUT_ROUTE_W_SHIFT))
441
442 R300_STATECHANGE(r300, vir[1]);
443
444 for(i=0;i+1<nr;i+=2){
445 /* do i first.. */
446 mask=(1<<(r300->state.aos[i].aos_size*3))-1;
447 dw=(ALL_COMPONENTS & mask)
448 | (ALL_DEFAULT & ~mask)
449 | R300_INPUT_ROUTE_ENABLE;
450
451 /* i+1 */
452 mask=(1<<(r300->state.aos[i+1].aos_size*3))-1;
453 dw|=(
454 (ALL_COMPONENTS & mask)
455 | (ALL_DEFAULT & ~mask)
456 | R300_INPUT_ROUTE_ENABLE
457 )<<16;
458
459 r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(i>>1)]=dw;
460 }
461 if(nr & 1){
462 mask=(1<<(r300->state.aos[nr-1].aos_size*3))-1;
463 dw=(ALL_COMPONENTS & mask)
464 | (ALL_DEFAULT & ~mask)
465 | R300_INPUT_ROUTE_ENABLE;
466 r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw;
467 //fprintf(stderr, "vir1 dw=%08x\n", dw);
468 }
469 /* Set the rest of INPUT_ROUTE_1 to 0 */
470 //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[1].cmd[R300_VIR_CNTL_0+i]=0x0;
471 ((drm_r300_cmd_header_t*)r300->hw.vir[1].cmd)->unchecked_state.count = (nr+1)>>1;
472
473 /* Set up input_cntl */
474 /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
475 R300_STATECHANGE(r300, vic);
476 r300->hw.vic.cmd[R300_VIC_CNTL_0]=0x5555; /* Hard coded value, no idea what it means */
477 r300->hw.vic.cmd[R300_VIC_CNTL_1]=vic_1;
478
479 #if 0
480 r300->hw.vic.cmd[R300_VIC_CNTL_1]=0;
481
482 if(r300->state.render_inputs & _TNL_BIT_POS)
483 r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_POS;
484
485 if(r300->state.render_inputs & _TNL_BIT_NORMAL)
486 r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_NORMAL;
487
488 if(r300->state.render_inputs & _TNL_BIT_COLOR0)
489 r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_COLOR;
490
491 for(i=0;i < ctx->Const.MaxTextureUnits;i++)
492 if(r300->state.render_inputs & (_TNL_BIT_TEX0<<i))
493 r300->hw.vic.cmd[R300_VIC_CNTL_1]|=(R300_INPUT_CNTL_TC0<<i);
494 #endif
495
496 /* Stage 3: VAP output */
497 if (VERTPROG_ACTIVE(ctx))
498 outputs = rmesa->current_vp->outputs;
499 else
500 outputs = inputs;
501
502 R300_STATECHANGE(r300, vof);
503
504 r300->hw.vof.cmd[R300_VOF_CNTL_0]=0;
505 if(outputs & _TNL_BIT_POS)
506 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
507 if(outputs & _TNL_BIT_COLOR0)
508 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
509
510 r300->hw.vof.cmd[R300_VOF_CNTL_1]=0;
511 for(i=0;i < ctx->Const.MaxTextureUnits;i++)
512 if(outputs & (_TNL_BIT_TEX0<<i))
513 r300->hw.vof.cmd[R300_VOF_CNTL_1]|=(4<<(3*i));
514
515 rmesa->state.aos_count = nr;
516 }
517
518 void r300ReleaseArrays(GLcontext * ctx)
519 {
520 r300ContextPtr rmesa = R300_CONTEXT(ctx);
521 int i;
522
523 r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
524 for (i=0;i<rmesa->state.aos_count;i++) {
525 r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
526 }
527 }