1 /* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_maos_arrays.c,v 1.3 2003/02/23 23:59:01 dawes Exp $ */
/**************************************************************************

Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
5 The Weather Channel (TM) funded Tungsten Graphics to develop the
6 initial release of the Radeon 8500 driver under the XFree86 license.
7 This notice must be preserved.
9 Permission is hereby granted, free of charge, to any person obtaining
10 a copy of this software and associated documentation files (the
11 "Software"), to deal in the Software without restriction, including
12 without limitation the rights to use, copy, modify, merge, publish,
13 distribute, sublicense, and/or sell copies of the Software, and to
14 permit persons to whom the Software is furnished to do so, subject to
15 the following conditions:
17 The above copyright notice and this permission notice (including the
18 next paragraph) shall be included in all copies or substantial
19 portions of the Software.
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 **************************************************************************/
33 * Keith Whitwell <keith@tungstengraphics.com>
43 #include "swrast_setup/swrast_setup.h"
44 #include "math/m_translate.h"
46 #include "tnl/t_context.h"
48 #include "r300_context.h"
49 #include "radeon_ioctl.h"
50 #include "r300_state.h"
51 #include "r300_maos.h"
52 #include "r300_ioctl.h"
55 #include "radeon_mm.h"
/* The INPUT_ROUTE swizzle encoding below relies on Mesa's SWIZZLE_* values
 * being numerically identical to the R300 INPUT_ROUTE_SELECT_* values, so
 * t_swizzle() can pack them without a translation table.  Fail the build
 * if either side ever changes.
 */
#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
    SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
    SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
    SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
    SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
    SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
#error Cannot change these!
#endif

#define DEBUG_ALL DEBUG_VERTS
/* Copy `nr` 32-bit words from src to dst and advance dst past the copied
 * region.  The x86 path uses `rep movsl`, which updates EDI/ESI as a side
 * effect; the generic path advances dst explicitly to match.
 * NOTE(review): bodies reconstructed from the classic radeon/r300 driver —
 * verify side effects (dst advanced, src unchanged) against callers.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
				"D" ((long)dst),			\
				"S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif
90 static void emit_vec4(GLcontext
* ctx
,
91 struct r300_dma_region
*rvb
,
92 GLvoid
*data
, int stride
, int count
)
95 int *out
= (int *)(rvb
->address
+ rvb
->start
);
97 if (RADEON_DEBUG
& DEBUG_VERTS
)
98 fprintf(stderr
, "%s count %d stride %d\n",
99 __FUNCTION__
, count
, stride
);
102 COPY_DWORDS(out
, data
, count
);
104 for (i
= 0; i
< count
; i
++) {
105 out
[0] = *(int *)data
;
111 static void emit_vec8(GLcontext
* ctx
,
112 struct r300_dma_region
*rvb
,
113 GLvoid
*data
, int stride
, int count
)
116 int *out
= (int *)(rvb
->address
+ rvb
->start
);
118 if (RADEON_DEBUG
& DEBUG_VERTS
)
119 fprintf(stderr
, "%s count %d stride %d\n",
120 __FUNCTION__
, count
, stride
);
123 COPY_DWORDS(out
, data
, count
* 2);
125 for (i
= 0; i
< count
; i
++) {
126 out
[0] = *(int *)data
;
127 out
[1] = *(int *)(data
+ 4);
133 static void emit_vec12(GLcontext
* ctx
,
134 struct r300_dma_region
*rvb
,
135 GLvoid
*data
, int stride
, int count
)
138 int *out
= (int *)(rvb
->address
+ rvb
->start
);
140 if (RADEON_DEBUG
& DEBUG_VERTS
)
141 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
142 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
145 COPY_DWORDS(out
, data
, count
* 3);
147 for (i
= 0; i
< count
; i
++) {
148 out
[0] = *(int *)data
;
149 out
[1] = *(int *)(data
+ 4);
150 out
[2] = *(int *)(data
+ 8);
156 static void emit_vec16(GLcontext
* ctx
,
157 struct r300_dma_region
*rvb
,
158 GLvoid
*data
, int stride
, int count
)
161 int *out
= (int *)(rvb
->address
+ rvb
->start
);
163 if (RADEON_DEBUG
& DEBUG_VERTS
)
164 fprintf(stderr
, "%s count %d stride %d\n",
165 __FUNCTION__
, count
, stride
);
168 COPY_DWORDS(out
, data
, count
* 4);
170 for (i
= 0; i
< count
; i
++) {
171 out
[0] = *(int *)data
;
172 out
[1] = *(int *)(data
+ 4);
173 out
[2] = *(int *)(data
+ 8);
174 out
[3] = *(int *)(data
+ 12);
180 static void emit_vector(GLcontext
* ctx
,
181 struct r300_dma_region
*rvb
,
182 GLvoid
*data
, int size
, int stride
, int count
)
184 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
186 if (RADEON_DEBUG
& DEBUG_VERTS
)
187 fprintf(stderr
, "%s count %d size %d stride %d\n",
188 __FUNCTION__
, count
, size
, stride
);
190 /* Gets triggered when playing with future_hw_tcl_on ...*/
194 r300AllocDmaRegion(rmesa
, rvb
, size
* 4, 4);
196 rvb
->aos_offset
= GET_START(rvb
);
199 r300AllocDmaRegion(rmesa
, rvb
, size
* count
* 4, 4); /* alignment? */
200 rvb
->aos_offset
= GET_START(rvb
);
201 rvb
->aos_stride
= size
;
208 emit_vec4(ctx
, rvb
, data
, stride
, count
);
211 emit_vec8(ctx
, rvb
, data
, stride
, count
);
214 emit_vec12(ctx
, rvb
, data
, stride
, count
);
217 emit_vec16(ctx
, rvb
, data
, stride
, count
);
227 void r300EmitElts(GLcontext
* ctx
, void *elts
, unsigned long n_elts
, int elt_size
)
229 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
230 struct r300_dma_region
*rvb
=&rmesa
->state
.elt_dma
;
233 assert(elt_size
== 2 || elt_size
== 4);
235 if(r300IsGartMemory(rmesa
, elts
, n_elts
* elt_size
)){
236 rvb
->address
= rmesa
->radeon
.radeonScreen
->gartTextures
.map
;
237 rvb
->start
= ((char *)elts
) - rvb
->address
;
238 rvb
->aos_offset
= rmesa
->radeon
.radeonScreen
->gart_texture_offset
+ rvb
->start
;
241 }else if(r300IsGartMemory(rmesa
, elts
, 1)){
242 WARN_ONCE("Pointer not within GART memory!\n");
246 r300AllocDmaRegion(rmesa
, rvb
, n_elts
* elt_size
, elt_size
);
247 rvb
->aos_offset
= GET_START(rvb
);
249 out
= rvb
->address
+ rvb
->start
;
250 memcpy(out
, elts
, n_elts
* elt_size
);
253 static GLuint
t_type(struct dt
*dt
)
256 case GL_UNSIGNED_BYTE
:
257 return AOS_FORMAT_UBYTE
;
260 return AOS_FORMAT_USHORT
;
263 return AOS_FORMAT_FLOAT
;
270 return AOS_FORMAT_FLOAT
;
273 static GLuint
t_vir0_size(struct dt
*dt
)
276 case GL_UNSIGNED_BYTE
:
293 static GLuint
t_aos_size(struct dt
*dt
)
296 case GL_UNSIGNED_BYTE
:
313 static GLuint
t_vir0(uint32_t *dst
, struct dt
*dt
, int *inputs
, GLint
*tab
, GLuint nr
)
317 for (i
= 0; i
+ 1 < nr
; i
+= 2){
318 dw
= t_vir0_size(&dt
[tab
[i
]]) | (inputs
[tab
[i
]] << 8) | (t_type(&dt
[tab
[i
]]) << 14);
319 dw
|= (t_vir0_size(&dt
[tab
[i
+ 1]]) | (inputs
[tab
[i
+ 1]] << 8) | (t_type(&dt
[tab
[i
+ 1]]) << 14)) << 16;
322 dw
|= (1 << (13 + 16));
328 dw
= t_vir0_size(&dt
[tab
[nr
- 1]]) | (inputs
[tab
[nr
- 1]] << 8) | (t_type(&dt
[tab
[nr
- 1]]) << 14);
334 return (nr
+ 1) >> 1;
337 static GLuint
t_swizzle(int swizzle
[4])
339 return (swizzle
[0] << R300_INPUT_ROUTE_X_SHIFT
) |
340 (swizzle
[1] << R300_INPUT_ROUTE_Y_SHIFT
) |
341 (swizzle
[2] << R300_INPUT_ROUTE_Z_SHIFT
) |
342 (swizzle
[3] << R300_INPUT_ROUTE_W_SHIFT
);
345 static GLuint
t_vir1(uint32_t *dst
, int swizzle
[][4], GLuint nr
)
349 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
350 dst
[i
>> 1] = t_swizzle(swizzle
[i
]) | R300_INPUT_ROUTE_ENABLE
;
351 dst
[i
>> 1] |= (t_swizzle(swizzle
[i
+ 1]) | R300_INPUT_ROUTE_ENABLE
) << 16;
355 dst
[nr
>> 1] = t_swizzle(swizzle
[nr
- 1]) | R300_INPUT_ROUTE_ENABLE
;
357 return (nr
+ 1) >> 1;
360 static GLuint
t_emit_size(struct dt
*dt
)
365 static GLuint
t_vic(GLcontext
* ctx
, GLuint InputsRead
)
367 r300ContextPtr r300
= R300_CONTEXT(ctx
);
370 if (InputsRead
& (1 << VERT_ATTRIB_POS
))
371 vic_1
|= R300_INPUT_CNTL_POS
;
373 if (InputsRead
& (1 << VERT_ATTRIB_NORMAL
))
374 vic_1
|= R300_INPUT_CNTL_NORMAL
;
376 if (InputsRead
& (1 << VERT_ATTRIB_COLOR0
))
377 vic_1
|= R300_INPUT_CNTL_COLOR
;
379 r300
->state
.texture
.tc_count
= 0;
380 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
381 if (InputsRead
& (1 << (VERT_ATTRIB_TEX0
+ i
))) {
382 r300
->state
.texture
.tc_count
++;
383 vic_1
|= R300_INPUT_CNTL_TC0
<< i
;
/* Emit vertex data to GART memory
 * Route inputs to the vertex processor
 * This function should never return R300_FALLBACK_TCL when using software tcl.
 */
394 int r300EmitArrays(GLcontext
*ctx
)
396 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
397 r300ContextPtr r300
= rmesa
;
398 struct radeon_vertex_buffer
*VB
= &rmesa
->state
.VB
;
400 GLuint count
= VB
->Count
;
402 GLuint InputsRead
= 0, OutputsWritten
= 0;
404 GLint tab
[VERT_ATTRIB_MAX
];
405 int swizzle
[VERT_ATTRIB_MAX
][4];
408 struct r300_vertex_program
*prog
=(struct r300_vertex_program
*)CURRENT_VERTEX_SHADER(ctx
);
409 inputs
= prog
->inputs
;
410 InputsRead
= CURRENT_VERTEX_SHADER(ctx
)->key
.InputsRead
;
411 OutputsWritten
= CURRENT_VERTEX_SHADER(ctx
)->key
.OutputsWritten
;
413 DECLARE_RENDERINPUTS(inputs_bitset
);
414 inputs
= r300
->state
.sw_tcl_inputs
;
416 RENDERINPUTS_COPY( inputs_bitset
, TNL_CONTEXT(ctx
)->render_inputs_bitset
);
418 assert(RENDERINPUTS_TEST( inputs_bitset
, _TNL_ATTRIB_POS
));
419 InputsRead
|= 1 << VERT_ATTRIB_POS
;
420 OutputsWritten
|= 1 << VERT_RESULT_HPOS
;
422 assert(RENDERINPUTS_TEST( inputs_bitset
, _TNL_ATTRIB_NORMAL
) == 0);
424 assert(RENDERINPUTS_TEST( inputs_bitset
, _TNL_ATTRIB_COLOR0
));
425 InputsRead
|= 1 << VERT_ATTRIB_COLOR0
;
426 OutputsWritten
|= 1 << VERT_RESULT_COL0
;
428 if (RENDERINPUTS_TEST( inputs_bitset
, _TNL_ATTRIB_COLOR1
)) {
429 InputsRead
|= 1 << VERT_ATTRIB_COLOR1
;
430 OutputsWritten
|= 1 << VERT_RESULT_COL1
;
433 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
434 if (RENDERINPUTS_TEST( inputs_bitset
, _TNL_ATTRIB_TEX(i
) )) {
435 InputsRead
|= 1 << (VERT_ATTRIB_TEX0
+ i
);
436 OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ i
);
439 for (i
= 0, nr
= 0; i
< VERT_ATTRIB_MAX
; i
++)
440 if (InputsRead
& (1 << i
))
445 RENDERINPUTS_COPY( rmesa
->state
.render_inputs_bitset
, inputs_bitset
);
448 assert(OutputsWritten
);
450 for (i
= 0, nr
= 0; i
< VERT_ATTRIB_MAX
; i
++)
451 if (InputsRead
& (1 << i
))
454 if (nr
> R300_MAX_AOS_ARRAYS
)
455 return R300_FALLBACK_TCL
;
457 for (i
= 0; i
< nr
; i
++) {
459 int comp_size
, fix
, found
= 0;
461 swizzle
[i
][0] = SWIZZLE_ZERO
;
462 swizzle
[i
][1] = SWIZZLE_ZERO
;
463 swizzle
[i
][2] = SWIZZLE_ZERO
;
464 swizzle
[i
][3] = SWIZZLE_ONE
;
466 for (ci
= 0; ci
< VB
->AttribPtr
[tab
[i
]].size
; ci
++)
470 #define SWAP_INT(a, b) do { \
477 if (VB
->AttribPtr
[tab
[i
]].type
== GL_UNSIGNED_BYTE
) {
478 SWAP_INT(swizzle
[i
][0], swizzle
[i
][3]);
479 SWAP_INT(swizzle
[i
][1], swizzle
[i
][2]);
481 #endif /* MESA_BIG_ENDIAN */
483 if (r300IsGartMemory(rmesa
, VB
->AttribPtr
[tab
[i
]].data
, /*(count-1)*stride */ 4)) {
484 if (VB
->AttribPtr
[tab
[i
]].stride
% 4)
485 return R300_FALLBACK_TCL
;
487 rmesa
->state
.aos
[i
].address
= VB
->AttribPtr
[tab
[i
]].data
;
488 rmesa
->state
.aos
[i
].start
= 0;
489 rmesa
->state
.aos
[i
].aos_offset
= r300GartOffsetFromVirtual(rmesa
, VB
->AttribPtr
[tab
[i
]].data
);
490 rmesa
->state
.aos
[i
].aos_stride
= VB
->AttribPtr
[tab
[i
]].stride
/ 4;
492 rmesa
->state
.aos
[i
].aos_size
= t_emit_size(&VB
->AttribPtr
[tab
[i
]]);
494 /* TODO: emit_vector can only handle 4 byte vectors */
495 if (VB
->AttribPtr
[tab
[i
]].type
!= GL_FLOAT
)
496 return R300_FALLBACK_TCL
;
498 emit_vector(ctx
, &rmesa
->state
.aos
[i
], VB
->AttribPtr
[tab
[i
]].data
,
499 t_emit_size(&VB
->AttribPtr
[tab
[i
]]), VB
->AttribPtr
[tab
[i
]].stride
, count
);
502 rmesa
->state
.aos
[i
].aos_size
= t_aos_size(&VB
->AttribPtr
[tab
[i
]]);
504 comp_size
= _mesa_sizeof_type(VB
->AttribPtr
[tab
[i
]].type
);
506 for (fix
= 0; fix
<= 4 - VB
->AttribPtr
[tab
[i
]].size
; fix
++) {
507 if ((rmesa
->state
.aos
[i
].aos_offset
- comp_size
* fix
) % 4)
516 WARN_ONCE("Feeling lucky?\n");
519 rmesa
->state
.aos
[i
].aos_offset
-= comp_size
* fix
;
521 for (ci
= 0; ci
< VB
->AttribPtr
[tab
[i
]].size
; ci
++)
522 swizzle
[i
][ci
] += fix
;
524 WARN_ONCE("Cannot handle offset %x with stride %d, comp %d\n",
525 rmesa
->state
.aos
[i
].aos_offset
, rmesa
->state
.aos
[i
].aos_stride
, VB
->AttribPtr
[tab
[i
]].size
);
526 return R300_FALLBACK_TCL
;
530 /* setup INPUT_ROUTE */
531 R300_STATECHANGE(r300
, vir
[0]);
532 ((drm_r300_cmd_header_t
*)r300
->hw
.vir
[0].cmd
)->packet0
.count
=
533 t_vir0(&r300
->hw
.vir
[0].cmd
[R300_VIR_CNTL_0
], VB
->AttribPtr
, inputs
, tab
, nr
);
535 R300_STATECHANGE(r300
, vir
[1]);
536 ((drm_r300_cmd_header_t
*)r300
->hw
.vir
[1].cmd
)->packet0
.count
=
537 t_vir1(&r300
->hw
.vir
[1].cmd
[R300_VIR_CNTL_0
], swizzle
, nr
);
539 /* Set up input_cntl */
540 /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
541 R300_STATECHANGE(r300
, vic
);
542 r300
->hw
.vic
.cmd
[R300_VIC_CNTL_0
] = 0x5555; /* Hard coded value, no idea what it means */
543 r300
->hw
.vic
.cmd
[R300_VIC_CNTL_1
] = t_vic(ctx
, InputsRead
);
545 /* Stage 3: VAP output */
547 R300_STATECHANGE(r300
, vof
);
549 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
]=0;
550 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_1
]=0;
552 if (OutputsWritten
& (1 << VERT_RESULT_HPOS
))
553 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT
;
555 if (OutputsWritten
& (1 << VERT_RESULT_COL0
))
556 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT
;
558 if (OutputsWritten
& (1 << VERT_RESULT_COL1
))
559 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT
;
561 /*if(OutputsWritten & (1 << VERT_RESULT_BFC0))
562 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
564 if(OutputsWritten & (1 << VERT_RESULT_BFC1))
565 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;*/
566 //if(OutputsWritten & (1 << VERT_RESULT_FOGC))
568 if (OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
569 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT
;
571 for(i
=0;i
< ctx
->Const
.MaxTextureUnits
;i
++)
572 if(OutputsWritten
& (1 << (VERT_RESULT_TEX0
+ i
)))
573 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_1
] |= (4 << (3 * i
));
575 rmesa
->state
.aos_count
= nr
;
577 return R300_FALLBACK_NONE
;
581 void r300UseArrays(GLcontext
* ctx
)
583 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
586 if(rmesa
->state
.elt_dma
.buf
)
587 radeon_mm_use(rmesa
, rmesa
->state
.elt_dma
.buf
->id
);
589 for (i
=0; i
< rmesa
->state
.aos_count
;i
++) {
590 if (rmesa
->state
.aos
[i
].buf
)
591 radeon_mm_use(rmesa
, rmesa
->state
.aos
[i
].buf
->id
);
598 if (ctx->Array.ArrayObj->a.BufferObj->Name \
599 && ctx->Array.ArrayObj->a.Enabled) \
600 radeon_mm_use(rmesa, ((struct r300_buffer_object *)ctx->Array.ArrayObj->a.BufferObj)->id); \
603 if (ctx
->Array
.ElementArrayBufferObj
->Name
&& ctx
->Array
.ElementArrayBufferObj
->OnCard
)
604 radeon_mm_use(rmesa
, ((struct r300_buffer_object
*)ctx
->Array
.ElementArrayBufferObj
)->id
);
609 USE_VBO(SecondaryColor
);
612 for (i
=0; i
< MAX_TEXTURE_COORD_UNITS
; i
++)
613 USE_VBO(TexCoord
[i
]);
619 void r300ReleaseArrays(GLcontext
* ctx
)
621 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
624 r300ReleaseDmaRegion(rmesa
, &rmesa
->state
.elt_dma
, __FUNCTION__
);
625 for (i
=0;i
<rmesa
->state
.aos_count
;i
++) {
626 r300ReleaseDmaRegion(rmesa
, &rmesa
->state
.aos
[i
], __FUNCTION__
);