/**************************************************************************

Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */
42 #include "swrast_setup/swrast_setup.h"
43 #include "math/m_translate.h"
45 #include "tnl/t_context.h"
47 #include "r300_context.h"
48 #include "radeon_ioctl.h"
49 #include "r300_state.h"
50 #include "r300_maos.h"
51 #include "r300_ioctl.h"
57 #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
58 SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
59 SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
60 SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
61 SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
62 SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
63 #error Cannot change these!
66 #define DEBUG_ALL DEBUG_VERTS
68 #if defined(USE_X86_ASM)
69 #define COPY_DWORDS( dst, src, nr ) \
72 __asm__ __volatile__( "rep ; movsl" \
73 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
79 #define COPY_DWORDS( dst, src, nr ) \
82 for ( j = 0 ; j < nr ; j++ ) \
83 dst[j] = ((int *)src)[j]; \
88 static void emit_vec4(GLcontext
* ctx
,
89 struct r300_dma_region
*rvb
,
90 GLvoid
* data
, int stride
, int count
)
93 int *out
= (int *)(rvb
->address
+ rvb
->start
);
95 if (RADEON_DEBUG
& DEBUG_VERTS
)
96 fprintf(stderr
, "%s count %d stride %d\n",
97 __FUNCTION__
, count
, stride
);
100 COPY_DWORDS(out
, data
, count
);
102 for (i
= 0; i
< count
; i
++) {
103 out
[0] = *(int *)data
;
109 static void emit_vec8(GLcontext
* ctx
,
110 struct r300_dma_region
*rvb
,
111 GLvoid
* data
, int stride
, int count
)
114 int *out
= (int *)(rvb
->address
+ rvb
->start
);
116 if (RADEON_DEBUG
& DEBUG_VERTS
)
117 fprintf(stderr
, "%s count %d stride %d\n",
118 __FUNCTION__
, count
, stride
);
121 COPY_DWORDS(out
, data
, count
* 2);
123 for (i
= 0; i
< count
; i
++) {
124 out
[0] = *(int *)data
;
125 out
[1] = *(int *)(data
+ 4);
131 static void emit_vec12(GLcontext
* ctx
,
132 struct r300_dma_region
*rvb
,
133 GLvoid
* data
, int stride
, int count
)
136 int *out
= (int *)(rvb
->address
+ rvb
->start
);
138 if (RADEON_DEBUG
& DEBUG_VERTS
)
139 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
140 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
143 COPY_DWORDS(out
, data
, count
* 3);
145 for (i
= 0; i
< count
; i
++) {
146 out
[0] = *(int *)data
;
147 out
[1] = *(int *)(data
+ 4);
148 out
[2] = *(int *)(data
+ 8);
154 static void emit_vec16(GLcontext
* ctx
,
155 struct r300_dma_region
*rvb
,
156 GLvoid
* data
, int stride
, int count
)
159 int *out
= (int *)(rvb
->address
+ rvb
->start
);
161 if (RADEON_DEBUG
& DEBUG_VERTS
)
162 fprintf(stderr
, "%s count %d stride %d\n",
163 __FUNCTION__
, count
, stride
);
166 COPY_DWORDS(out
, data
, count
* 4);
168 for (i
= 0; i
< count
; i
++) {
169 out
[0] = *(int *)data
;
170 out
[1] = *(int *)(data
+ 4);
171 out
[2] = *(int *)(data
+ 8);
172 out
[3] = *(int *)(data
+ 12);
178 static void emit_vector(GLcontext
* ctx
,
179 struct r300_dma_region
*rvb
,
180 GLvoid
* data
, int size
, int stride
, int count
)
182 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
184 if (RADEON_DEBUG
& DEBUG_VERTS
)
185 fprintf(stderr
, "%s count %d size %d stride %d\n",
186 __FUNCTION__
, count
, size
, stride
);
188 /* Gets triggered when playing with future_hw_tcl_on ... */
192 r300AllocDmaRegion(rmesa
, rvb
, size
* 4, 4);
194 rvb
->aos_offset
= GET_START(rvb
);
197 r300AllocDmaRegion(rmesa
, rvb
, size
* count
* 4, 4); /* alignment? */
198 rvb
->aos_offset
= GET_START(rvb
);
199 rvb
->aos_stride
= size
;
206 emit_vec4(ctx
, rvb
, data
, stride
, count
);
209 emit_vec8(ctx
, rvb
, data
, stride
, count
);
212 emit_vec12(ctx
, rvb
, data
, stride
, count
);
215 emit_vec16(ctx
, rvb
, data
, stride
, count
);
225 void r300EmitElts(GLcontext
* ctx
, void *elts
, unsigned long n_elts
,
228 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
229 struct r300_dma_region
*rvb
= &rmesa
->state
.elt_dma
;
232 assert(elt_size
== 2 || elt_size
== 4);
234 if (r300IsGartMemory(rmesa
, elts
, n_elts
* elt_size
)) {
235 rvb
->address
= rmesa
->radeon
.radeonScreen
->gartTextures
.map
;
236 rvb
->start
= ((char *)elts
) - rvb
->address
;
238 rmesa
->radeon
.radeonScreen
->gart_texture_offset
+
242 } else if (r300IsGartMemory(rmesa
, elts
, 1)) {
243 WARN_ONCE("Pointer not within GART memory!\n");
247 r300AllocDmaRegion(rmesa
, rvb
, n_elts
* elt_size
, elt_size
);
248 rvb
->aos_offset
= GET_START(rvb
);
250 out
= rvb
->address
+ rvb
->start
;
251 memcpy(out
, elts
, n_elts
* elt_size
);
254 static GLuint
t_type(struct dt
*dt
)
257 case GL_UNSIGNED_BYTE
:
258 return AOS_FORMAT_UBYTE
;
261 return AOS_FORMAT_USHORT
;
264 return AOS_FORMAT_FLOAT
;
271 return AOS_FORMAT_FLOAT
;
274 static GLuint
t_vir0_size(struct dt
*dt
)
277 case GL_UNSIGNED_BYTE
:
294 static GLuint
t_aos_size(struct dt
*dt
)
297 case GL_UNSIGNED_BYTE
:
314 static GLuint
t_vir0(uint32_t * dst
, struct dt
*dt
, int *inputs
,
315 GLint
* tab
, GLuint nr
)
319 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
320 dw
= t_vir0_size(&dt
[tab
[i
]]) | (inputs
[tab
[i
]] << 8) |
321 (t_type(&dt
[tab
[i
]]) << 14);
323 (t_vir0_size(&dt
[tab
[i
+ 1]]) |
324 (inputs
[tab
[i
+ 1]] << 8) | (t_type(&dt
[tab
[i
+ 1]])
328 dw
|= (1 << (13 + 16));
334 dw
= t_vir0_size(&dt
[tab
[nr
- 1]]) | (inputs
[tab
[nr
- 1]]
336 (t_type(&dt
[tab
[nr
- 1]]) << 14);
342 return (nr
+ 1) >> 1;
345 static GLuint
t_swizzle(int swizzle
[4])
347 return (swizzle
[0] << R300_INPUT_ROUTE_X_SHIFT
) |
348 (swizzle
[1] << R300_INPUT_ROUTE_Y_SHIFT
) |
349 (swizzle
[2] << R300_INPUT_ROUTE_Z_SHIFT
) |
350 (swizzle
[3] << R300_INPUT_ROUTE_W_SHIFT
);
353 static GLuint
t_vir1(uint32_t * dst
, int swizzle
[][4], GLuint nr
)
357 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
358 dst
[i
>> 1] = t_swizzle(swizzle
[i
]) | R300_INPUT_ROUTE_ENABLE
;
360 (t_swizzle(swizzle
[i
+ 1]) | R300_INPUT_ROUTE_ENABLE
)
366 t_swizzle(swizzle
[nr
- 1]) | R300_INPUT_ROUTE_ENABLE
;
368 return (nr
+ 1) >> 1;
371 static GLuint
t_emit_size(struct dt
*dt
)
376 static GLuint
t_vic(GLcontext
* ctx
, GLuint InputsRead
)
378 r300ContextPtr r300
= R300_CONTEXT(ctx
);
381 if (InputsRead
& (1 << VERT_ATTRIB_POS
))
382 vic_1
|= R300_INPUT_CNTL_POS
;
384 if (InputsRead
& (1 << VERT_ATTRIB_NORMAL
))
385 vic_1
|= R300_INPUT_CNTL_NORMAL
;
387 if (InputsRead
& (1 << VERT_ATTRIB_COLOR0
))
388 vic_1
|= R300_INPUT_CNTL_COLOR
;
390 r300
->state
.texture
.tc_count
= 0;
391 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
392 if (InputsRead
& (1 << (VERT_ATTRIB_TEX0
+ i
))) {
393 r300
->state
.texture
.tc_count
++;
394 vic_1
|= R300_INPUT_CNTL_TC0
<< i
;
400 /* Emit vertex data to GART memory
401 * Route inputs to the vertex processor
402 * This function should never return R300_FALLBACK_TCL when using software tcl.
405 int r300EmitArrays(GLcontext
* ctx
)
407 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
408 r300ContextPtr r300
= rmesa
;
409 struct radeon_vertex_buffer
*VB
= &rmesa
->state
.VB
;
411 GLuint count
= VB
->Count
;
413 GLuint InputsRead
= 0, OutputsWritten
= 0;
415 int vir_inputs
[VERT_ATTRIB_MAX
];
416 GLint tab
[VERT_ATTRIB_MAX
];
417 int swizzle
[VERT_ATTRIB_MAX
][4];
420 struct r300_vertex_program
*prog
=
421 (struct r300_vertex_program
*)
422 CURRENT_VERTEX_SHADER(ctx
);
423 inputs
= prog
->inputs
;
424 InputsRead
= CURRENT_VERTEX_SHADER(ctx
)->key
.InputsRead
;
425 OutputsWritten
= CURRENT_VERTEX_SHADER(ctx
)->key
.OutputsWritten
;
427 DECLARE_RENDERINPUTS(inputs_bitset
);
428 inputs
= r300
->state
.sw_tcl_inputs
;
430 RENDERINPUTS_COPY(inputs_bitset
,
431 TNL_CONTEXT(ctx
)->render_inputs_bitset
);
433 assert(RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_POS
));
434 InputsRead
|= 1 << VERT_ATTRIB_POS
;
435 OutputsWritten
|= 1 << VERT_RESULT_HPOS
;
437 assert(RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_NORMAL
)
440 assert(RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_COLOR0
));
441 InputsRead
|= 1 << VERT_ATTRIB_COLOR0
;
442 OutputsWritten
|= 1 << VERT_RESULT_COL0
;
444 if (RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_COLOR1
)) {
445 InputsRead
|= 1 << VERT_ATTRIB_COLOR1
;
446 OutputsWritten
|= 1 << VERT_RESULT_COL1
;
449 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
450 if (RENDERINPUTS_TEST
451 (inputs_bitset
, _TNL_ATTRIB_TEX(i
))) {
452 InputsRead
|= 1 << (VERT_ATTRIB_TEX0
+ i
);
453 OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ i
);
456 for (i
= 0, nr
= 0; i
< VERT_ATTRIB_MAX
; i
++)
457 if (InputsRead
& (1 << i
))
463 (r300
->radeon
.radeonScreen
->
464 chip_flags
& RADEON_CHIPSET_TCL
)) {
465 /* Fixed, apply to vir0 only */
466 memcpy(vir_inputs
, inputs
,
467 VERT_ATTRIB_MAX
* sizeof(int));
470 if (InputsRead
& VERT_ATTRIB_POS
)
471 inputs
[VERT_ATTRIB_POS
] = 0;
473 if (InputsRead
& (1 << VERT_ATTRIB_COLOR0
))
474 inputs
[VERT_ATTRIB_COLOR0
] = 2;
476 if (InputsRead
& (1 << VERT_ATTRIB_COLOR1
))
477 inputs
[VERT_ATTRIB_COLOR1
] = 3;
479 for (i
= VERT_ATTRIB_TEX0
; i
<= VERT_ATTRIB_TEX7
; i
++)
480 if (InputsRead
& (1 << i
))
481 inputs
[i
] = 6 + (i
- VERT_ATTRIB_TEX0
);
484 RENDERINPUTS_COPY(rmesa
->state
.render_inputs_bitset
,
488 assert(OutputsWritten
);
490 for (i
= 0, nr
= 0; i
< VERT_ATTRIB_MAX
; i
++)
491 if (InputsRead
& (1 << i
))
494 if (nr
> R300_MAX_AOS_ARRAYS
)
495 return R300_FALLBACK_TCL
;
497 for (i
= 0; i
< nr
; i
++) {
499 int comp_size
, fix
, found
= 0;
501 swizzle
[i
][0] = SWIZZLE_ZERO
;
502 swizzle
[i
][1] = SWIZZLE_ZERO
;
503 swizzle
[i
][2] = SWIZZLE_ZERO
;
504 swizzle
[i
][3] = SWIZZLE_ONE
;
506 for (ci
= 0; ci
< VB
->AttribPtr
[tab
[i
]].size
; ci
++)
510 #define SWAP_INT(a, b) do { \
517 if (VB
->AttribPtr
[tab
[i
]].type
== GL_UNSIGNED_BYTE
) {
518 SWAP_INT(swizzle
[i
][0], swizzle
[i
][3]);
519 SWAP_INT(swizzle
[i
][1], swizzle
[i
][2]);
521 #endif /* MESA_BIG_ENDIAN */
523 if (r300IsGartMemory(rmesa
, VB
->AttribPtr
[tab
[i
]].data
,
524 /*(count-1)*stride */ 4)) {
525 if (VB
->AttribPtr
[tab
[i
]].stride
% 4)
526 return R300_FALLBACK_TCL
;
528 rmesa
->state
.aos
[i
].address
=
529 VB
->AttribPtr
[tab
[i
]].data
;
530 rmesa
->state
.aos
[i
].start
= 0;
531 rmesa
->state
.aos
[i
].aos_offset
=
532 r300GartOffsetFromVirtual(rmesa
,
534 AttribPtr
[tab
[i
]].data
);
535 rmesa
->state
.aos
[i
].aos_stride
=
536 VB
->AttribPtr
[tab
[i
]].stride
/ 4;
538 rmesa
->state
.aos
[i
].aos_size
=
539 t_emit_size(&VB
->AttribPtr
[tab
[i
]]);
541 /* TODO: emit_vector can only handle 4 byte vectors */
542 if (VB
->AttribPtr
[tab
[i
]].type
!= GL_FLOAT
)
543 return R300_FALLBACK_TCL
;
545 emit_vector(ctx
, &rmesa
->state
.aos
[i
],
546 VB
->AttribPtr
[tab
[i
]].data
,
547 t_emit_size(&VB
->AttribPtr
[tab
[i
]]),
548 VB
->AttribPtr
[tab
[i
]].stride
, count
);
551 rmesa
->state
.aos
[i
].aos_size
=
552 t_aos_size(&VB
->AttribPtr
[tab
[i
]]);
554 comp_size
= _mesa_sizeof_type(VB
->AttribPtr
[tab
[i
]].type
);
556 for (fix
= 0; fix
<= 4 - VB
->AttribPtr
[tab
[i
]].size
; fix
++) {
557 if ((rmesa
->state
.aos
[i
].aos_offset
-
558 comp_size
* fix
) % 4)
567 WARN_ONCE("Feeling lucky?\n");
570 rmesa
->state
.aos
[i
].aos_offset
-= comp_size
* fix
;
572 for (ci
= 0; ci
< VB
->AttribPtr
[tab
[i
]].size
; ci
++)
573 swizzle
[i
][ci
] += fix
;
576 ("Cannot handle offset %x with stride %d, comp %d\n",
577 rmesa
->state
.aos
[i
].aos_offset
,
578 rmesa
->state
.aos
[i
].aos_stride
,
579 VB
->AttribPtr
[tab
[i
]].size
);
580 return R300_FALLBACK_TCL
;
584 /* setup INPUT_ROUTE */
585 R300_STATECHANGE(r300
, vir
[0]);
586 ((drm_r300_cmd_header_t
*) r300
->hw
.vir
[0].cmd
)->packet0
.count
=
587 t_vir0(&r300
->hw
.vir
[0].cmd
[R300_VIR_CNTL_0
], VB
->AttribPtr
,
590 R300_STATECHANGE(r300
, vir
[1]);
591 ((drm_r300_cmd_header_t
*) r300
->hw
.vir
[1].cmd
)->packet0
.count
=
592 t_vir1(&r300
->hw
.vir
[1].cmd
[R300_VIR_CNTL_0
], swizzle
, nr
);
594 /* Set up input_cntl */
595 /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
596 R300_STATECHANGE(r300
, vic
);
597 r300
->hw
.vic
.cmd
[R300_VIC_CNTL_0
] = 0x5555; /* Hard coded value, no idea what it means */
598 r300
->hw
.vic
.cmd
[R300_VIC_CNTL_1
] = t_vic(ctx
, InputsRead
);
600 /* Stage 3: VAP output */
602 R300_STATECHANGE(r300
, vof
);
604 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] = 0;
605 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_1
] = 0;
607 if (OutputsWritten
& (1 << VERT_RESULT_HPOS
))
608 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
609 R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT
;
611 if (OutputsWritten
& (1 << VERT_RESULT_COL0
))
612 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
613 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT
;
615 if (OutputsWritten
& (1 << VERT_RESULT_COL1
))
616 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
617 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT
;
619 /*if(OutputsWritten & (1 << VERT_RESULT_BFC0))
620 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
622 if(OutputsWritten & (1 << VERT_RESULT_BFC1))
623 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; */
624 //if(OutputsWritten & (1 << VERT_RESULT_FOGC))
626 if (OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
627 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
628 R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT
;
630 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
631 if (OutputsWritten
& (1 << (VERT_RESULT_TEX0
+ i
)))
632 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_1
] |= (4 << (3 * i
));
634 rmesa
->state
.aos_count
= nr
;
636 return R300_FALLBACK_NONE
;
640 void r300UseArrays(GLcontext
* ctx
)
642 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
645 if (rmesa
->state
.elt_dma
.buf
)
646 r300_mem_use(rmesa
, rmesa
->state
.elt_dma
.buf
->id
);
648 for (i
= 0; i
< rmesa
->state
.aos_count
; i
++) {
649 if (rmesa
->state
.aos
[i
].buf
)
650 r300_mem_use(rmesa
, rmesa
->state
.aos
[i
].buf
->id
);
655 void r300ReleaseArrays(GLcontext
* ctx
)
657 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
660 r300ReleaseDmaRegion(rmesa
, &rmesa
->state
.elt_dma
, __FUNCTION__
);
661 for (i
= 0; i
< rmesa
->state
.aos_count
; i
++) {
662 r300ReleaseDmaRegion(rmesa
, &rmesa
->state
.aos
[i
], __FUNCTION__
);