/**************************************************************************

Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
/**
 * \author Keith Whitwell <keith@tungstengraphics.com>
 */
43 #include "swrast_setup/swrast_setup.h"
44 #include "math/m_translate.h"
46 #include "tnl/t_context.h"
48 #include "r300_context.h"
49 #include "radeon_ioctl.h"
50 #include "r300_state.h"
51 #include "r300_emit.h"
52 #include "r300_ioctl.h"
58 #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
59 SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
60 SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
61 SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
62 SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
63 SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
64 #error Cannot change these!
67 #define DEBUG_ALL DEBUG_VERTS
/*
 * COPY_DWORDS(dst, src, nr): copy nr dwords from src to dst.
 *
 * Two definitions are visible.  The one guarded by USE_X86_ASM is built
 * around the x86 "rep ; movsl" string instruction and lists dst as an
 * output operand ("=D").  The other copies ((int *)src)[j] into dst[j]
 * for j in [0, nr) with a plain loop.
 *
 * NOTE(review): several lines of both macro bodies (the asm input operand
 * list, the statement wrapper, the preprocessor lines separating and
 * closing the two variants) are absent from this copy of the file.
 * Presumably the second definition is the #else branch -- confirm and
 * restore the missing lines from a clean copy before building.
 */
69 #if defined(USE_X86_ASM)
70 #define COPY_DWORDS( dst, src, nr ) \
73 __asm__ __volatile__( "rep ; movsl" \
74 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
80 #define COPY_DWORDS( dst, src, nr ) \
83 for ( j = 0 ; j < nr ; j++ ) \
84 dst[j] = ((int *)src)[j]; \
89 static void r300EmitVec4(GLcontext
* ctx
,
90 struct r300_dma_region
*rvb
,
91 GLvoid
* data
, int stride
, int count
)
94 int *out
= (int *)(rvb
->address
+ rvb
->start
);
96 if (RADEON_DEBUG
& DEBUG_VERTS
)
97 fprintf(stderr
, "%s count %d stride %d\n",
98 __FUNCTION__
, count
, stride
);
101 COPY_DWORDS(out
, data
, count
);
103 for (i
= 0; i
< count
; i
++) {
104 out
[0] = *(int *)data
;
110 static void r300EmitVec8(GLcontext
* ctx
,
111 struct r300_dma_region
*rvb
,
112 GLvoid
* data
, int stride
, int count
)
115 int *out
= (int *)(rvb
->address
+ rvb
->start
);
117 if (RADEON_DEBUG
& DEBUG_VERTS
)
118 fprintf(stderr
, "%s count %d stride %d\n",
119 __FUNCTION__
, count
, stride
);
122 COPY_DWORDS(out
, data
, count
* 2);
124 for (i
= 0; i
< count
; i
++) {
125 out
[0] = *(int *)data
;
126 out
[1] = *(int *)(data
+ 4);
132 static void r300EmitVec12(GLcontext
* ctx
,
133 struct r300_dma_region
*rvb
,
134 GLvoid
* data
, int stride
, int count
)
137 int *out
= (int *)(rvb
->address
+ rvb
->start
);
139 if (RADEON_DEBUG
& DEBUG_VERTS
)
140 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
141 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
144 COPY_DWORDS(out
, data
, count
* 3);
146 for (i
= 0; i
< count
; i
++) {
147 out
[0] = *(int *)data
;
148 out
[1] = *(int *)(data
+ 4);
149 out
[2] = *(int *)(data
+ 8);
155 static void r300EmitVec16(GLcontext
* ctx
,
156 struct r300_dma_region
*rvb
,
157 GLvoid
* data
, int stride
, int count
)
160 int *out
= (int *)(rvb
->address
+ rvb
->start
);
162 if (RADEON_DEBUG
& DEBUG_VERTS
)
163 fprintf(stderr
, "%s count %d stride %d\n",
164 __FUNCTION__
, count
, stride
);
167 COPY_DWORDS(out
, data
, count
* 4);
169 for (i
= 0; i
< count
; i
++) {
170 out
[0] = *(int *)data
;
171 out
[1] = *(int *)(data
+ 4);
172 out
[2] = *(int *)(data
+ 8);
173 out
[3] = *(int *)(data
+ 12);
/*
 * r300EmitVec: allocate a DMA region for one vertex array and copy the
 * array into it.
 *
 * Visible behaviour: optionally logs count/size/stride, then calls
 * r300AllocDmaRegion() with either size * 4 bytes or size * count * 4
 * bytes (4-byte alignment in both cases), records GET_START(rvb) in
 * rvb->aos_offset, sets rvb->aos_stride = size on the second path, and
 * hands the copy to one of r300EmitVec4/8/12/16.
 *
 * NOTE(review): the condition choosing between the two allocation paths
 * and the selector for the four emit helpers are not present in this
 * copy of the file.  Presumably the first path is the constant
 * (stride 0) case and the helper is chosen by `size` in dwords --
 * confirm against a clean copy.
 */
179 static void r300EmitVec(GLcontext
* ctx
,
180 struct r300_dma_region
*rvb
,
181 GLvoid
* data
, int size
, int stride
, int count
)
183 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
185 if (RADEON_DEBUG
& DEBUG_VERTS
)
186 fprintf(stderr
, "%s count %d size %d stride %d\n",
187 __FUNCTION__
, count
, size
, stride
);
189 /* Gets triggered when playing with future_hw_tcl_on ... */
193 r300AllocDmaRegion(rmesa
, rvb
, size
* 4, 4);
195 rvb
->aos_offset
= GET_START(rvb
);
198 r300AllocDmaRegion(rmesa
, rvb
, size
* count
* 4, 4); /* alignment? */
199 rvb
->aos_offset
= GET_START(rvb
);
200 rvb
->aos_stride
= size
;
207 r300EmitVec4(ctx
, rvb
, data
, stride
, count
);
210 r300EmitVec8(ctx
, rvb
, data
, stride
, count
);
213 r300EmitVec12(ctx
, rvb
, data
, stride
, count
);
216 r300EmitVec16(ctx
, rvb
, data
, stride
, count
);
/*
 * t_type: map the GL data type of an attribute descriptor to an
 * AOS_FORMAT_* code.
 *
 * Visible mapping: GL_UNSIGNED_BYTE yields AOS_FORMAT_UBYTE; the other
 * visible return values are AOS_FORMAT_USHORT and AOS_FORMAT_FLOAT, and
 * the last statement returns AOS_FORMAT_FLOAT.
 *
 * NOTE(review): the switch expression and the case labels guarding the
 * USHORT / FLOAT returns are missing from this copy of the file; which
 * GL types select them cannot be confirmed from here.
 */
226 static GLuint
t_type(struct dt
*dt
)
229 case GL_UNSIGNED_BYTE
:
230 return AOS_FORMAT_UBYTE
;
232 return AOS_FORMAT_USHORT
;
234 return AOS_FORMAT_FLOAT
;
240 return AOS_FORMAT_FLOAT
;
/*
 * t_vir0_size: returns the size field that t_vir0() places in the low
 * bits of each input-route entry for the given attribute descriptor.
 *
 * NOTE(review): apart from the GL_UNSIGNED_BYTE case label, the body of
 * this function is missing from this copy of the file, so the returned
 * values cannot be documented here.
 */
243 static GLuint
t_vir0_size(struct dt
*dt
)
246 case GL_UNSIGNED_BYTE
:
/*
 * t_aos_size: returns the value r300EmitArrays() stores in
 * aos[i].aos_size for the given attribute descriptor.
 *
 * NOTE(review): apart from the GL_UNSIGNED_BYTE case label, the body of
 * this function is missing from this copy of the file, so the returned
 * values cannot be documented here.
 */
260 static GLuint
t_aos_size(struct dt
*dt
)
263 case GL_UNSIGNED_BYTE
:
/*
 * t_vir0: build the first set of input-route dwords.
 *
 * Attributes are taken from tab[0..nr-1] two at a time.  For each
 * attribute an entry is formed as
 *     t_vir0_size(&dt[a]) | (inputs[a] << 8) | (t_type(&dt[a]) << 14)
 * and a trailing odd attribute (index nr - 1) gets an entry of its own.
 * Bit (13 + 16) is OR-ed into a dword on one visible path.
 *
 * Returns (nr + 1) >> 1, i.e. the number of dwords needed for nr entries
 * packed two per dword.
 *
 * NOTE(review): the lines that shift the second entry of a pair, store
 * the dword into dst, and guard the `1 << (13 + 16)` flag are missing
 * from this copy of the file -- presumably the second entry goes in the
 * upper 16 bits and the flag marks the last entry; confirm against a
 * clean copy.
 */
277 static GLuint
t_vir0(uint32_t * dst
, struct dt
*dt
, int *inputs
,
278 GLint
* tab
, GLuint nr
)
282 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
283 dw
= t_vir0_size(&dt
[tab
[i
]]) | (inputs
[tab
[i
]] << 8) |
284 (t_type(&dt
[tab
[i
]]) << 14);
286 (t_vir0_size(&dt
[tab
[i
+ 1]]) |
287 (inputs
[tab
[i
+ 1]] << 8) | (t_type(&dt
[tab
[i
+ 1]])
291 dw
|= (1 << (13 + 16));
297 dw
= t_vir0_size(&dt
[tab
[nr
- 1]]) | (inputs
[tab
[nr
- 1]]
299 (t_type(&dt
[tab
[nr
- 1]]) << 14);
305 return (nr
+ 1) >> 1;
308 static GLuint
t_swizzle(int swizzle
[4])
310 return (swizzle
[0] << R300_INPUT_ROUTE_X_SHIFT
) |
311 (swizzle
[1] << R300_INPUT_ROUTE_Y_SHIFT
) |
312 (swizzle
[2] << R300_INPUT_ROUTE_Z_SHIFT
) |
313 (swizzle
[3] << R300_INPUT_ROUTE_W_SHIFT
);
/*
 * t_vir1: build the second set of input-route dwords (swizzles).
 *
 * Swizzles are taken two at a time; dst[i >> 1] receives
 * t_swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE for the first of each
 * pair, and the second entry of the pair and a trailing odd entry
 * (index nr - 1) are formed the same way.
 *
 * Returns (nr + 1) >> 1, i.e. the number of dwords needed for nr entries
 * packed two per dword.
 *
 * NOTE(review): the lines that shift the second entry of a pair and
 * store it, and the store for the trailing odd entry, are missing from
 * this copy of the file -- presumably the second entry is placed in the
 * upper 16 bits; confirm against a clean copy.
 */
316 static GLuint
t_vir1(uint32_t * dst
, int swizzle
[][4], GLuint nr
)
320 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
321 dst
[i
>> 1] = t_swizzle(swizzle
[i
]) | R300_INPUT_ROUTE_ENABLE
;
323 (t_swizzle(swizzle
[i
+ 1]) | R300_INPUT_ROUTE_ENABLE
)
329 t_swizzle(swizzle
[nr
- 1]) | R300_INPUT_ROUTE_ENABLE
;
331 return (nr
+ 1) >> 1;
/*
 * t_emit_size: returns the per-element size that r300EmitArrays() passes
 * to r300EmitVec() and stores in aos[i].aos_size for GART-resident
 * arrays.
 *
 * NOTE(review): the body of this function is missing from this copy of
 * the file; the unit and value of the result cannot be confirmed here.
 */
334 static GLuint
t_emit_size(struct dt
*dt
)
339 static GLuint
t_vic(GLcontext
* ctx
, GLuint InputsRead
)
341 r300ContextPtr r300
= R300_CONTEXT(ctx
);
344 if (InputsRead
& (1 << VERT_ATTRIB_POS
))
345 vic_1
|= R300_INPUT_CNTL_POS
;
347 if (InputsRead
& (1 << VERT_ATTRIB_NORMAL
))
348 vic_1
|= R300_INPUT_CNTL_NORMAL
;
350 if (InputsRead
& (1 << VERT_ATTRIB_COLOR0
))
351 vic_1
|= R300_INPUT_CNTL_COLOR
;
353 r300
->state
.texture
.tc_count
= 0;
354 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
355 if (InputsRead
& (1 << (VERT_ATTRIB_TEX0
+ i
))) {
356 r300
->state
.texture
.tc_count
++;
357 vic_1
|= R300_INPUT_CNTL_TC0
<< i
;
363 /* Emit vertex data to GART memory
364 * Route inputs to the vertex processor
365 * This function should never return R300_FALLBACK_TCL when using software tcl.
 *
 * Outline of what the visible code does:
 *  - Obtains InputsRead / OutputsWritten either from the key of
 *    CURRENT_VERTEX_SHADER(ctx) or by testing the TNL render_inputs_bitset
 *    (position and color0 are asserted present on that path).
 *  - Walks the VERT_ATTRIB_MAX attribute bits of InputsRead to count the
 *    arrays in use (nr); more than R300_MAX_AOS_ARRAYS is a fallback.
 *  - For each array: initialises its swizzle to (ZERO, ZERO, ZERO, ONE),
 *    then either references the data in place when r300IsGartMemory()
 *    accepts it (stride must be a multiple of 4) or copies it with
 *    r300EmitVec() (GL_FLOAT only), and finally tries to align
 *    aos_offset to 4 bytes by stepping back whole components and
 *    shifting the swizzle accordingly.
 *  - Programs the vir[0], vir[1], vic and vof state atoms and stores nr
 *    in rmesa->state.aos_count.
 *
 * Returns R300_FALLBACK_NONE on success, R300_FALLBACK_TCL on any of the
 * bail-outs above.
 *
 * NOTE(review): many lines of this function (branch conditions, loop
 * bodies, preprocessor guards) are missing from this copy of the file;
 * the outline above covers only what is visible.
 */
368 int r300EmitArrays(GLcontext
* ctx
)
370 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
371 r300ContextPtr r300
= rmesa
;
372 struct radeon_vertex_buffer
*VB
= &rmesa
->state
.VB
;
374 GLuint count
= VB
->Count
;
376 GLuint InputsRead
= 0, OutputsWritten
= 0;
378 int vir_inputs
[VERT_ATTRIB_MAX
];
379 GLint tab
[VERT_ATTRIB_MAX
];
380 int swizzle
[VERT_ATTRIB_MAX
][4];
383 struct r300_vertex_program
*prog
=
384 (struct r300_vertex_program
*)
385 CURRENT_VERTEX_SHADER(ctx
);
386 inputs
= prog
->inputs
;
387 InputsRead
= CURRENT_VERTEX_SHADER(ctx
)->key
.InputsRead
;
388 OutputsWritten
= CURRENT_VERTEX_SHADER(ctx
)->key
.OutputsWritten
;
390 DECLARE_RENDERINPUTS(inputs_bitset
);
391 inputs
= r300
->state
.sw_tcl_inputs
;
393 RENDERINPUTS_COPY(inputs_bitset
,
394 TNL_CONTEXT(ctx
)->render_inputs_bitset
);
396 assert(RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_POS
));
397 InputsRead
|= 1 << VERT_ATTRIB_POS
;
398 OutputsWritten
|= 1 << VERT_RESULT_HPOS
;
400 assert(RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_NORMAL
)
403 assert(RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_COLOR0
));
404 InputsRead
|= 1 << VERT_ATTRIB_COLOR0
;
405 OutputsWritten
|= 1 << VERT_RESULT_COL0
;
407 if (RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_COLOR1
)) {
408 InputsRead
|= 1 << VERT_ATTRIB_COLOR1
;
409 OutputsWritten
|= 1 << VERT_RESULT_COL1
;
412 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
413 if (RENDERINPUTS_TEST
414 (inputs_bitset
, _TNL_ATTRIB_TEX(i
))) {
415 InputsRead
|= 1 << (VERT_ATTRIB_TEX0
+ i
);
416 OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ i
);
419 for (i
= 0, nr
= 0; i
< VERT_ATTRIB_MAX
; i
++)
420 if (InputsRead
& (1 << i
))
426 (r300
->radeon
.radeonScreen
->
427 chip_flags
& RADEON_CHIPSET_TCL
)) {
428 /* Fixed, apply to vir0 only */
429 memcpy(vir_inputs
, inputs
,
430 VERT_ATTRIB_MAX
* sizeof(int));
/* NOTE(review): the test below masks InputsRead with VERT_ATTRIB_POS
 * itself, whereas every other attribute test in this function uses
 * (1 << VERT_ATTRIB_...).  This looks like a missing "1 <<" -- confirm
 * the intended condition before changing it. */
433 if (InputsRead
& VERT_ATTRIB_POS
)
434 inputs
[VERT_ATTRIB_POS
] = 0;
436 if (InputsRead
& (1 << VERT_ATTRIB_COLOR0
))
437 inputs
[VERT_ATTRIB_COLOR0
] = 2;
439 if (InputsRead
& (1 << VERT_ATTRIB_COLOR1
))
440 inputs
[VERT_ATTRIB_COLOR1
] = 3;
442 for (i
= VERT_ATTRIB_TEX0
; i
<= VERT_ATTRIB_TEX7
; i
++)
443 if (InputsRead
& (1 << i
))
444 inputs
[i
] = 6 + (i
- VERT_ATTRIB_TEX0
);
447 RENDERINPUTS_COPY(rmesa
->state
.render_inputs_bitset
,
451 assert(OutputsWritten
);
453 for (i
= 0, nr
= 0; i
< VERT_ATTRIB_MAX
; i
++)
454 if (InputsRead
& (1 << i
))
457 if (nr
> R300_MAX_AOS_ARRAYS
)
458 return R300_FALLBACK_TCL
;
460 for (i
= 0; i
< nr
; i
++) {
462 int comp_size
, fix
, found
= 0;
464 swizzle
[i
][0] = SWIZZLE_ZERO
;
465 swizzle
[i
][1] = SWIZZLE_ZERO
;
466 swizzle
[i
][2] = SWIZZLE_ZERO
;
467 swizzle
[i
][3] = SWIZZLE_ONE
;
469 for (ci
= 0; ci
< VB
->AttribPtr
[tab
[i
]].size
; ci
++)
473 #define SWAP_INT(a, b) do { \
480 if (VB
->AttribPtr
[tab
[i
]].type
== GL_UNSIGNED_BYTE
) {
481 SWAP_INT(swizzle
[i
][0], swizzle
[i
][3]);
482 SWAP_INT(swizzle
[i
][1], swizzle
[i
][2]);
484 #endif /* MESA_BIG_ENDIAN */
486 if (r300IsGartMemory(rmesa
, VB
->AttribPtr
[tab
[i
]].data
,
487 /*(count-1)*stride */ 4)) {
488 if (VB
->AttribPtr
[tab
[i
]].stride
% 4)
489 return R300_FALLBACK_TCL
;
491 rmesa
->state
.aos
[i
].address
=
492 VB
->AttribPtr
[tab
[i
]].data
;
493 rmesa
->state
.aos
[i
].start
= 0;
494 rmesa
->state
.aos
[i
].aos_offset
=
495 r300GartOffsetFromVirtual(rmesa
,
497 AttribPtr
[tab
[i
]].data
);
498 rmesa
->state
.aos
[i
].aos_stride
=
499 VB
->AttribPtr
[tab
[i
]].stride
/ 4;
501 rmesa
->state
.aos
[i
].aos_size
=
502 t_emit_size(&VB
->AttribPtr
[tab
[i
]]);
504 /* TODO: r300EmitVec can only handle 4 byte vectors */
505 if (VB
->AttribPtr
[tab
[i
]].type
!= GL_FLOAT
)
506 return R300_FALLBACK_TCL
;
508 r300EmitVec(ctx
, &rmesa
->state
.aos
[i
],
509 VB
->AttribPtr
[tab
[i
]].data
,
510 t_emit_size(&VB
->AttribPtr
[tab
[i
]]),
511 VB
->AttribPtr
[tab
[i
]].stride
, count
);
514 rmesa
->state
.aos
[i
].aos_size
=
515 t_aos_size(&VB
->AttribPtr
[tab
[i
]]);
517 comp_size
= _mesa_sizeof_type(VB
->AttribPtr
[tab
[i
]].type
);
519 for (fix
= 0; fix
<= 4 - VB
->AttribPtr
[tab
[i
]].size
; fix
++) {
520 if ((rmesa
->state
.aos
[i
].aos_offset
-
521 comp_size
* fix
) % 4)
530 WARN_ONCE("Feeling lucky?\n");
533 rmesa
->state
.aos
[i
].aos_offset
-= comp_size
* fix
;
535 for (ci
= 0; ci
< VB
->AttribPtr
[tab
[i
]].size
; ci
++)
536 swizzle
[i
][ci
] += fix
;
539 ("Cannot handle offset %x with stride %d, comp %d\n",
540 rmesa
->state
.aos
[i
].aos_offset
,
541 rmesa
->state
.aos
[i
].aos_stride
,
542 VB
->AttribPtr
[tab
[i
]].size
);
543 return R300_FALLBACK_TCL
;
547 /* setup INPUT_ROUTE */
548 R300_STATECHANGE(r300
, vir
[0]);
549 ((drm_r300_cmd_header_t
*) r300
->hw
.vir
[0].cmd
)->packet0
.count
=
550 t_vir0(&r300
->hw
.vir
[0].cmd
[R300_VIR_CNTL_0
], VB
->AttribPtr
,
553 R300_STATECHANGE(r300
, vir
[1]);
554 ((drm_r300_cmd_header_t
*) r300
->hw
.vir
[1].cmd
)->packet0
.count
=
555 t_vir1(&r300
->hw
.vir
[1].cmd
[R300_VIR_CNTL_0
], swizzle
, nr
);
557 /* Set up input_cntl */
558 /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
559 R300_STATECHANGE(r300
, vic
);
560 r300
->hw
.vic
.cmd
[R300_VIC_CNTL_0
] = 0x5555; /* Hard coded value, no idea what it means */
561 r300
->hw
.vic
.cmd
[R300_VIC_CNTL_1
] = t_vic(ctx
, InputsRead
);
563 /* Stage 3: VAP output */
565 R300_STATECHANGE(r300
, vof
);
567 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] = 0;
568 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_1
] = 0;
570 if (OutputsWritten
& (1 << VERT_RESULT_HPOS
))
571 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
572 R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT
;
574 if (OutputsWritten
& (1 << VERT_RESULT_COL0
))
575 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
576 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT
;
578 if (OutputsWritten
& (1 << VERT_RESULT_COL1
))
579 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
580 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT
;
582 /*if(OutputsWritten & (1 << VERT_RESULT_BFC0))
583 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
585 if(OutputsWritten & (1 << VERT_RESULT_BFC1))
586 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; */
587 //if(OutputsWritten & (1 << VERT_RESULT_FOGC))
589 if (OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
590 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
591 R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT
;
593 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
594 if (OutputsWritten
& (1 << (VERT_RESULT_TEX0
+ i
)))
595 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_1
] |= (4 << (3 * i
));
597 rmesa
->state
.aos_count
= nr
;
599 return R300_FALLBACK_NONE
;
603 void r300UseArrays(GLcontext
* ctx
)
605 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
608 if (rmesa
->state
.elt_dma
.buf
)
609 r300_mem_use(rmesa
, rmesa
->state
.elt_dma
.buf
->id
);
611 for (i
= 0; i
< rmesa
->state
.aos_count
; i
++) {
612 if (rmesa
->state
.aos
[i
].buf
)
613 r300_mem_use(rmesa
, rmesa
->state
.aos
[i
].buf
->id
);
618 void r300ReleaseArrays(GLcontext
* ctx
)
620 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
623 r300ReleaseDmaRegion(rmesa
, &rmesa
->state
.elt_dma
, __FUNCTION__
);
624 for (i
= 0; i
< rmesa
->state
.aos_count
; i
++) {
625 r300ReleaseDmaRegion(rmesa
, &rmesa
->state
.aos
[i
], __FUNCTION__
);