/*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 **************************************************************************/
/**
33 * \author Keith Whitwell <keith@tungstengraphics.com>
 */
43 #include "swrast_setup/swrast_setup.h"
44 #include "math/m_translate.h"
46 #include "tnl/t_context.h"
48 #include "r300_context.h"
49 #include "radeon_ioctl.h"
50 #include "r300_state.h"
51 #include "r300_emit.h"
52 #include "r300_ioctl.h"
/*
 * Compile-time guard: each SWIZZLE_* selector must have the same numeric
 * value as the matching R300_INPUT_ROUTE_SELECT_* value; the build stops
 * with #error if any pair differs.  t_swizzle() in this file shifts
 * swizzle[] entries straight into the R300_INPUT_ROUTE_*_SHIFT positions
 * without translating them, which only works while the values agree.
 */
58 #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
59 SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
60 SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
61 SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
62 SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
63 SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
64 #error Cannot change these!
/* In this file DEBUG_ALL expands to DEBUG_VERTS. */
67 #define DEBUG_ALL DEBUG_VERTS
/*
 * COPY_DWORDS(dst, src, nr): copy nr dwords from src to dst.
 *
 * Two variants are selected by USE_X86_ASM: an inline-asm "rep ; movsl"
 * and a plain loop that reads src as an array of int.  The asm variant
 * names dst as an output operand ("=D"), so the caller's dst variable is
 * modified by the macro.  The arguments are pasted unparenthesised into
 * the expansion (e.g. "j < nr"); keep that in mind when passing compound
 * expressions.
 * NOTE(review): parts of both expansions are not visible in this excerpt.
 */
69 #if defined(USE_X86_ASM)
70 #define COPY_DWORDS( dst, src, nr ) \
73 __asm__ __volatile__( "rep ; movsl" \
74 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
80 #define COPY_DWORDS( dst, src, nr ) \
83 for ( j = 0 ; j < nr ; j++ ) \
84 dst[j] = ((int *)src)[j]; \
/*
 * emit_vec4: write `count` elements of one dword (4 bytes) each from `data`
 * into the DMA region `rvb`, starting at rvb->address + rvb->start.
 *
 * Two copy paths are visible: a single COPY_DWORDS() of `count` dwords and
 * a per-element loop that stores one int per iteration.
 * NOTE(review): the test that selects between them and the per-iteration
 * pointer advance are not visible in this excerpt; presumably the bulk copy
 * is for tightly packed input (stride == 4) -- confirm.
 *
 * `ctx` is not referenced in the lines shown.  When RADEON_DEBUG has
 * DEBUG_VERTS set, the call is traced to stderr.
 */
89 static void emit_vec4(GLcontext
* ctx
,
90 struct r300_dma_region
*rvb
,
91 GLvoid
* data
, int stride
, int count
)
/* Destination: start of this region inside the mapped DMA buffer. */
94 int *out
= (int *)(rvb
->address
+ rvb
->start
);
96 if (RADEON_DEBUG
& DEBUG_VERTS
)
97 fprintf(stderr
, "%s count %d stride %d\n",
98 __FUNCTION__
, count
, stride
);
101 COPY_DWORDS(out
, data
, count
);
103 for (i
= 0; i
< count
; i
++) {
104 out
[0] = *(int *)data
;
/*
 * emit_vec8: write `count` elements of two dwords (8 bytes) each from
 * `data` into the DMA region `rvb`, starting at rvb->address + rvb->start.
 *
 * Two copy paths are visible: one COPY_DWORDS() of count * 2 dwords and a
 * per-element loop that stores two ints per iteration.
 * NOTE(review): the selecting condition and the per-iteration pointer
 * advance are not visible in this excerpt; presumably the bulk copy is for
 * tightly packed input (stride == 8) -- confirm.
 * NOTE(review): `data + 4` is byte arithmetic on a GLvoid pointer; if
 * GLvoid is plain void this relies on the GNU C void-pointer-arithmetic
 * extension -- confirm.
 */
110 static void emit_vec8(GLcontext
* ctx
,
111 struct r300_dma_region
*rvb
,
112 GLvoid
* data
, int stride
, int count
)
/* Destination: start of this region inside the mapped DMA buffer. */
115 int *out
= (int *)(rvb
->address
+ rvb
->start
);
117 if (RADEON_DEBUG
& DEBUG_VERTS
)
118 fprintf(stderr
, "%s count %d stride %d\n",
119 __FUNCTION__
, count
, stride
);
122 COPY_DWORDS(out
, data
, count
* 2);
124 for (i
= 0; i
< count
; i
++) {
125 out
[0] = *(int *)data
;
126 out
[1] = *(int *)(data
+ 4);
/*
 * emit_vec12: write `count` elements of three dwords (12 bytes) each from
 * `data` into the DMA region `rvb`, starting at rvb->address + rvb->start.
 *
 * Two copy paths are visible: one COPY_DWORDS() of count * 3 dwords and a
 * per-element loop that stores three ints per iteration.
 * NOTE(review): the selecting condition and the per-iteration pointer
 * advance are not visible in this excerpt; presumably the bulk copy is for
 * tightly packed input (stride == 12) -- confirm.
 * NOTE(review): `data + 4` / `data + 8` is byte arithmetic on a GLvoid
 * pointer; if GLvoid is plain void this relies on the GNU C
 * void-pointer-arithmetic extension -- confirm.
 *
 * Unlike its siblings, the debug trace here also prints the destination
 * and source pointers.
 */
132 static void emit_vec12(GLcontext
* ctx
,
133 struct r300_dma_region
*rvb
,
134 GLvoid
* data
, int stride
, int count
)
/* Destination: start of this region inside the mapped DMA buffer. */
137 int *out
= (int *)(rvb
->address
+ rvb
->start
);
139 if (RADEON_DEBUG
& DEBUG_VERTS
)
140 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
141 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
144 COPY_DWORDS(out
, data
, count
* 3);
146 for (i
= 0; i
< count
; i
++) {
147 out
[0] = *(int *)data
;
148 out
[1] = *(int *)(data
+ 4);
149 out
[2] = *(int *)(data
+ 8);
/*
 * emit_vec16: write `count` elements of four dwords (16 bytes) each from
 * `data` into the DMA region `rvb`, starting at rvb->address + rvb->start.
 *
 * Two copy paths are visible: one COPY_DWORDS() of count * 4 dwords and a
 * per-element loop that stores four ints per iteration.
 * NOTE(review): the selecting condition and the per-iteration pointer
 * advance are not visible in this excerpt; presumably the bulk copy is for
 * tightly packed input (stride == 16) -- confirm.
 * NOTE(review): `data + 4` .. `data + 12` is byte arithmetic on a GLvoid
 * pointer; if GLvoid is plain void this relies on the GNU C
 * void-pointer-arithmetic extension -- confirm.
 */
155 static void emit_vec16(GLcontext
* ctx
,
156 struct r300_dma_region
*rvb
,
157 GLvoid
* data
, int stride
, int count
)
/* Destination: start of this region inside the mapped DMA buffer. */
160 int *out
= (int *)(rvb
->address
+ rvb
->start
);
162 if (RADEON_DEBUG
& DEBUG_VERTS
)
163 fprintf(stderr
, "%s count %d stride %d\n",
164 __FUNCTION__
, count
, stride
);
167 COPY_DWORDS(out
, data
, count
* 4);
169 for (i
= 0; i
< count
; i
++) {
170 out
[0] = *(int *)data
;
171 out
[1] = *(int *)(data
+ 4);
172 out
[2] = *(int *)(data
+ 8);
173 out
[3] = *(int *)(data
+ 12);
/*
 * emit_vector: allocate a DMA region for one vertex attribute array and
 * copy the attribute data into it with emit_vec4/8/12/16.
 *
 * ctx     GL context; the r300 context is derived from it.
 * rvb     region descriptor; passed to r300AllocDmaRegion(), after which
 *         aos_offset is set to GET_START(rvb).
 * data    source attribute data, handed on to the emit_vecN helper.
 * size    element size; presumably in dwords, since the allocations are
 *         sized size * 4 and size * count * 4 -- confirm.
 * stride  source stride, handed on to the emit_vecN helper.
 * count   number of elements.
 *
 * Two allocation paths are visible: one for a single element (size * 4)
 * and one for the whole array (size * count * 4, where aos_stride is also
 * set to size).
 * NOTE(review): the conditions choosing between the two paths, and the
 * dispatch that picks the emit_vecN helper, are not visible in this
 * excerpt.
 */
179 static void emit_vector(GLcontext
* ctx
,
180 struct r300_dma_region
*rvb
,
181 GLvoid
* data
, int size
, int stride
, int count
)
183 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
185 if (RADEON_DEBUG
& DEBUG_VERTS
)
186 fprintf(stderr
, "%s count %d size %d stride %d\n",
187 __FUNCTION__
, count
, size
, stride
);
189 /* Gets triggered when playing with future_hw_tcl_on ... */
/* Single-element allocation: room for one element of `size`. */
193 r300AllocDmaRegion(rmesa
, rvb
, size
* 4, 4);
195 rvb
->aos_offset
= GET_START(rvb
);
/* Whole-array allocation: room for `count` elements of `size`. */
198 r300AllocDmaRegion(rmesa
, rvb
, size
* count
* 4, 4); /* alignment? */
199 rvb
->aos_offset
= GET_START(rvb
);
200 rvb
->aos_stride
= size
;
/* Copy helpers, one per element width (4, 8, 12 or 16 bytes). */
207 emit_vec4(ctx
, rvb
, data
, stride
, count
);
210 emit_vec8(ctx
, rvb
, data
, stride
, count
);
213 emit_vec12(ctx
, rvb
, data
, stride
, count
);
216 emit_vec16(ctx
, rvb
, data
, stride
, count
);
/*
 * t_type: return the AOS_FORMAT_* code for attribute array `dt`.
 * GL_UNSIGNED_BYTE yields AOS_FORMAT_UBYTE; the other return values shown
 * are AOS_FORMAT_USHORT and AOS_FORMAT_FLOAT, and the final return is
 * AOS_FORMAT_FLOAT.  t_vir0() places the result at bit 14 of a routing
 * entry.
 * NOTE(review): the switch expression (presumably dt->type) and the
 * remaining case labels are not visible in this excerpt -- confirm which
 * types select USHORT and FLOAT.
 */
226 static GLuint
t_type(struct dt
*dt
)
229 case GL_UNSIGNED_BYTE
:
230 return AOS_FORMAT_UBYTE
;
233 return AOS_FORMAT_USHORT
;
236 return AOS_FORMAT_FLOAT
;
243 return AOS_FORMAT_FLOAT
;
/*
 * t_vir0_size: return the size code for attribute array `dt` that t_vir0()
 * ORs into the low bits of a routing entry.  GL_UNSIGNED_BYTE is handled
 * as its own case.
 * NOTE(review): the switch expression, the values returned and any other
 * cases are not visible in this excerpt.
 */
246 static GLuint
t_vir0_size(struct dt
*dt
)
249 case GL_UNSIGNED_BYTE
:
/*
 * t_aos_size: return the size value for attribute array `dt` that
 * r300EmitArrays() stores in rmesa->state.aos[i].aos_size.
 * GL_UNSIGNED_BYTE is handled as its own case.
 * NOTE(review): the switch expression, the values returned and any other
 * cases are not visible in this excerpt.
 */
266 static GLuint
t_aos_size(struct dt
*dt
)
269 case GL_UNSIGNED_BYTE
:
/*
 * t_vir0: build the first set of input-route words for `nr` attributes.
 *
 * dst     destination for the packed words (the stores themselves are not
 *         visible in this excerpt).
 * dt      attribute arrays, indexed through tab[].
 * inputs  per-attribute value placed at bit 8 of each entry, indexed
 *         through tab[].
 * tab     maps entry index 0..nr-1 to an attribute index.
 * nr      number of entries.
 *
 * Each entry is  t_vir0_size() | inputs[] << 8 | t_type() << 14.  The loop
 * walks the entries in pairs (i, i + 1); an entry at nr - 1 is built
 * separately after the loop.
 * NOTE(review): how the second entry of a pair is merged into `dw`, and
 * the condition under which bit 29 (13 + 16) is set, are not visible here.
 *
 * Returns (nr + 1) >> 1, i.e. nr / 2 rounded up.
 */
286 static GLuint
t_vir0(uint32_t * dst
, struct dt
*dt
, int *inputs
,
287 GLint
* tab
, GLuint nr
)
291 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
/* First entry of the pair. */
292 dw
= t_vir0_size(&dt
[tab
[i
]]) | (inputs
[tab
[i
]] << 8) |
293 (t_type(&dt
[tab
[i
]]) << 14);
/* Second entry of the pair, built from attribute tab[i + 1]. */
295 (t_vir0_size(&dt
[tab
[i
+ 1]]) |
296 (inputs
[tab
[i
+ 1]] << 8) | (t_type(&dt
[tab
[i
+ 1]])
300 dw
|= (1 << (13 + 16));
/* Entry for the last attribute, tab[nr - 1]. */
306 dw
= t_vir0_size(&dt
[tab
[nr
- 1]]) | (inputs
[tab
[nr
- 1]]
308 (t_type(&dt
[tab
[nr
- 1]]) << 14);
314 return (nr
+ 1) >> 1;
/*
 * t_swizzle: pack a four-component swizzle into one value by shifting
 * swizzle[0..3] to the R300_INPUT_ROUTE_X/Y/Z/W_SHIFT positions and OR-ing
 * the results.  The entries are used as given, without translation or
 * masking.
 */
317 static GLuint
t_swizzle(int swizzle
[4])
319 return (swizzle
[0] << R300_INPUT_ROUTE_X_SHIFT
) |
320 (swizzle
[1] << R300_INPUT_ROUTE_Y_SHIFT
) |
321 (swizzle
[2] << R300_INPUT_ROUTE_Z_SHIFT
) |
322 (swizzle
[3] << R300_INPUT_ROUTE_W_SHIFT
);
/*
 * t_vir1: build the swizzle routing words for `nr` attributes.
 *
 * dst      destination words; entry pair (i, i + 1) goes to dst[i >> 1].
 * swizzle  one four-component swizzle per attribute.
 * nr       number of attributes.
 *
 * Every entry is t_swizzle(swizzle[k]) | R300_INPUT_ROUTE_ENABLE.  The loop
 * handles the entries in pairs; an entry for swizzle[nr - 1] is built
 * separately after the loop.
 * NOTE(review): how the second entry of a pair is merged into the word,
 * and the destination and condition of the trailing store, are not visible
 * in this excerpt.
 *
 * Returns (nr + 1) >> 1, i.e. nr / 2 rounded up.
 */
325 static GLuint
t_vir1(uint32_t * dst
, int swizzle
[][4], GLuint nr
)
329 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
/* First entry of the pair. */
330 dst
[i
>> 1] = t_swizzle(swizzle
[i
]) | R300_INPUT_ROUTE_ENABLE
;
/* Second entry of the pair. */
332 (t_swizzle(swizzle
[i
+ 1]) | R300_INPUT_ROUTE_ENABLE
)
/* Entry for the last attribute. */
338 t_swizzle(swizzle
[nr
- 1]) | R300_INPUT_ROUTE_ENABLE
;
340 return (nr
+ 1) >> 1;
/*
 * t_emit_size: size value for attribute array `dt`.  r300EmitArrays() uses
 * the result both as aos_size for arrays referenced in place and as the
 * `size` argument of emit_vector().
 * NOTE(review): the body is not visible in this excerpt.
 */
343 static GLuint
t_emit_size(struct dt
*dt
)
/*
 * t_vic: derive the input-control bitmask `vic_1` from InputsRead.
 *
 * InputsRead is tested with (1 << VERT_ATTRIB_x) masks:
 *   VERT_ATTRIB_POS      -> R300_INPUT_CNTL_POS
 *   VERT_ATTRIB_NORMAL   -> R300_INPUT_CNTL_NORMAL
 *   VERT_ATTRIB_COLOR0   -> R300_INPUT_CNTL_COLOR
 *   VERT_ATTRIB_TEX0 + i -> R300_INPUT_CNTL_TC0 << i,
 *                           for i < ctx->Const.MaxTextureUnits
 *
 * Side effect: r300->state.texture.tc_count is reset to 0 and then
 * incremented once per texture coordinate set found in InputsRead.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * r300EmitArrays() stores the result in vic.cmd[R300_VIC_CNTL_1].
 */
348 static GLuint
t_vic(GLcontext
* ctx
, GLuint InputsRead
)
350 r300ContextPtr r300
= R300_CONTEXT(ctx
);
353 if (InputsRead
& (1 << VERT_ATTRIB_POS
))
354 vic_1
|= R300_INPUT_CNTL_POS
;
356 if (InputsRead
& (1 << VERT_ATTRIB_NORMAL
))
357 vic_1
|= R300_INPUT_CNTL_NORMAL
;
359 if (InputsRead
& (1 << VERT_ATTRIB_COLOR0
))
360 vic_1
|= R300_INPUT_CNTL_COLOR
;
/* Recount the texture coordinate sets from scratch on every call. */
362 r300
->state
.texture
.tc_count
= 0;
363 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
364 if (InputsRead
& (1 << (VERT_ATTRIB_TEX0
+ i
))) {
365 r300
->state
.texture
.tc_count
++;
366 vic_1
|= R300_INPUT_CNTL_TC0
<< i
;
372 /* Emit vertex data to GART memory
373 * Route inputs to the vertex processor
374 * This function should never return R300_FALLBACK_TCL when using software tcl.
 */
/*
 * r300EmitArrays: set up the vertex attribute arrays and the input/output
 * routing state for the vertex buffer in rmesa->state.VB.
 *
 * Steps visible in this excerpt:
 *  - Determine InputsRead / OutputsWritten, either from the key of the
 *    current vertex shader or from the TNL render_inputs_bitset.
 *  - For every attribute read: pick a swizzle, then either reference the
 *    data in place when r300IsGartMemory() accepts it, or copy it into a
 *    DMA region with emit_vector().
 *  - Fill in the vir[0], vir[1], vic and vof state and record aos_count.
 *
 * Returns R300_FALLBACK_NONE on success.  Returns R300_FALLBACK_TCL when
 * there are more than R300_MAX_AOS_ARRAYS attributes, when an in-place
 * array has a stride that is not a multiple of 4, when an array that has to
 * be copied is not GL_FLOAT, or when an array offset cannot be handled.
 *
 * Review fix: InputsRead is a bitmask indexed by attribute number, so the
 * position test in the fixed-input block now uses (1 << VERT_ATTRIB_POS),
 * like every other InputsRead test in this file.  The previous code used
 * the attribute index itself as the mask.
 *
 * NOTE(review): several lines of this function (conditions, braces and
 * some assignments) are not visible in this excerpt.
 */
377 int r300EmitArrays(GLcontext
* ctx
)
379 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
380 r300ContextPtr r300
= rmesa
;
381 struct radeon_vertex_buffer
*VB
= &rmesa
->state
.VB
;
383 GLuint count
= VB
->Count
;
385 GLuint InputsRead
= 0, OutputsWritten
= 0;
387 int vir_inputs
[VERT_ATTRIB_MAX
];
388 GLint tab
[VERT_ATTRIB_MAX
];
389 int swizzle
[VERT_ATTRIB_MAX
][4];
/* Vertex-shader path: inputs and both masks come from the shader. */
392 struct r300_vertex_program
*prog
=
393 (struct r300_vertex_program
*)
394 CURRENT_VERTEX_SHADER(ctx
);
395 inputs
= prog
->inputs
;
396 InputsRead
= CURRENT_VERTEX_SHADER(ctx
)->key
.InputsRead
;
397 OutputsWritten
= CURRENT_VERTEX_SHADER(ctx
)->key
.OutputsWritten
;
/* Other path: build both masks from the TNL render inputs. */
399 DECLARE_RENDERINPUTS(inputs_bitset
);
400 inputs
= r300
->state
.sw_tcl_inputs
;
402 RENDERINPUTS_COPY(inputs_bitset
,
403 TNL_CONTEXT(ctx
)->render_inputs_bitset
);
405 assert(RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_POS
));
406 InputsRead
|= 1 << VERT_ATTRIB_POS
;
407 OutputsWritten
|= 1 << VERT_RESULT_HPOS
;
409 assert(RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_NORMAL
)
412 assert(RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_COLOR0
));
413 InputsRead
|= 1 << VERT_ATTRIB_COLOR0
;
414 OutputsWritten
|= 1 << VERT_RESULT_COL0
;
416 if (RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_COLOR1
)) {
417 InputsRead
|= 1 << VERT_ATTRIB_COLOR1
;
418 OutputsWritten
|= 1 << VERT_RESULT_COL1
;
421 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
422 if (RENDERINPUTS_TEST
423 (inputs_bitset
, _TNL_ATTRIB_TEX(i
))) {
424 InputsRead
|= 1 << (VERT_ATTRIB_TEX0
+ i
);
425 OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ i
);
428 for (i
= 0, nr
= 0; i
< VERT_ATTRIB_MAX
; i
++)
429 if (InputsRead
& (1 << i
))
435 (r300
->radeon
.radeonScreen
->
436 chip_flags
& RADEON_CHIPSET_TCL
)) {
437 /* Fixed, apply to vir0 only */
438 memcpy(vir_inputs
, inputs
,
439 VERT_ATTRIB_MAX
* sizeof(int));
/* POS is tested with a (1 << index) mask, like the attributes below. */
442 if (InputsRead
& (1 << VERT_ATTRIB_POS
))
443 inputs
[VERT_ATTRIB_POS
] = 0;
445 if (InputsRead
& (1 << VERT_ATTRIB_COLOR0
))
446 inputs
[VERT_ATTRIB_COLOR0
] = 2;
448 if (InputsRead
& (1 << VERT_ATTRIB_COLOR1
))
449 inputs
[VERT_ATTRIB_COLOR1
] = 3;
451 for (i
= VERT_ATTRIB_TEX0
; i
<= VERT_ATTRIB_TEX7
; i
++)
452 if (InputsRead
& (1 << i
))
453 inputs
[i
] = 6 + (i
- VERT_ATTRIB_TEX0
);
456 RENDERINPUTS_COPY(rmesa
->state
.render_inputs_bitset
,
460 assert(OutputsWritten
);
462 for (i
= 0, nr
= 0; i
< VERT_ATTRIB_MAX
; i
++)
463 if (InputsRead
& (1 << i
))
/* More attributes than R300_MAX_AOS_ARRAYS: fall back. */
466 if (nr
> R300_MAX_AOS_ARRAYS
)
467 return R300_FALLBACK_TCL
;
469 for (i
= 0; i
< nr
; i
++) {
471 int comp_size
, fix
, found
= 0;
/* Start from the default swizzle (ZERO, ZERO, ZERO, ONE). */
473 swizzle
[i
][0] = SWIZZLE_ZERO
;
474 swizzle
[i
][1] = SWIZZLE_ZERO
;
475 swizzle
[i
][2] = SWIZZLE_ZERO
;
476 swizzle
[i
][3] = SWIZZLE_ONE
;
478 for (ci
= 0; ci
< VB
->AttribPtr
[tab
[i
]].size
; ci
++)
482 #define SWAP_INT(a, b) do { \
489 if (VB
->AttribPtr
[tab
[i
]].type
== GL_UNSIGNED_BYTE
) {
490 SWAP_INT(swizzle
[i
][0], swizzle
[i
][3]);
491 SWAP_INT(swizzle
[i
][1], swizzle
[i
][2]);
493 #endif /* MESA_BIG_ENDIAN */
/* Data accepted by r300IsGartMemory() is referenced in place. */
495 if (r300IsGartMemory(rmesa
, VB
->AttribPtr
[tab
[i
]].data
,
496 /*(count-1)*stride */ 4)) {
497 if (VB
->AttribPtr
[tab
[i
]].stride
% 4)
498 return R300_FALLBACK_TCL
;
500 rmesa
->state
.aos
[i
].address
=
501 VB
->AttribPtr
[tab
[i
]].data
;
502 rmesa
->state
.aos
[i
].start
= 0;
503 rmesa
->state
.aos
[i
].aos_offset
=
504 r300GartOffsetFromVirtual(rmesa
,
506 AttribPtr
[tab
[i
]].data
);
507 rmesa
->state
.aos
[i
].aos_stride
=
508 VB
->AttribPtr
[tab
[i
]].stride
/ 4;
510 rmesa
->state
.aos
[i
].aos_size
=
511 t_emit_size(&VB
->AttribPtr
[tab
[i
]]);
513 /* TODO: emit_vector can only handle 4 byte vectors */
514 if (VB
->AttribPtr
[tab
[i
]].type
!= GL_FLOAT
)
515 return R300_FALLBACK_TCL
;
517 emit_vector(ctx
, &rmesa
->state
.aos
[i
],
518 VB
->AttribPtr
[tab
[i
]].data
,
519 t_emit_size(&VB
->AttribPtr
[tab
[i
]]),
520 VB
->AttribPtr
[tab
[i
]].stride
, count
);
523 rmesa
->state
.aos
[i
].aos_size
=
524 t_aos_size(&VB
->AttribPtr
[tab
[i
]]);
526 comp_size
= _mesa_sizeof_type(VB
->AttribPtr
[tab
[i
]].type
);
/*
 * Try component back-offs `fix` in 0 .. 4 - size, testing
 * (aos_offset - comp_size * fix) % 4.  The chosen back-off is
 * subtracted from aos_offset and added to the swizzle selectors.
 */
528 for (fix
= 0; fix
<= 4 - VB
->AttribPtr
[tab
[i
]].size
; fix
++) {
529 if ((rmesa
->state
.aos
[i
].aos_offset
-
530 comp_size
* fix
) % 4)
539 WARN_ONCE("Feeling lucky?\n");
542 rmesa
->state
.aos
[i
].aos_offset
-= comp_size
* fix
;
544 for (ci
= 0; ci
< VB
->AttribPtr
[tab
[i
]].size
; ci
++)
545 swizzle
[i
][ci
] += fix
;
548 ("Cannot handle offset %x with stride %d, comp %d\n",
549 rmesa
->state
.aos
[i
].aos_offset
,
550 rmesa
->state
.aos
[i
].aos_stride
,
551 VB
->AttribPtr
[tab
[i
]].size
);
552 return R300_FALLBACK_TCL
;
556 /* setup INPUT_ROUTE */
557 R300_STATECHANGE(r300
, vir
[0]);
558 ((drm_r300_cmd_header_t
*) r300
->hw
.vir
[0].cmd
)->packet0
.count
=
559 t_vir0(&r300
->hw
.vir
[0].cmd
[R300_VIR_CNTL_0
], VB
->AttribPtr
,
562 R300_STATECHANGE(r300
, vir
[1]);
563 ((drm_r300_cmd_header_t
*) r300
->hw
.vir
[1].cmd
)->packet0
.count
=
564 t_vir1(&r300
->hw
.vir
[1].cmd
[R300_VIR_CNTL_0
], swizzle
, nr
);
566 /* Set up input_cntl */
567 /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
568 R300_STATECHANGE(r300
, vic
);
569 r300
->hw
.vic
.cmd
[R300_VIC_CNTL_0
] = 0x5555; /* Hard coded value, no idea what it means */
570 r300
->hw
.vic
.cmd
[R300_VIC_CNTL_1
] = t_vic(ctx
, InputsRead
);
572 /* Stage 3: VAP output */
574 R300_STATECHANGE(r300
, vof
);
576 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] = 0;
577 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_1
] = 0;
579 if (OutputsWritten
& (1 << VERT_RESULT_HPOS
))
580 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
581 R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT
;
583 if (OutputsWritten
& (1 << VERT_RESULT_COL0
))
584 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
585 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT
;
587 if (OutputsWritten
& (1 << VERT_RESULT_COL1
))
588 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
589 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT
;
591 /*if(OutputsWritten & (1 << VERT_RESULT_BFC0))
592 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
594 if(OutputsWritten & (1 << VERT_RESULT_BFC1))
595 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; */
596 //if(OutputsWritten & (1 << VERT_RESULT_FOGC))
598 if (OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
599 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
600 R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT
;
/* Each texture unit owns a 3-bit field in VOF_CNTL_1; it is set to 4. */
602 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
603 if (OutputsWritten
& (1 << (VERT_RESULT_TEX0
+ i
)))
604 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_1
] |= (4 << (3 * i
));
/* r300UseArrays() and r300ReleaseArrays() iterate up to this count. */
606 rmesa
->state
.aos_count
= nr
;
608 return R300_FALLBACK_NONE
;
/*
 * r300UseArrays: call r300_mem_use() on the buffer id of the element DMA
 * region (rmesa->state.elt_dma) and of each of the first aos_count entries
 * of rmesa->state.aos[].  Regions whose `buf` pointer is NULL are skipped.
 * NOTE(review): any conditional compilation around the body is not visible
 * in this excerpt.
 */
612 void r300UseArrays(GLcontext
* ctx
)
614 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
617 if (rmesa
->state
.elt_dma
.buf
)
618 r300_mem_use(rmesa
, rmesa
->state
.elt_dma
.buf
->id
);
620 for (i
= 0; i
< rmesa
->state
.aos_count
; i
++) {
621 if (rmesa
->state
.aos
[i
].buf
)
622 r300_mem_use(rmesa
, rmesa
->state
.aos
[i
].buf
->id
);
/*
 * r300ReleaseArrays: hand the element DMA region (rmesa->state.elt_dma)
 * and each of the first aos_count entries of rmesa->state.aos[] to
 * r300ReleaseDmaRegion(), passing __FUNCTION__ as the last argument.
 * aos_count itself is not modified in the lines shown.
 */
627 void r300ReleaseArrays(GLcontext
* ctx
)
629 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
632 r300ReleaseDmaRegion(rmesa
, &rmesa
->state
.elt_dma
, __FUNCTION__
);
633 for (i
= 0; i
< rmesa
->state
.aos_count
; i
++) {
634 r300ReleaseDmaRegion(rmesa
, &rmesa
->state
.aos
[i
], __FUNCTION__
);