2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 **************************************************************************/
33 * \author Keith Whitwell <keith@tungstengraphics.com>
43 #include "swrast_setup/swrast_setup.h"
44 #include "math/m_translate.h"
46 #include "tnl/t_context.h"
48 #include "r300_context.h"
49 #include "radeon_ioctl.h"
50 #include "r300_state.h"
51 #include "r300_emit.h"
52 #include "r300_ioctl.h"
/*
 * Compile-time sanity check: each SWIZZLE_* selector must have the same
 * numeric value as the matching R300_INPUT_ROUTE_SELECT_* constant; if any
 * pair differs the build is stopped with #error.
 * NOTE(review): presumably this is what allows t_swizzle() to shift SWIZZLE_*
 * values straight into the input-route word without translation - confirm.
 */
58 #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
59 SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
60 SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
61 SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
62 SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
63 SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
64 #error Cannot change these!
/* DEBUG_ALL is an alias for the vertex-debug flag DEBUG_VERTS. */
67 #define DEBUG_ALL DEBUG_VERTS
/*
 * COPY_DWORDS(dst, src, nr): copy dwords from src to dst.
 *
 * Two alternative definitions are provided:
 *  - when USE_X86_ASM is defined, an inline-asm "rep ; movsl" string copy
 *    with dst bound to the "D" constraint;
 *  - the other definition is a loop over j in [0, nr) doing
 *    dst[j] = ((int *)src)[j].
 * NOTE(review): parts of both macro bodies (temporaries, input constraints,
 * any post-copy update of dst) are not visible in this excerpt - check the
 * full definition before relying on whether dst is advanced by the macro.
 */
69 #if defined(USE_X86_ASM)
70 #define COPY_DWORDS( dst, src, nr ) \
73 __asm__ __volatile__( "rep ; movsl" \
74 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
80 #define COPY_DWORDS( dst, src, nr ) \
83 for ( j = 0 ; j < nr ; j++ ) \
84 dst[j] = ((int *)src)[j]; \
/*
 * Copy `count` one-dword (4-byte) elements from `data` into the DMA region
 * `rvb`.
 *
 * The destination is rvb->address + rvb->start, accessed as an int array.
 * Two copy paths are visible: a bulk COPY_DWORDS of `count` dwords, and a
 * per-element loop storing the dword at `data` into out[0].
 * NOTE(review): the condition that selects between the two paths and the
 * pointer advances inside the loop are not visible in this excerpt;
 * presumably the bulk path is used when `stride` equals the element size
 * (tightly packed input) - confirm.
 */
89 static void r300EmitVec4(GLcontext
* ctx
,
90 struct r300_dma_region
*rvb
,
91 GLvoid
* data
, int stride
, int count
)
94 int *out
= (int *)(rvb
->address
+ rvb
->start
);
/* Optional trace, printed when the DEBUG_VERTS bit of RADEON_DEBUG is set. */
96 if (RADEON_DEBUG
& DEBUG_VERTS
)
97 fprintf(stderr
, "%s count %d stride %d\n",
98 __FUNCTION__
, count
, stride
);
/* Bulk path: one dword per element. */
101 COPY_DWORDS(out
, data
, count
);
/* Per-element path. */
103 for (i
= 0; i
< count
; i
++) {
104 out
[0] = *(int *)data
;
/*
 * Copy `count` two-dword (8-byte) elements from `data` into the DMA region
 * `rvb`.
 *
 * The destination is rvb->address + rvb->start, accessed as an int array.
 * Two copy paths are visible: a bulk COPY_DWORDS of count * 2 dwords, and a
 * per-element loop reading the dwords at byte offsets 0 and 4 of `data`.
 * NOTE(review): the condition that selects between the two paths and the
 * pointer advances inside the loop are not visible in this excerpt;
 * presumably the bulk path is used for tightly packed input - confirm.
 * NOTE(review): `data + 4` is byte arithmetic on a GLvoid pointer, which
 * assumes the compiler accepts arithmetic on void pointers - confirm.
 */
110 static void r300EmitVec8(GLcontext
* ctx
,
111 struct r300_dma_region
*rvb
,
112 GLvoid
* data
, int stride
, int count
)
115 int *out
= (int *)(rvb
->address
+ rvb
->start
);
/* Optional trace, printed when the DEBUG_VERTS bit of RADEON_DEBUG is set. */
117 if (RADEON_DEBUG
& DEBUG_VERTS
)
118 fprintf(stderr
, "%s count %d stride %d\n",
119 __FUNCTION__
, count
, stride
);
/* Bulk path: two dwords per element. */
122 COPY_DWORDS(out
, data
, count
* 2);
/* Per-element path. */
124 for (i
= 0; i
< count
; i
++) {
125 out
[0] = *(int *)data
;
126 out
[1] = *(int *)(data
+ 4);
/*
 * Copy `count` three-dword (12-byte) elements from `data` into the DMA
 * region `rvb`.
 *
 * The destination is rvb->address + rvb->start, accessed as an int array.
 * Two copy paths are visible: a bulk COPY_DWORDS of count * 3 dwords, and a
 * per-element loop reading the dwords at byte offsets 0, 4 and 8 of `data`.
 * NOTE(review): the condition that selects between the two paths and the
 * pointer advances inside the loop are not visible in this excerpt;
 * presumably the bulk path is used for tightly packed input - confirm.
 */
132 static void r300EmitVec12(GLcontext
* ctx
,
133 struct r300_dma_region
*rvb
,
134 GLvoid
* data
, int stride
, int count
)
137 int *out
= (int *)(rvb
->address
+ rvb
->start
);
/*
 * Optional trace, printed when the DEBUG_VERTS bit of RADEON_DEBUG is set;
 * unlike the other r300EmitVecN helpers it also prints both pointers.
 */
139 if (RADEON_DEBUG
& DEBUG_VERTS
)
140 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
141 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
/* Bulk path: three dwords per element. */
144 COPY_DWORDS(out
, data
, count
* 3);
/* Per-element path. */
146 for (i
= 0; i
< count
; i
++) {
147 out
[0] = *(int *)data
;
148 out
[1] = *(int *)(data
+ 4);
149 out
[2] = *(int *)(data
+ 8);
/*
 * Copy `count` four-dword (16-byte) elements from `data` into the DMA region
 * `rvb`.
 *
 * The destination is rvb->address + rvb->start, accessed as an int array.
 * Two copy paths are visible: a bulk COPY_DWORDS of count * 4 dwords, and a
 * per-element loop reading the dwords at byte offsets 0, 4, 8 and 12 of
 * `data`.
 * NOTE(review): the condition that selects between the two paths and the
 * pointer advances inside the loop are not visible in this excerpt;
 * presumably the bulk path is used for tightly packed input - confirm.
 */
155 static void r300EmitVec16(GLcontext
* ctx
,
156 struct r300_dma_region
*rvb
,
157 GLvoid
* data
, int stride
, int count
)
160 int *out
= (int *)(rvb
->address
+ rvb
->start
);
/* Optional trace, printed when the DEBUG_VERTS bit of RADEON_DEBUG is set. */
162 if (RADEON_DEBUG
& DEBUG_VERTS
)
163 fprintf(stderr
, "%s count %d stride %d\n",
164 __FUNCTION__
, count
, stride
);
/* Bulk path: four dwords per element. */
167 COPY_DWORDS(out
, data
, count
* 4);
/* Per-element path. */
169 for (i
= 0; i
< count
; i
++) {
170 out
[0] = *(int *)data
;
171 out
[1] = *(int *)(data
+ 4);
172 out
[2] = *(int *)(data
+ 8);
173 out
[3] = *(int *)(data
+ 12);
/*
 * Allocate a DMA region for one vertex attribute array and copy the array
 * into it.
 *
 *   ctx    - GL context; the r300 context is derived with R300_CONTEXT().
 *   rvb    - DMA region descriptor to fill in (aos_offset, aos_stride).
 *   data   - source attribute data.
 *   size   - number of dwords per element; it is multiplied by 4 to form the
 *            allocation size.
 *   stride - source stride, forwarded to the r300EmitVecN helper.
 *   count  - number of elements.
 *
 * Two allocation paths are visible: one reserving size * 4 bytes and one
 * reserving size * count * 4 bytes (the latter also sets aos_stride = size).
 * In both, aos_offset is taken from GET_START(rvb). The data is then copied
 * by one of r300EmitVec4/8/12/16.
 * NOTE(review): the condition choosing between the two allocations and the
 * selection of the EmitVecN helper are not visible in this excerpt;
 * presumably the small allocation handles stride == 0 (a single constant
 * element) and the helper is chosen by `size` - confirm.
 */
179 static void r300EmitVec(GLcontext
* ctx
,
180 struct r300_dma_region
*rvb
,
181 GLvoid
* data
, int size
, int stride
, int count
)
183 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
/* Optional trace, printed when the DEBUG_VERTS bit of RADEON_DEBUG is set. */
185 if (RADEON_DEBUG
& DEBUG_VERTS
)
186 fprintf(stderr
, "%s count %d size %d stride %d\n",
187 __FUNCTION__
, count
, size
, stride
);
189 /* Gets triggered when playing with future_hw_tcl_on ... */
/* Allocation for a single element (size dwords). */
193 r300AllocDmaRegion(rmesa
, rvb
, size
* 4, 4);
195 rvb
->aos_offset
= GET_START(rvb
);
/* Allocation for the whole array (size dwords per element, count elements). */
198 r300AllocDmaRegion(rmesa
, rvb
, size
* count
* 4, 4); /* alignment? */
199 rvb
->aos_offset
= GET_START(rvb
);
200 rvb
->aos_stride
= size
;
/* Copy helpers, one per element width in dwords. */
207 r300EmitVec4(ctx
, rvb
, data
, stride
, count
);
210 r300EmitVec8(ctx
, rvb
, data
, stride
, count
);
213 r300EmitVec12(ctx
, rvb
, data
, stride
, count
);
216 r300EmitVec16(ctx
, rvb
, data
, stride
, count
);
/*
 * Bit positions of the fields of one attribute descriptor in an input-route
 * (VIR0) word. t_vir_pack() uses the SIZE and INPUT shifts; t_vir0() uses
 * the STOP shift and places the second descriptor of each pair at
 * R300_VIR0_HIGH_SHIFT. The TYPE shift is only referenced from a
 * commented-out line.
 */
226 #define R300_VIR0_AOS_SIZE_SHIFT 0
227 #define R300_VIR0_AOS_INPUT_SHIFT 8
228 #define R300_VIR0_AOS_STOP_SHIFT 13
229 #define R300_VIR0_AOS_TYPE_SHIFT 14
230 #define R300_VIR0_HIGH_SHIFT 16
232 // Pack 4 elements into 16 bits (aos_size in the first 8 bits, input in the next 5, 1 stop bit (wild guess), aos_type in the last 2).
/*
 * Build the descriptor for attribute `i`:
 *   (dt[i]->size - 1) at R300_VIR0_AOS_SIZE_SHIFT, OR'ed with
 *   inputs[i] at R300_VIR0_AOS_INPUT_SHIFT.
 * The type field is not set (the line that would do so is commented out),
 * and the stop bit is left to the caller.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the function returns dw - confirm.
 */
233 static inline GLuint
t_vir_pack(GLvector4f
** dt
, int *inputs
, int i
)
236 dw
= (dt
[i
]->size
- 1) << R300_VIR0_AOS_SIZE_SHIFT
;
237 dw
|= inputs
[i
] << R300_VIR0_AOS_INPUT_SHIFT
;
238 //dw |= t_type(&dt[i]) << R300_VIR0_AOS_TYPE_SHIFT;
/*
 * Fill `dst` with input-route (VIR0) words for `nr` attributes, two
 * attribute descriptors per dword.
 *
 *   dst    - output array; word i/2 receives the pair starting at i.
 *   dt     - attribute vectors, indexed by attribute number.
 *   inputs - per-attribute input slot, indexed by attribute number.
 *   tab    - maps array position (0..nr-1) to attribute number.
 *   nr     - number of attributes.
 *
 * Returns the number of dwords corresponding to nr descriptors,
 * (nr + 1) / 2.
 * NOTE(review): the conditions guarding the stop-bit handling (last pair,
 * odd trailing attribute) and the store of the trailing word are not
 * visible in this excerpt - confirm against the full source.
 */
242 static GLuint
t_vir0(uint32_t * dst
, GLvector4f
** dt
, int *inputs
,
243 GLint
* tab
, GLuint nr
)
245 GLuint i
, dw
, dwInternel
;
/* Full pairs: first descriptor in the low half, second in the high half. */
247 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
248 dw
= t_vir_pack(dt
, inputs
, tab
[i
]);
249 dwInternel
= t_vir_pack(dt
, inputs
, tab
[i
+ 1]);
250 dw
|= dwInternel
<< R300_VIR0_HIGH_SHIFT
;
/* Stop bit position for the descriptor held in the high half. */
255 (R300_VIR0_AOS_STOP_SHIFT
+ R300_VIR0_HIGH_SHIFT
));
257 dst
[i
>> 1] = dw
; // Is the same as i/2
/* Trailing attribute: low half only, with its stop bit set. */
261 dw
= t_vir_pack(dt
, inputs
, tab
[nr
- 1]);
262 dw
|= 1 << R300_VIR0_AOS_STOP_SHIFT
;
267 return (nr
+ 1) >> 1; // Is the same as (nr+1)/2
/*
 * Combine four per-component swizzle selectors into one input-route value:
 * swizzle[0..3] are shifted into the X, Y, Z and W fields
 * (R300_INPUT_ROUTE_{X,Y,Z,W}_SHIFT) and OR'ed together.
 */
270 static GLuint
t_swizzle(int swizzle
[4])
272 return (swizzle
[0] << R300_INPUT_ROUTE_X_SHIFT
) |
273 (swizzle
[1] << R300_INPUT_ROUTE_Y_SHIFT
) |
274 (swizzle
[2] << R300_INPUT_ROUTE_Z_SHIFT
) |
275 (swizzle
[3] << R300_INPUT_ROUTE_W_SHIFT
);
/*
 * Fill `dst` with swizzle (VIR1) words for `nr` attributes.
 *
 *   dst     - output array; word i/2 is written for the pair starting at i.
 *   swizzle - per-attribute swizzle selectors, four per attribute.
 *   nr      - number of attributes.
 *
 * Each attribute contributes t_swizzle(swizzle[k]) | R300_INPUT_ROUTE_ENABLE.
 * Returns the number of dwords corresponding to nr entries, (nr + 1) / 2.
 * NOTE(review): how the second entry of each pair is merged into the word
 * (shift amount) and the condition for the trailing odd entry are not
 * visible in this excerpt - confirm against the full source.
 */
278 static GLuint
t_vir1(uint32_t * dst
, int swizzle
[][4], GLuint nr
)
/* Full pairs. */
282 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
283 dst
[i
>> 1] = t_swizzle(swizzle
[i
]) | R300_INPUT_ROUTE_ENABLE
;
285 (t_swizzle(swizzle
[i
+ 1]) | R300_INPUT_ROUTE_ENABLE
)
/* Trailing entry. */
291 t_swizzle(swizzle
[nr
- 1]) | R300_INPUT_ROUTE_ENABLE
;
293 return (nr
+ 1) >> 1;
/*
 * Translate the InputsRead attribute bitmask into R300_INPUT_CNTL_* bits.
 *
 * POS, NORMAL and COLOR0 map to R300_INPUT_CNTL_POS / _NORMAL / _COLOR.
 * For every texture unit i below ctx->Const.MaxTextureUnits whose
 * VERT_ATTRIB_TEX0 + i bit is set, R300_INPUT_CNTL_TC0 << i is added.
 *
 * Side effect: r300->state.texture.tc_count is reset to 0 and then
 * incremented once per texture coordinate set that is read.
 * NOTE(review): the declaration of vic_1 and the return statement are not
 * visible in this excerpt; presumably vic_1 starts at 0 and is the return
 * value - confirm.
 */
296 static GLuint
t_vic(GLcontext
* ctx
, GLuint InputsRead
)
298 r300ContextPtr r300
= R300_CONTEXT(ctx
);
301 if (InputsRead
& (1 << VERT_ATTRIB_POS
))
302 vic_1
|= R300_INPUT_CNTL_POS
;
304 if (InputsRead
& (1 << VERT_ATTRIB_NORMAL
))
305 vic_1
|= R300_INPUT_CNTL_NORMAL
;
307 if (InputsRead
& (1 << VERT_ATTRIB_COLOR0
))
308 vic_1
|= R300_INPUT_CNTL_COLOR
;
/* Count texture coordinate sets and flag each one that is read. */
310 r300
->state
.texture
.tc_count
= 0;
311 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
312 if (InputsRead
& (1 << (VERT_ATTRIB_TEX0
+ i
))) {
313 r300
->state
.texture
.tc_count
++;
314 vic_1
|= R300_INPUT_CNTL_TC0
<< i
;
320 /* Emit vertex data to GART memory
321 * Route inputs to the vertex processor
322 * This function should never return R300_FALLBACK_TCL when using software tcl.
 */
/*
 * r300EmitArrays - make the current vertex arrays available to the hardware
 * and program the input routing / output format state for them.
 *
 * Visible steps:
 *  1. Determine `inputs`, InputsRead and OutputsWritten, either from the
 *     current vertex shader (prog->inputs and its key) or from the TNL
 *     render_inputs_bitset together with r300->state.sw_tcl_inputs.
 *     NOTE(review): the condition choosing between the two sources is not
 *     visible in this excerpt; presumably hardware vs. software TCL - confirm.
 *  2. In a branch that tests chip_flags & RADEON_CHIPSET_TCL, copy `inputs`
 *     into vir_inputs and force fixed slots for POS (0), COLOR0 (2),
 *     COLOR1 (3) and TEXn (6 + n).
 *  3. Count the enabled attributes; more than R300_MAX_AOS_ARRAYS returns
 *     R300_FALLBACK_TCL.
 *  4. For each array: start from swizzle ZERO, ZERO, ZERO, ONE; if the data
 *     already lives in GART memory reference it in place (a stride that is
 *     not a multiple of 4 returns R300_FALLBACK_TCL), otherwise copy it with
 *     r300EmitVec(); then search for a `fix` that makes the offset
 *     acceptable, adjusting the swizzle accordingly, or fall back.
 *  5. Program the vir[0], vir[1], vic and vof state atoms.
 *  6. Store the array count in rmesa->state.aos_count.
 *
 * Returns R300_FALLBACK_NONE on success, R300_FALLBACK_TCL otherwise.
 *
 * Fix in this revision: the position test in step 2 masked InputsRead with
 * the bare attribute index VERT_ATTRIB_POS; it now uses
 * (1 << VERT_ATTRIB_POS) like every other attribute test here.
 */
325 int r300EmitArrays(GLcontext
* ctx
)
327 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
328 r300ContextPtr r300
= rmesa
;
329 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
330 struct vertex_buffer
*vb
= &tnl
->vb
;
332 GLuint count
= vb
->Count
;
334 GLuint InputsRead
= 0, OutputsWritten
= 0;
336 int vir_inputs
[VERT_ATTRIB_MAX
];
337 GLint tab
[VERT_ATTRIB_MAX
];
338 int swizzle
[VERT_ATTRIB_MAX
][4];
/* Source 1: routing and masks come from the current vertex shader. */
341 struct r300_vertex_program
*prog
=
342 (struct r300_vertex_program
*)
343 CURRENT_VERTEX_SHADER(ctx
);
344 inputs
= prog
->inputs
;
345 InputsRead
= CURRENT_VERTEX_SHADER(ctx
)->key
.InputsRead
;
346 OutputsWritten
= CURRENT_VERTEX_SHADER(ctx
)->key
.OutputsWritten
;
/* Source 2: derive the masks from the TNL render inputs. */
348 DECLARE_RENDERINPUTS(inputs_bitset
);
349 inputs
= r300
->state
.sw_tcl_inputs
;
351 RENDERINPUTS_COPY(inputs_bitset
,
352 TNL_CONTEXT(ctx
)->render_inputs_bitset
);
354 assert(RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_POS
));
355 InputsRead
|= 1 << VERT_ATTRIB_POS
;
356 OutputsWritten
|= 1 << VERT_RESULT_HPOS
;
358 assert(RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_NORMAL
)
361 assert(RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_COLOR0
));
362 InputsRead
|= 1 << VERT_ATTRIB_COLOR0
;
363 OutputsWritten
|= 1 << VERT_RESULT_COL0
;
365 if (RENDERINPUTS_TEST(inputs_bitset
, _TNL_ATTRIB_COLOR1
)) {
366 InputsRead
|= 1 << VERT_ATTRIB_COLOR1
;
367 OutputsWritten
|= 1 << VERT_RESULT_COL1
;
370 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
371 if (RENDERINPUTS_TEST
372 (inputs_bitset
, _TNL_ATTRIB_TEX(i
))) {
373 InputsRead
|= 1 << (VERT_ATTRIB_TEX0
+ i
);
374 OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ i
);
377 for (i
= 0, nr
= 0; i
< VERT_ATTRIB_MAX
; i
++)
378 if (InputsRead
& (1 << i
))
384 (r300
->radeon
.radeonScreen
->
385 chip_flags
& RADEON_CHIPSET_TCL
)) {
386 /* Fixed, apply to vir0 only */
387 memcpy(vir_inputs
, inputs
,
388 VERT_ATTRIB_MAX
* sizeof(int));
/*
 * Test the POS bit with a shifted mask, consistent with the other attribute
 * tests below. Masking with the bare attribute index tests the wrong bits
 * (and can never be true if the index is 0, as it presumably is).
 */
391 if (InputsRead
& (1 << VERT_ATTRIB_POS)
)
392 inputs
[VERT_ATTRIB_POS
] = 0;
394 if (InputsRead
& (1 << VERT_ATTRIB_COLOR0
))
395 inputs
[VERT_ATTRIB_COLOR0
] = 2;
397 if (InputsRead
& (1 << VERT_ATTRIB_COLOR1
))
398 inputs
[VERT_ATTRIB_COLOR1
] = 3;
400 for (i
= VERT_ATTRIB_TEX0
; i
<= VERT_ATTRIB_TEX7
; i
++)
401 if (InputsRead
& (1 << i
))
402 inputs
[i
] = 6 + (i
- VERT_ATTRIB_TEX0
);
405 RENDERINPUTS_COPY(rmesa
->state
.render_inputs_bitset
,
409 assert(OutputsWritten
);
/* Count the enabled attributes. */
411 for (i
= 0, nr
= 0; i
< VERT_ATTRIB_MAX
; i
++)
412 if (InputsRead
& (1 << i
))
415 if (nr
> R300_MAX_AOS_ARRAYS
)
416 return R300_FALLBACK_TCL
;
/* Per-array setup: swizzle defaults, data placement, offset fix-up. */
418 for (i
= 0; i
< nr
; i
++) {
420 int comp_size
, fix
, found
= 0;
422 swizzle
[i
][0] = SWIZZLE_ZERO
;
423 swizzle
[i
][1] = SWIZZLE_ZERO
;
424 swizzle
[i
][2] = SWIZZLE_ZERO
;
425 swizzle
[i
][3] = SWIZZLE_ONE
;
427 for (ci
= 0; ci
< vb
->AttribPtr
[tab
[i
]]->size
; ci
++)
/* Data already in GART memory: reference it in place. */
430 if (r300IsGartMemory(rmesa
, vb
->AttribPtr
[tab
[i
]]->data
,
431 /*(count-1)*stride */ 4)) {
432 if (vb
->AttribPtr
[tab
[i
]]->stride
% 4)
433 return R300_FALLBACK_TCL
;
435 rmesa
->state
.aos
[i
].address
=
436 (void *)(vb
->AttribPtr
[tab
[i
]]->data
);
437 rmesa
->state
.aos
[i
].start
= 0;
438 rmesa
->state
.aos
[i
].aos_offset
=
439 r300GartOffsetFromVirtual(rmesa
,
441 AttribPtr
[tab
[i
]]->data
);
442 rmesa
->state
.aos
[i
].aos_stride
=
443 vb
->AttribPtr
[tab
[i
]]->stride
/ 4;
445 rmesa
->state
.aos
[i
].aos_size
=
446 vb
->AttribPtr
[tab
[i
]]->size
;
/* Otherwise copy the array into a freshly allocated DMA region. */
448 r300EmitVec(ctx
, &rmesa
->state
.aos
[i
],
449 vb
->AttribPtr
[tab
[i
]]->data
,
450 vb
->AttribPtr
[tab
[i
]]->size
,
451 vb
->AttribPtr
[tab
[i
]]->stride
, count
);
454 rmesa
->state
.aos
[i
].aos_size
= vb
->AttribPtr
[tab
[i
]]->size
;
456 comp_size
= _mesa_sizeof_type(GL_FLOAT
);
/*
 * Try moving the offset back by `fix` components; on success the swizzle is
 * shifted by the same amount.
 */
458 for (fix
= 0; fix
<= 4 - vb
->AttribPtr
[tab
[i
]]->size
; fix
++) {
459 if ((rmesa
->state
.aos
[i
].aos_offset
-
460 comp_size
* fix
) % 4)
469 WARN_ONCE("Feeling lucky?\n");
472 rmesa
->state
.aos
[i
].aos_offset
-= comp_size
* fix
;
474 for (ci
= 0; ci
< vb
->AttribPtr
[tab
[i
]]->size
; ci
++)
475 swizzle
[i
][ci
] += fix
;
478 ("Cannot handle offset %x with stride %d, comp %d\n",
479 rmesa
->state
.aos
[i
].aos_offset
,
480 rmesa
->state
.aos
[i
].aos_stride
,
481 vb
->AttribPtr
[tab
[i
]]->size
);
482 return R300_FALLBACK_TCL
;
486 /* setup INPUT_ROUTE */
487 R300_STATECHANGE(r300
, vir
[0]);
488 ((drm_r300_cmd_header_t
*) r300
->hw
.vir
[0].cmd
)->packet0
.count
=
489 t_vir0(&r300
->hw
.vir
[0].cmd
[R300_VIR_CNTL_0
], vb
->AttribPtr
,
492 R300_STATECHANGE(r300
, vir
[1]);
493 ((drm_r300_cmd_header_t
*) r300
->hw
.vir
[1].cmd
)->packet0
.count
=
494 t_vir1(&r300
->hw
.vir
[1].cmd
[R300_VIR_CNTL_0
], swizzle
, nr
);
496 /* Set up input_cntl */
497 /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
498 R300_STATECHANGE(r300
, vic
);
499 r300
->hw
.vic
.cmd
[R300_VIC_CNTL_0
] = 0x5555; /* Hard coded value, no idea what it means */
500 r300
->hw
.vic
.cmd
[R300_VIC_CNTL_1
] = t_vic(ctx
, InputsRead
);
502 /* Stage 3: VAP output */
504 R300_STATECHANGE(r300
, vof
);
506 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] = 0;
507 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_1
] = 0;
509 if (OutputsWritten
& (1 << VERT_RESULT_HPOS
))
510 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
511 R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT
;
513 if (OutputsWritten
& (1 << VERT_RESULT_COL0
))
514 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
515 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT
;
517 if (OutputsWritten
& (1 << VERT_RESULT_COL1
))
518 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
519 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT
;
521 /*if(OutputsWritten & (1 << VERT_RESULT_BFC0))
522 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
524 if(OutputsWritten & (1 << VERT_RESULT_BFC1))
525 r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; */
526 //if(OutputsWritten & (1 << VERT_RESULT_FOGC))
528 if (OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
529 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_0
] |=
530 R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT
;
/* Each written texture coordinate set i ORs the value 4 into a 3-bit field at bit 3 * i. */
532 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
533 if (OutputsWritten
& (1 << (VERT_RESULT_TEX0
+ i
)))
534 r300
->hw
.vof
.cmd
[R300_VOF_CNTL_1
] |= (4 << (3 * i
));
/* Remember how many arrays were set up, for r300UseArrays/r300ReleaseArrays. */
536 rmesa
->state
.aos_count
= nr
;
538 return R300_FALLBACK_NONE
;
/*
 * Call r300_mem_use() on every buffer backing the current arrays: the
 * element (index) DMA buffer, if one is attached, and each of the
 * rmesa->state.aos_count attribute arrays that has a buffer. Arrays without
 * a `buf` are skipped.
 */
542 void r300UseArrays(GLcontext
* ctx
)
544 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
/* Element buffer. */
547 if (rmesa
->state
.elt_dma
.buf
)
548 r300_mem_use(rmesa
, rmesa
->state
.elt_dma
.buf
->id
);
/* Attribute array buffers. */
550 for (i
= 0; i
< rmesa
->state
.aos_count
; i
++) {
551 if (rmesa
->state
.aos
[i
].buf
)
552 r300_mem_use(rmesa
, rmesa
->state
.aos
[i
].buf
->id
);
/*
 * Release the DMA regions used by the current arrays: the element (index)
 * region and each of the rmesa->state.aos_count attribute array regions.
 * This function's name is passed to r300ReleaseDmaRegion() as its last
 * argument.
 */
557 void r300ReleaseArrays(GLcontext
* ctx
)
559 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
562 r300ReleaseDmaRegion(rmesa
, &rmesa
->state
.elt_dma
, __FUNCTION__
);
563 for (i
= 0; i
< rmesa
->state
.aos_count
; i
++) {
564 r300ReleaseDmaRegion(rmesa
, &rmesa
->state
.aos
[i
], __FUNCTION__
);