2 /**************************************************************************
4 Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
5 Tungsten Graphics Inc, Cedar Park, TX.
9 Permission is hereby granted, free of charge, to any person obtaining a
10 copy of this software and associated documentation files (the "Software"),
11 to deal in the Software without restriction, including without limitation
12 on the rights to use, copy, modify, merge, publish, distribute, sub
13 license, and/or sell copies of the Software, and to permit persons to whom
14 the Software is furnished to do so, subject to the following conditions:
16 The above copyright notice and this permission notice (including the next
17 paragraph) shall be included in all copies or substantial portions of the
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
23 ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
24 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
25 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
26 USE OR OTHER DEALINGS IN THE SOFTWARE.
28 **************************************************************************/
32 * Keith Whitwell <keith@tungstengraphics.com>
39 #include "radeon_context.h"
40 #include "radeon_ioctl.h"
41 #include "radeon_sanity.h"
43 /* Set this '1' to get more verbiage.
45 #define MORE_VERBOSE 1
48 #define VERBOSE (RADEON_DEBUG & DEBUG_VERBOSE)
52 #define NORMAL (RADEON_DEBUG & DEBUG_VERBOSE)
56 /* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in
57 * 1.3 cmdbuffers allow all previous state to be updated as well as
58 * the tcl scalar and vector areas.
64 } packet
[RADEON_MAX_STATE_PACKETS
] = {
65 { RADEON_PP_MISC
,7,"RADEON_PP_MISC" },
66 { RADEON_PP_CNTL
,3,"RADEON_PP_CNTL" },
67 { RADEON_RB3D_COLORPITCH
,1,"RADEON_RB3D_COLORPITCH" },
68 { RADEON_RE_LINE_PATTERN
,2,"RADEON_RE_LINE_PATTERN" },
69 { RADEON_SE_LINE_WIDTH
,1,"RADEON_SE_LINE_WIDTH" },
70 { RADEON_PP_LUM_MATRIX
,1,"RADEON_PP_LUM_MATRIX" },
71 { RADEON_PP_ROT_MATRIX_0
,2,"RADEON_PP_ROT_MATRIX_0" },
72 { RADEON_RB3D_STENCILREFMASK
,3,"RADEON_RB3D_STENCILREFMASK" },
73 { RADEON_SE_VPORT_XSCALE
,6,"RADEON_SE_VPORT_XSCALE" },
74 { RADEON_SE_CNTL
,2,"RADEON_SE_CNTL" },
75 { RADEON_SE_CNTL_STATUS
,1,"RADEON_SE_CNTL_STATUS" },
76 { RADEON_RE_MISC
,1,"RADEON_RE_MISC" },
77 { RADEON_PP_TXFILTER_0
,6,"RADEON_PP_TXFILTER_0" },
78 { RADEON_PP_BORDER_COLOR_0
,1,"RADEON_PP_BORDER_COLOR_0" },
79 { RADEON_PP_TXFILTER_1
,6,"RADEON_PP_TXFILTER_1" },
80 { RADEON_PP_BORDER_COLOR_1
,1,"RADEON_PP_BORDER_COLOR_1" },
81 { RADEON_PP_TXFILTER_2
,6,"RADEON_PP_TXFILTER_2" },
82 { RADEON_PP_BORDER_COLOR_2
,1,"RADEON_PP_BORDER_COLOR_2" },
83 { RADEON_SE_ZBIAS_FACTOR
,2,"RADEON_SE_ZBIAS_FACTOR" },
84 { RADEON_SE_TCL_OUTPUT_VTX_FMT
,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
85 { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED
,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
86 { 0, 4, "R200_PP_TXCBLEND_0" },
87 { 0, 4, "R200_PP_TXCBLEND_1" },
88 { 0, 4, "R200_PP_TXCBLEND_2" },
89 { 0, 4, "R200_PP_TXCBLEND_3" },
90 { 0, 4, "R200_PP_TXCBLEND_4" },
91 { 0, 4, "R200_PP_TXCBLEND_5" },
92 { 0, 4, "R200_PP_TXCBLEND_6" },
93 { 0, 4, "R200_PP_TXCBLEND_7" },
94 { 0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
95 { 0, 6, "R200_PP_TFACTOR_0" },
96 { 0, 4, "R200_SE_VTX_FMT_0" },
97 { 0, 1, "R200_SE_VAP_CNTL" },
98 { 0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
99 { 0, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
100 { 0, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
101 { 0, 6, "R200_PP_TXFILTER_0" },
102 { 0, 6, "R200_PP_TXFILTER_1" },
103 { 0, 6, "R200_PP_TXFILTER_2" },
104 { 0, 6, "R200_PP_TXFILTER_3" },
105 { 0, 6, "R200_PP_TXFILTER_4" },
106 { 0, 6, "R200_PP_TXFILTER_5" },
107 { 0, 1, "R200_PP_TXOFFSET_0" },
108 { 0, 1, "R200_PP_TXOFFSET_1" },
109 { 0, 1, "R200_PP_TXOFFSET_2" },
110 { 0, 1, "R200_PP_TXOFFSET_3" },
111 { 0, 1, "R200_PP_TXOFFSET_4" },
112 { 0, 1, "R200_PP_TXOFFSET_5" },
113 { 0, 1, "R200_SE_VTE_CNTL" },
114 { 0, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
115 { 0, 1, "R200_PP_TAM_DEBUG3" },
116 { 0, 1, "R200_PP_CNTL_X" },
117 { 0, 1, "R200_RB3D_DEPTHXY_OFFSET" },
118 { 0, 1, "R200_RE_AUX_SCISSOR_CNTL" },
119 { 0, 2, "R200_RE_SCISSOR_TL_0" },
120 { 0, 2, "R200_RE_SCISSOR_TL_1" },
121 { 0, 2, "R200_RE_SCISSOR_TL_2" },
122 { 0, 1, "R200_SE_VAP_CNTL_STATUS" },
123 { 0, 1, "R200_SE_VTX_STATE_CNTL" },
124 { 0, 1, "R200_RE_POINTSIZE" },
125 { 0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
126 { 0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
127 { 0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
128 { 0, 1, "R200_PP_CUBIC_FACES_1" },
129 { 0, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
130 { 0, 1, "R200_PP_CUBIC_FACES_2" },
131 { 0, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
132 { 0, 1, "R200_PP_CUBIC_FACES_3" },
133 { 0, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
134 { 0, 1, "R200_PP_CUBIC_FACES_4" },
135 { 0, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
136 { 0, 1, "R200_PP_CUBIC_FACES_5" },
137 { 0, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
138 { RADEON_PP_TEX_SIZE_0
, 2, "RADEON_PP_TEX_SIZE_0" },
139 { RADEON_PP_TEX_SIZE_1
, 2, "RADEON_PP_TEX_SIZE_1" },
140 { RADEON_PP_TEX_SIZE_2
, 2, "RADEON_PP_TEX_SIZE_1" },
148 static struct reg_names reg_names
[] = {
149 { RADEON_PP_MISC
, "RADEON_PP_MISC" },
150 { RADEON_PP_FOG_COLOR
, "RADEON_PP_FOG_COLOR" },
151 { RADEON_RE_SOLID_COLOR
, "RADEON_RE_SOLID_COLOR" },
152 { RADEON_RB3D_BLENDCNTL
, "RADEON_RB3D_BLENDCNTL" },
153 { RADEON_RB3D_DEPTHOFFSET
, "RADEON_RB3D_DEPTHOFFSET" },
154 { RADEON_RB3D_DEPTHPITCH
, "RADEON_RB3D_DEPTHPITCH" },
155 { RADEON_RB3D_ZSTENCILCNTL
, "RADEON_RB3D_ZSTENCILCNTL" },
156 { RADEON_PP_CNTL
, "RADEON_PP_CNTL" },
157 { RADEON_RB3D_CNTL
, "RADEON_RB3D_CNTL" },
158 { RADEON_RB3D_COLOROFFSET
, "RADEON_RB3D_COLOROFFSET" },
159 { RADEON_RB3D_COLORPITCH
, "RADEON_RB3D_COLORPITCH" },
160 { RADEON_SE_CNTL
, "RADEON_SE_CNTL" },
161 { RADEON_SE_COORD_FMT
, "RADEON_SE_COORDFMT" },
162 { RADEON_SE_CNTL_STATUS
, "RADEON_SE_CNTL_STATUS" },
163 { RADEON_RE_LINE_PATTERN
, "RADEON_RE_LINE_PATTERN" },
164 { RADEON_RE_LINE_STATE
, "RADEON_RE_LINE_STATE" },
165 { RADEON_SE_LINE_WIDTH
, "RADEON_SE_LINE_WIDTH" },
166 { RADEON_RB3D_STENCILREFMASK
, "RADEON_RB3D_STENCILREFMASK" },
167 { RADEON_RB3D_ROPCNTL
, "RADEON_RB3D_ROPCNTL" },
168 { RADEON_RB3D_PLANEMASK
, "RADEON_RB3D_PLANEMASK" },
169 { RADEON_SE_VPORT_XSCALE
, "RADEON_SE_VPORT_XSCALE" },
170 { RADEON_SE_VPORT_XOFFSET
, "RADEON_SE_VPORT_XOFFSET" },
171 { RADEON_SE_VPORT_YSCALE
, "RADEON_SE_VPORT_YSCALE" },
172 { RADEON_SE_VPORT_YOFFSET
, "RADEON_SE_VPORT_YOFFSET" },
173 { RADEON_SE_VPORT_ZSCALE
, "RADEON_SE_VPORT_ZSCALE" },
174 { RADEON_SE_VPORT_ZOFFSET
, "RADEON_SE_VPORT_ZOFFSET" },
175 { RADEON_RE_MISC
, "RADEON_RE_MISC" },
176 { RADEON_PP_TXFILTER_0
, "RADEON_PP_TXFILTER_0" },
177 { RADEON_PP_TXFILTER_1
, "RADEON_PP_TXFILTER_1" },
178 { RADEON_PP_TXFILTER_2
, "RADEON_PP_TXFILTER_2" },
179 { RADEON_PP_TXFORMAT_0
, "RADEON_PP_TXFORMAT_0" },
180 { RADEON_PP_TXFORMAT_1
, "RADEON_PP_TXFORMAT_1" },
181 { RADEON_PP_TXFORMAT_2
, "RADEON_PP_TXFORMAT_3" },
182 { RADEON_PP_TXOFFSET_0
, "RADEON_PP_TXOFFSET_0" },
183 { RADEON_PP_TXOFFSET_1
, "RADEON_PP_TXOFFSET_1" },
184 { RADEON_PP_TXOFFSET_2
, "RADEON_PP_TXOFFSET_3" },
185 { RADEON_PP_TXCBLEND_0
, "RADEON_PP_TXCBLEND_0" },
186 { RADEON_PP_TXCBLEND_1
, "RADEON_PP_TXCBLEND_1" },
187 { RADEON_PP_TXCBLEND_2
, "RADEON_PP_TXCBLEND_3" },
188 { RADEON_PP_TXABLEND_0
, "RADEON_PP_TXABLEND_0" },
189 { RADEON_PP_TXABLEND_1
, "RADEON_PP_TXABLEND_1" },
190 { RADEON_PP_TXABLEND_2
, "RADEON_PP_TXABLEND_3" },
191 { RADEON_PP_TFACTOR_0
, "RADEON_PP_TFACTOR_0" },
192 { RADEON_PP_TFACTOR_1
, "RADEON_PP_TFACTOR_1" },
193 { RADEON_PP_TFACTOR_2
, "RADEON_PP_TFACTOR_3" },
194 { RADEON_PP_BORDER_COLOR_0
, "RADEON_PP_BORDER_COLOR_0" },
195 { RADEON_PP_BORDER_COLOR_1
, "RADEON_PP_BORDER_COLOR_1" },
196 { RADEON_PP_BORDER_COLOR_2
, "RADEON_PP_BORDER_COLOR_3" },
197 { RADEON_SE_ZBIAS_FACTOR
, "RADEON_SE_ZBIAS_FACTOR" },
198 { RADEON_SE_ZBIAS_CONSTANT
, "RADEON_SE_ZBIAS_CONSTANT" },
199 { RADEON_SE_TCL_OUTPUT_VTX_FMT
, "RADEON_SE_TCL_OUTPUT_VTXFMT" },
200 { RADEON_SE_TCL_OUTPUT_VTX_SEL
, "RADEON_SE_TCL_OUTPUT_VTXSEL" },
201 { RADEON_SE_TCL_MATRIX_SELECT_0
, "RADEON_SE_TCL_MATRIX_SELECT_0" },
202 { RADEON_SE_TCL_MATRIX_SELECT_1
, "RADEON_SE_TCL_MATRIX_SELECT_1" },
203 { RADEON_SE_TCL_UCP_VERT_BLEND_CTL
, "RADEON_SE_TCL_UCP_VERT_BLEND_CTL" },
204 { RADEON_SE_TCL_TEXTURE_PROC_CTL
, "RADEON_SE_TCL_TEXTURE_PROC_CTL" },
205 { RADEON_SE_TCL_LIGHT_MODEL_CTL
, "RADEON_SE_TCL_LIGHT_MODEL_CTL" },
206 { RADEON_SE_TCL_PER_LIGHT_CTL_0
, "RADEON_SE_TCL_PER_LIGHT_CTL_0" },
207 { RADEON_SE_TCL_PER_LIGHT_CTL_1
, "RADEON_SE_TCL_PER_LIGHT_CTL_1" },
208 { RADEON_SE_TCL_PER_LIGHT_CTL_2
, "RADEON_SE_TCL_PER_LIGHT_CTL_2" },
209 { RADEON_SE_TCL_PER_LIGHT_CTL_3
, "RADEON_SE_TCL_PER_LIGHT_CTL_3" },
210 { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED
, "RADEON_SE_TCL_EMMISSIVE_RED" },
211 { RADEON_SE_TCL_MATERIAL_EMMISSIVE_GREEN
, "RADEON_SE_TCL_EMMISSIVE_GREEN" },
212 { RADEON_SE_TCL_MATERIAL_EMMISSIVE_BLUE
, "RADEON_SE_TCL_EMMISSIVE_BLUE" },
213 { RADEON_SE_TCL_MATERIAL_EMMISSIVE_ALPHA
, "RADEON_SE_TCL_EMMISSIVE_ALPHA" },
214 { RADEON_SE_TCL_MATERIAL_AMBIENT_RED
, "RADEON_SE_TCL_AMBIENT_RED" },
215 { RADEON_SE_TCL_MATERIAL_AMBIENT_GREEN
, "RADEON_SE_TCL_AMBIENT_GREEN" },
216 { RADEON_SE_TCL_MATERIAL_AMBIENT_BLUE
, "RADEON_SE_TCL_AMBIENT_BLUE" },
217 { RADEON_SE_TCL_MATERIAL_AMBIENT_ALPHA
, "RADEON_SE_TCL_AMBIENT_ALPHA" },
218 { RADEON_SE_TCL_MATERIAL_DIFFUSE_RED
, "RADEON_SE_TCL_DIFFUSE_RED" },
219 { RADEON_SE_TCL_MATERIAL_DIFFUSE_GREEN
, "RADEON_SE_TCL_DIFFUSE_GREEN" },
220 { RADEON_SE_TCL_MATERIAL_DIFFUSE_BLUE
, "RADEON_SE_TCL_DIFFUSE_BLUE" },
221 { RADEON_SE_TCL_MATERIAL_DIFFUSE_ALPHA
, "RADEON_SE_TCL_DIFFUSE_ALPHA" },
222 { RADEON_SE_TCL_MATERIAL_SPECULAR_RED
, "RADEON_SE_TCL_SPECULAR_RED" },
223 { RADEON_SE_TCL_MATERIAL_SPECULAR_GREEN
, "RADEON_SE_TCL_SPECULAR_GREEN" },
224 { RADEON_SE_TCL_MATERIAL_SPECULAR_BLUE
, "RADEON_SE_TCL_SPECULAR_BLUE" },
225 { RADEON_SE_TCL_MATERIAL_SPECULAR_ALPHA
, "RADEON_SE_TCL_SPECULAR_ALPHA" },
226 { RADEON_SE_TCL_SHININESS
, "RADEON_SE_TCL_SHININESS" },
227 { RADEON_SE_COORD_FMT
, "RADEON_SE_COORD_FMT" },
228 { RADEON_PP_TEX_SIZE_0
, "RADEON_PP_TEX_SIZE_0" },
229 { RADEON_PP_TEX_SIZE_1
, "RADEON_PP_TEX_SIZE_1" },
230 { RADEON_PP_TEX_SIZE_2
, "RADEON_PP_TEX_SIZE_2" },
231 { RADEON_PP_TEX_SIZE_0
+4, "RADEON_PP_TEX_PITCH_0" },
232 { RADEON_PP_TEX_SIZE_1
+4, "RADEON_PP_TEX_PITCH_1" },
233 { RADEON_PP_TEX_SIZE_2
+4, "RADEON_PP_TEX_PITCH_2" },
236 static struct reg_names scalar_names
[] = {
237 { RADEON_SS_LIGHT_DCD_ADDR
, "LIGHT_DCD" },
238 { RADEON_SS_LIGHT_SPOT_EXPONENT_ADDR
, "LIGHT_SPOT_EXPONENT" },
239 { RADEON_SS_LIGHT_SPOT_CUTOFF_ADDR
, "LIGHT_SPOT_CUTOFF" },
240 { RADEON_SS_LIGHT_SPECULAR_THRESH_ADDR
, "LIGHT_SPECULAR_THRESH" },
241 { RADEON_SS_LIGHT_RANGE_CUTOFF_ADDR
, "LIGHT_RANGE_CUTOFF" },
242 { RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR
, "VERT_GUARD_CLIP" },
243 { RADEON_SS_VERT_GUARD_DISCARD_ADJ_ADDR
, "VERT_GUARD_DISCARD" },
244 { RADEON_SS_HORZ_GUARD_CLIP_ADJ_ADDR
, "HORZ_GUARD_CLIP" },
245 { RADEON_SS_HORZ_GUARD_DISCARD_ADJ_ADDR
, "HORZ_GUARD_DISCARD" },
246 { RADEON_SS_SHININESS
, "SHININESS" },
250 /* Puff these out to make them look like normal (dword) registers.
252 static struct reg_names vector_names
[] = {
253 { RADEON_VS_MATRIX_0_ADDR
* 4, "MATRIX_0" },
254 { RADEON_VS_MATRIX_1_ADDR
* 4, "MATRIX_1" },
255 { RADEON_VS_MATRIX_2_ADDR
* 4, "MATRIX_2" },
256 { RADEON_VS_MATRIX_3_ADDR
* 4, "MATRIX_3" },
257 { RADEON_VS_MATRIX_4_ADDR
* 4, "MATRIX_4" },
258 { RADEON_VS_MATRIX_5_ADDR
* 4, "MATRIX_5" },
259 { RADEON_VS_MATRIX_6_ADDR
* 4, "MATRIX_6" },
260 { RADEON_VS_MATRIX_7_ADDR
* 4, "MATRIX_7" },
261 { RADEON_VS_MATRIX_8_ADDR
* 4, "MATRIX_8" },
262 { RADEON_VS_MATRIX_9_ADDR
* 4, "MATRIX_9" },
263 { RADEON_VS_MATRIX_10_ADDR
* 4, "MATRIX_10" },
264 { RADEON_VS_MATRIX_11_ADDR
* 4, "MATRIX_11" },
265 { RADEON_VS_MATRIX_12_ADDR
* 4, "MATRIX_12" },
266 { RADEON_VS_MATRIX_13_ADDR
* 4, "MATRIX_13" },
267 { RADEON_VS_MATRIX_14_ADDR
* 4, "MATRIX_14" },
268 { RADEON_VS_MATRIX_15_ADDR
* 4, "MATRIX_15" },
269 { RADEON_VS_LIGHT_AMBIENT_ADDR
* 4, "LIGHT_AMBIENT" },
270 { RADEON_VS_LIGHT_DIFFUSE_ADDR
* 4, "LIGHT_DIFFUSE" },
271 { RADEON_VS_LIGHT_SPECULAR_ADDR
* 4, "LIGHT_SPECULAR" },
272 { RADEON_VS_LIGHT_DIRPOS_ADDR
* 4, "LIGHT_DIRPOS" },
273 { RADEON_VS_LIGHT_HWVSPOT_ADDR
* 4, "LIGHT_HWVSPOT" },
274 { RADEON_VS_LIGHT_ATTENUATION_ADDR
* 4, "LIGHT_ATTENUATION" },
275 { RADEON_VS_MATRIX_EYE2CLIP_ADDR
* 4, "MATRIX_EYE2CLIP" },
276 { RADEON_VS_UCP_ADDR
* 4, "UCP" },
277 { RADEON_VS_GLOBAL_AMBIENT_ADDR
* 4, "GLOBAL_AMBIENT" },
278 { RADEON_VS_FOG_PARAM_ADDR
* 4, "FOG_PARAM" },
279 { RADEON_VS_EYE_VECTOR_ADDR
* 4, "EYE_VECTOR" },
/* One register value, readable either as a float (f) or as an int (i).
 * Float-flagged registers are tracked through .f, everything else
 * through .i.
 */
union fi {
   float f;
   int i;
};
291 struct reg_names
*closest
;
301 static struct reg regs
[Elements(reg_names
)+1];
302 static struct reg scalars
[512+1];
303 static struct reg vectors
[512*4+1];
305 static int total
, total_changed
, bufs
;
307 static void init_regs( void )
309 struct reg_names
*tmp
;
312 for (i
= 0 ; i
< Elements(regs
) ; i
++) {
313 regs
[i
].idx
= reg_names
[i
].idx
;
314 regs
[i
].closest
= ®_names
[i
];
318 for (i
= 0, tmp
= scalar_names
; i
< Elements(scalars
) ; i
++) {
319 if (tmp
[1].idx
== i
) tmp
++;
321 scalars
[i
].closest
= tmp
;
322 scalars
[i
].flags
= ISFLOAT
;
325 for (i
= 0, tmp
= vector_names
; i
< Elements(vectors
) ; i
++) {
326 if (tmp
[1].idx
*4 == i
) tmp
++;
328 vectors
[i
].closest
= tmp
;
329 vectors
[i
].flags
= ISFLOAT
|ISVEC
;
332 regs
[Elements(regs
)-1].idx
= -1;
333 scalars
[Elements(scalars
)-1].idx
= -1;
334 vectors
[Elements(vectors
)-1].idx
= -1;
337 static int find_or_add_value( struct reg
*reg
, int val
)
341 for ( j
= 0 ; j
< reg
->nvalues
; j
++)
342 if ( val
== reg
->values
[j
].i
)
345 if (j
== reg
->nalloc
) {
348 reg
->values
= (union fi
*) realloc( reg
->values
,
349 reg
->nalloc
* sizeof(union fi
) );
352 reg
->values
[reg
->nvalues
++].i
= val
;
356 static struct reg
*lookup_reg( struct reg
*tab
, int reg
)
360 for (i
= 0 ; tab
[i
].idx
!= -1 ; i
++) {
361 if (tab
[i
].idx
== reg
)
365 fprintf(stderr
, "*** unknown reg 0x%x\n", reg
);
370 static const char *get_reg_name( struct reg
*reg
)
374 if (reg
->idx
== reg
->closest
->idx
)
375 return reg
->closest
->name
;
378 if (reg
->flags
& ISVEC
) {
379 if (reg
->idx
/4 != reg
->closest
->idx
)
380 sprintf(tmp
, "%s+%d[%d]",
382 (reg
->idx
/4) - reg
->closest
->idx
,
385 sprintf(tmp
, "%s[%d]", reg
->closest
->name
, reg
->idx
%4);
388 if (reg
->idx
!= reg
->closest
->idx
)
389 sprintf(tmp
, "%s+%d", reg
->closest
->name
, reg
->idx
- reg
->closest
->idx
);
391 sprintf(tmp
, "%s", reg
->closest
->name
);
397 static int print_int_reg_assignment( struct reg
*reg
, int data
)
399 int changed
= (reg
->current
.i
!= data
);
400 int ever_seen
= find_or_add_value( reg
, data
);
402 if (VERBOSE
|| (NORMAL
&& (changed
|| !ever_seen
)))
403 fprintf(stderr
, " %s <-- 0x%x", get_reg_name(reg
), data
);
407 fprintf(stderr
, " *** BRAND NEW VALUE");
409 fprintf(stderr
, " *** CHANGED");
412 reg
->current
.i
= data
;
414 if (VERBOSE
|| (NORMAL
&& (changed
|| !ever_seen
)))
415 fprintf(stderr
, "\n");
421 static int print_float_reg_assignment( struct reg
*reg
, float data
)
423 int changed
= (reg
->current
.f
!= data
);
424 int newmin
= (data
< reg
->vmin
);
425 int newmax
= (data
> reg
->vmax
);
427 if (VERBOSE
|| (NORMAL
&& (newmin
|| newmax
|| changed
)))
428 fprintf(stderr
, " %s <-- %.3f", get_reg_name(reg
), data
);
432 fprintf(stderr
, " *** NEW MIN (prev %.3f)", reg
->vmin
);
436 fprintf(stderr
, " *** NEW MAX (prev %.3f)", reg
->vmax
);
440 fprintf(stderr
, " *** CHANGED");
444 reg
->current
.f
= data
;
446 if (VERBOSE
|| (NORMAL
&& (newmin
|| newmax
|| changed
)))
447 fprintf(stderr
, "\n");
452 static int print_reg_assignment( struct reg
*reg
, int data
)
454 reg
->flags
|= TOUCHED
;
455 if (reg
->flags
& ISFLOAT
)
456 return print_float_reg_assignment( reg
, *(float *)&data
);
458 return print_int_reg_assignment( reg
, data
);
461 static void print_reg( struct reg
*reg
)
463 if (reg
->flags
& TOUCHED
) {
464 if (reg
->flags
& ISFLOAT
) {
465 fprintf(stderr
, " %s == %f\n", get_reg_name(reg
), reg
->current
.f
);
467 fprintf(stderr
, " %s == 0x%x\n", get_reg_name(reg
), reg
->current
.i
);
473 static void dump_state( void )
477 for (i
= 0 ; i
< Elements(regs
) ; i
++)
478 print_reg( ®s
[i
] );
480 for (i
= 0 ; i
< Elements(scalars
) ; i
++)
481 print_reg( &scalars
[i
] );
483 for (i
= 0 ; i
< Elements(vectors
) ; i
++)
484 print_reg( &vectors
[i
] );
489 static int radeon_emit_packets(
490 drmRadeonCmdHeader header
,
491 drmRadeonCmdBuffer
*cmdbuf
)
493 int id
= (int)header
.packet
.packet_id
;
494 int sz
= packet
[id
].len
;
495 int *data
= (int *)cmdbuf
->buf
;
498 if (sz
* sizeof(int) > cmdbuf
->bufsz
) {
499 fprintf(stderr
, "Packet overflows cmdbuf\n");
503 if (!packet
[id
].name
) {
504 fprintf(stderr
, "*** Unknown packet 0 nr %d\n", id
);
510 fprintf(stderr
, "Packet 0 reg %s nr %d\n", packet
[id
].name
, sz
);
512 for ( i
= 0 ; i
< sz
; i
++) {
513 struct reg
*reg
= lookup_reg( regs
, packet
[id
].start
+ i
*4 );
514 if (print_reg_assignment( reg
, data
[i
] ))
519 cmdbuf
->buf
+= sz
* sizeof(int);
520 cmdbuf
->bufsz
-= sz
* sizeof(int);
525 static int radeon_emit_scalars(
526 drmRadeonCmdHeader header
,
527 drmRadeonCmdBuffer
*cmdbuf
)
529 int sz
= header
.scalars
.count
;
530 int *data
= (int *)cmdbuf
->buf
;
531 int start
= header
.scalars
.offset
;
532 int stride
= header
.scalars
.stride
;
536 fprintf(stderr
, "emit scalars, start %d stride %d nr %d (end %d)\n",
537 start
, stride
, sz
, start
+ stride
* sz
);
540 for (i
= 0 ; i
< sz
; i
++, start
+= stride
) {
541 struct reg
*reg
= lookup_reg( scalars
, start
);
542 if (print_reg_assignment( reg
, data
[i
] ))
547 cmdbuf
->buf
+= sz
* sizeof(int);
548 cmdbuf
->bufsz
-= sz
* sizeof(int);
553 static int radeon_emit_scalars2(
554 drmRadeonCmdHeader header
,
555 drmRadeonCmdBuffer
*cmdbuf
)
557 int sz
= header
.scalars
.count
;
558 int *data
= (int *)cmdbuf
->buf
;
559 int start
= header
.scalars
.offset
+ 0x100;
560 int stride
= header
.scalars
.stride
;
564 fprintf(stderr
, "emit scalars2, start %d stride %d nr %d (end %d)\n",
565 start
, stride
, sz
, start
+ stride
* sz
);
567 if (start
+ stride
* sz
> 257) {
568 fprintf(stderr
, "emit scalars OVERFLOW %d/%d/%d\n", start
, stride
, sz
);
572 for (i
= 0 ; i
< sz
; i
++, start
+= stride
) {
573 struct reg
*reg
= lookup_reg( scalars
, start
);
574 if (print_reg_assignment( reg
, data
[i
] ))
579 cmdbuf
->buf
+= sz
* sizeof(int);
580 cmdbuf
->bufsz
-= sz
* sizeof(int);
584 /* Check: inf/nan/extreme-size?
585 * Check: table start, end, nr, etc.
587 static int radeon_emit_vectors(
588 drmRadeonCmdHeader header
,
589 drmRadeonCmdBuffer
*cmdbuf
)
591 int sz
= header
.vectors
.count
;
592 int *data
= (int *)cmdbuf
->buf
;
593 int start
= header
.vectors
.offset
;
594 int stride
= header
.vectors
.stride
;
598 fprintf(stderr
, "emit vectors, start %d stride %d nr %d (end %d) (0x%x)\n",
599 start
, stride
, sz
, start
+ stride
* sz
, header
.i
);
601 /* if (start + stride * (sz/4) > 128) { */
602 /* fprintf(stderr, "emit vectors OVERFLOW %d/%d/%d\n", start, stride, sz); */
606 for (i
= 0 ; i
< sz
; start
+= stride
) {
608 for (j
= 0 ; j
< 4 ; i
++,j
++) {
609 struct reg
*reg
= lookup_reg( vectors
, start
*4+j
);
610 if (print_reg_assignment( reg
, data
[i
] ))
619 cmdbuf
->buf
+= sz
* sizeof(int);
620 cmdbuf
->bufsz
-= sz
* sizeof(int);
625 static int print_vertex_format( int vfmt
)
628 fprintf(stderr
, " %s(%x): %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
632 (vfmt
& RADEON_CP_VC_FRMT_Z
) ? "z," : "",
633 (vfmt
& RADEON_CP_VC_FRMT_W0
) ? "w0," : "",
634 (vfmt
& RADEON_CP_VC_FRMT_FPCOLOR
) ? "fpcolor," : "",
635 (vfmt
& RADEON_CP_VC_FRMT_FPALPHA
) ? "fpalpha," : "",
636 (vfmt
& RADEON_CP_VC_FRMT_PKCOLOR
) ? "pkcolor," : "",
637 (vfmt
& RADEON_CP_VC_FRMT_FPSPEC
) ? "fpspec," : "",
638 (vfmt
& RADEON_CP_VC_FRMT_FPFOG
) ? "fpfog," : "",
639 (vfmt
& RADEON_CP_VC_FRMT_PKSPEC
) ? "pkspec," : "",
640 (vfmt
& RADEON_CP_VC_FRMT_ST0
) ? "st0," : "",
641 (vfmt
& RADEON_CP_VC_FRMT_ST1
) ? "st1," : "",
642 (vfmt
& RADEON_CP_VC_FRMT_Q1
) ? "q1," : "",
643 (vfmt
& RADEON_CP_VC_FRMT_ST2
) ? "st2," : "",
644 (vfmt
& RADEON_CP_VC_FRMT_Q2
) ? "q2," : "",
645 (vfmt
& RADEON_CP_VC_FRMT_ST3
) ? "st3," : "",
646 (vfmt
& RADEON_CP_VC_FRMT_Q3
) ? "q3," : "",
647 (vfmt
& RADEON_CP_VC_FRMT_Q0
) ? "q0," : "",
648 (vfmt
& RADEON_CP_VC_FRMT_N0
) ? "n0," : "",
649 (vfmt
& RADEON_CP_VC_FRMT_XY1
) ? "xy1," : "",
650 (vfmt
& RADEON_CP_VC_FRMT_Z1
) ? "z1," : "",
651 (vfmt
& RADEON_CP_VC_FRMT_W1
) ? "w1," : "",
652 (vfmt
& RADEON_CP_VC_FRMT_N1
) ? "n1," : "");
655 /* if (!find_or_add_value( &others[V_VTXFMT], vfmt )) */
656 /* fprintf(stderr, " *** NEW VALUE"); */
658 fprintf(stderr
, "\n");
664 static char *primname
[0xf] = {
678 static int print_prim_and_flags( int prim
)
683 fprintf(stderr
, " %s(%x): %s%s%s%s%s%s%s\n",
686 ((prim
& 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_IND
) ? "IND," : "",
687 ((prim
& 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_LIST
) ? "LIST," : "",
688 ((prim
& 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_RING
) ? "RING," : "",
689 (prim
& RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA
) ? "RGBA," : "BGRA, ",
690 (prim
& RADEON_CP_VC_CNTL_MAOS_ENABLE
) ? "MAOS," : "",
691 (prim
& RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE
) ? "RADEON," : "",
692 (prim
& RADEON_CP_VC_CNTL_TCL_ENABLE
) ? "TCL," : "");
694 if ((prim
& 0xf) > RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST
) {
695 fprintf(stderr
, " *** Bad primitive: %x\n", prim
& 0xf);
702 fprintf(stderr
, " prim: %s numverts %d\n", primname
[prim
&0xf], numverts
);
704 switch (prim
& 0xf) {
705 case RADEON_CP_VC_CNTL_PRIM_TYPE_NONE
:
706 case RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
:
708 fprintf(stderr
, "Bad nr verts for line %d\n", numverts
);
712 case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
:
713 if ((numverts
& 1) || numverts
== 0) {
714 fprintf(stderr
, "Bad nr verts for line %d\n", numverts
);
718 case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
:
720 fprintf(stderr
, "Bad nr verts for line_strip %d\n", numverts
);
724 case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
:
725 case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_POINT_LIST
:
726 case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST
:
727 case RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST
:
728 if (numverts
% 3 || numverts
== 0) {
729 fprintf(stderr
, "Bad nr verts for tri %d\n", numverts
);
733 case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
:
734 case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
:
736 fprintf(stderr
, "Bad nr verts for strip/fan %d\n", numverts
);
741 fprintf(stderr
, "Bad primitive\n");
747 /* build in knowledge about each packet type
749 static int radeon_emit_packet3( drmRadeonCmdBuffer
*cmdbuf
)
752 int *cmd
= (int *)cmdbuf
->buf
;
754 int i
, stride
, size
, start
;
756 cmdsz
= 2 + ((cmd
[0] & RADEON_CP_PACKET_COUNT_MASK
) >> 16);
758 if ((cmd
[0] & RADEON_CP_PACKET_MASK
) != RADEON_CP_PACKET3
||
759 cmdsz
* 4 > cmdbuf
->bufsz
||
760 cmdsz
> RADEON_CP_PACKET_MAX_DWORDS
) {
761 fprintf(stderr
, "Bad packet\n");
765 switch( cmd
[0] & ~RADEON_CP_PACKET_COUNT_MASK
) {
766 case RADEON_CP_PACKET3_NOP
:
768 fprintf(stderr
, "PACKET3_NOP, %d dwords\n", cmdsz
);
770 case RADEON_CP_PACKET3_NEXT_CHAR
:
772 fprintf(stderr
, "PACKET3_NEXT_CHAR, %d dwords\n", cmdsz
);
774 case RADEON_CP_PACKET3_PLY_NEXTSCAN
:
776 fprintf(stderr
, "PACKET3_PLY_NEXTSCAN, %d dwords\n", cmdsz
);
778 case RADEON_CP_PACKET3_SET_SCISSORS
:
780 fprintf(stderr
, "PACKET3_SET_SCISSORS, %d dwords\n", cmdsz
);
782 case RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM
:
784 fprintf(stderr
, "PACKET3_3D_RNDR_GEN_INDX_PRIM, %d dwords\n",
787 case RADEON_CP_PACKET3_LOAD_MICROCODE
:
789 fprintf(stderr
, "PACKET3_LOAD_MICROCODE, %d dwords\n", cmdsz
);
791 case RADEON_CP_PACKET3_WAIT_FOR_IDLE
:
793 fprintf(stderr
, "PACKET3_WAIT_FOR_IDLE, %d dwords\n", cmdsz
);
796 case RADEON_CP_PACKET3_3D_DRAW_VBUF
:
798 fprintf(stderr
, "PACKET3_3D_DRAW_VBUF, %d dwords\n", cmdsz
);
799 print_vertex_format(cmd
[1]);
800 print_prim_and_flags(cmd
[2]);
803 case RADEON_CP_PACKET3_3D_DRAW_IMMD
:
805 fprintf(stderr
, "PACKET3_3D_DRAW_IMMD, %d dwords\n", cmdsz
);
807 case RADEON_CP_PACKET3_3D_DRAW_INDX
: {
810 fprintf(stderr
, "PACKET3_3D_DRAW_INDX, %d dwords\n", cmdsz
);
811 print_vertex_format(cmd
[1]);
812 print_prim_and_flags(cmd
[2]);
813 neltdwords
= cmd
[2]>>16;
814 neltdwords
+= neltdwords
& 1;
816 if (neltdwords
+ 3 != cmdsz
)
817 fprintf(stderr
, "Mismatch in DRAW_INDX, %d vs cmdsz %d\n",
821 case RADEON_CP_PACKET3_LOAD_PALETTE
:
823 fprintf(stderr
, "PACKET3_LOAD_PALETTE, %d dwords\n", cmdsz
);
825 case RADEON_CP_PACKET3_3D_LOAD_VBPNTR
:
827 fprintf(stderr
, "PACKET3_3D_LOAD_VBPNTR, %d dwords\n", cmdsz
);
828 fprintf(stderr
, " nr arrays: %d\n", cmd
[1]);
831 if (cmd
[1]/2 + cmd
[1]%2 != cmdsz
- 3) {
832 fprintf(stderr
, " ****** MISMATCH %d/%d *******\n",
833 cmd
[1]/2 + cmd
[1]%2 + 3, cmdsz
);
839 for (i
= 0 ; i
< cmd
[1] ; i
++) {
841 stride
= (tmp
[0]>>24) & 0xff;
842 size
= (tmp
[0]>>16) & 0xff;
847 stride
= (tmp
[0]>>8) & 0xff;
848 size
= (tmp
[0]) & 0xff;
851 fprintf(stderr
, " array %d: start 0x%x vsize %d vstride %d\n",
852 i
, start
, size
, stride
);
856 case RADEON_CP_PACKET3_CNTL_PAINT
:
858 fprintf(stderr
, "PACKET3_CNTL_PAINT, %d dwords\n", cmdsz
);
860 case RADEON_CP_PACKET3_CNTL_BITBLT
:
862 fprintf(stderr
, "PACKET3_CNTL_BITBLT, %d dwords\n", cmdsz
);
864 case RADEON_CP_PACKET3_CNTL_SMALLTEXT
:
866 fprintf(stderr
, "PACKET3_CNTL_SMALLTEXT, %d dwords\n", cmdsz
);
868 case RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT
:
870 fprintf(stderr
, "PACKET3_CNTL_HOSTDATA_BLT, %d dwords\n",
873 case RADEON_CP_PACKET3_CNTL_POLYLINE
:
875 fprintf(stderr
, "PACKET3_CNTL_POLYLINE, %d dwords\n", cmdsz
);
877 case RADEON_CP_PACKET3_CNTL_POLYSCANLINES
:
879 fprintf(stderr
, "PACKET3_CNTL_POLYSCANLINES, %d dwords\n",
882 case RADEON_CP_PACKET3_CNTL_PAINT_MULTI
:
884 fprintf(stderr
, "PACKET3_CNTL_PAINT_MULTI, %d dwords\n",
887 case RADEON_CP_PACKET3_CNTL_BITBLT_MULTI
:
889 fprintf(stderr
, "PACKET3_CNTL_BITBLT_MULTI, %d dwords\n",
892 case RADEON_CP_PACKET3_CNTL_TRANS_BITBLT
:
894 fprintf(stderr
, "PACKET3_CNTL_TRANS_BITBLT, %d dwords\n",
898 fprintf(stderr
, "UNKNOWN PACKET, %d dwords\n", cmdsz
);
902 cmdbuf
->buf
+= cmdsz
* 4;
903 cmdbuf
->bufsz
-= cmdsz
* 4;
908 /* Check cliprects for bounds, then pass on to above:
910 static int radeon_emit_packet3_cliprect( drmRadeonCmdBuffer
*cmdbuf
)
912 XF86DRIClipRectRec
*boxes
= (XF86DRIClipRectRec
*)cmdbuf
->boxes
;
915 if (VERBOSE
&& total_changed
) {
919 else fprintf(stderr
, "total_changed zero\n");
923 if ( i
< cmdbuf
->nbox
) {
924 fprintf(stderr
, "Emit box %d/%d %d,%d %d,%d\n",
926 boxes
[i
].x1
, boxes
[i
].y1
, boxes
[i
].x2
, boxes
[i
].y2
);
928 } while ( ++i
< cmdbuf
->nbox
);
931 if (cmdbuf
->nbox
== 1)
934 return radeon_emit_packet3( cmdbuf
);
938 int radeonSanityCmdBuffer( radeonContextPtr rmesa
,
940 XF86DRIClipRectRec
*boxes
)
943 drmRadeonCmdBuffer cmdbuf
;
944 drmRadeonCmdHeader header
;
945 static int inited
= 0;
952 cmdbuf
.buf
= rmesa
->store
.cmd_buf
;
953 cmdbuf
.bufsz
= rmesa
->store
.cmd_used
;
954 cmdbuf
.boxes
= (drmClipRect
*)boxes
;
957 while ( cmdbuf
.bufsz
>= sizeof(header
) ) {
959 header
.i
= *(int *)cmdbuf
.buf
;
960 cmdbuf
.buf
+= sizeof(header
);
961 cmdbuf
.bufsz
-= sizeof(header
);
963 switch (header
.header
.cmd_type
) {
964 case RADEON_CMD_PACKET
:
965 if (radeon_emit_packets( header
, &cmdbuf
)) {
966 fprintf(stderr
,"radeon_emit_packets failed\n");
971 case RADEON_CMD_SCALARS
:
972 if (radeon_emit_scalars( header
, &cmdbuf
)) {
973 fprintf(stderr
,"radeon_emit_scalars failed\n");
978 case RADEON_CMD_SCALARS2
:
979 if (radeon_emit_scalars2( header
, &cmdbuf
)) {
980 fprintf(stderr
,"radeon_emit_scalars failed\n");
985 case RADEON_CMD_VECTORS
:
986 if (radeon_emit_vectors( header
, &cmdbuf
)) {
987 fprintf(stderr
,"radeon_emit_vectors failed\n");
992 case RADEON_CMD_DMA_DISCARD
:
993 idx
= header
.dma
.buf_idx
;
995 fprintf(stderr
, "RADEON_CMD_DMA_DISCARD buf %d\n", idx
);
999 case RADEON_CMD_PACKET3
:
1000 if (radeon_emit_packet3( &cmdbuf
)) {
1001 fprintf(stderr
,"radeon_emit_packet3 failed\n");
1006 case RADEON_CMD_PACKET3_CLIP
:
1007 if (radeon_emit_packet3_cliprect( &cmdbuf
)) {
1008 fprintf(stderr
,"radeon_emit_packet3_clip failed\n");
1013 case RADEON_CMD_WAIT
:
1017 fprintf(stderr
,"bad cmd_type %d at %p\n",
1018 header
.header
.cmd_type
,
1019 cmdbuf
.buf
- sizeof(header
));
1029 fprintf(stderr
, "Bufs %d Total emitted %d real changes %d (%.2f%%)\n",
1031 total
, total_changed
,
1032 ((float)total_changed
/(float)total
*100.0));
1033 fprintf(stderr
, "Total emitted per buf: %.2f\n",
1034 (float)total
/(float)bufs
);
1035 fprintf(stderr
, "Real changes per buf: %.2f\n",
1036 (float)total_changed
/(float)bufs
);
1038 bufs
= n
= total
= total_changed
= 0;