r200: fix indexed draw color order and cs missmatch
[mesa.git] / src / mesa / drivers / dri / r200 / r200_cmdbuf.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 */
28
29 /*
30 * Authors:
31 * Keith Whitwell <keith@tungstengraphics.com>
32 */
33
34 #include "main/glheader.h"
35 #include "main/imports.h"
36 #include "main/macros.h"
37 #include "main/context.h"
38 #include "swrast/swrast.h"
39 #include "main/simple_list.h"
40
41 #include "radeon_common.h"
42 #include "r200_context.h"
43 #include "r200_state.h"
44 #include "r200_ioctl.h"
45 #include "r200_tcl.h"
46 #include "r200_sanity.h"
47 #include "radeon_reg.h"
48
49 /* The state atoms will be emitted in the order they appear in the atom list,
50 * so this step is important.
51 */
52 void r200SetUpAtomList( r200ContextPtr rmesa )
53 {
54 int i, mtu;
55
56 mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
57
58 make_empty_list(&rmesa->radeon.hw.atomlist);
59 rmesa->radeon.hw.atomlist.name = "atom-list";
60
61 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ctx );
62 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.set );
63 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lin );
64 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msk );
65 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpt );
66 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vtx );
67 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vap );
68 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vte );
69 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msc );
70 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cst );
71 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.zbs );
72 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcl );
73 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msl );
74 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcg );
75 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.grd );
76 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.fog );
77 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tam );
78 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tf );
79 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.atf );
80 for (i = 0; i < mtu; ++i)
81 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i] );
82 for (i = 0; i < mtu; ++i)
83 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i] );
84 for (i = 0; i < 6; ++i)
85 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pix[i] );
86 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[0] );
87 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[1] );
88 for (i = 0; i < 8; ++i)
89 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i] );
90 for (i = 0; i < 3 + mtu; ++i)
91 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i] );
92 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.eye );
93 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.glt );
94 for (i = 0; i < 2; ++i)
95 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mtl[i] );
96 for (i = 0; i < 6; ++i)
97 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i] );
98 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.spr );
99 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ptp );
100 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.prf );
101 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pvs );
102 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[0] );
103 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[1] );
104 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[0] );
105 insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[1] );
106 }
107
108 /* Fire a section of the retained (indexed_verts) buffer as a regular
109 * primtive.
110 */
111 void r200EmitVbufPrim( r200ContextPtr rmesa,
112 GLuint primitive,
113 GLuint vertex_nr )
114 {
115 BATCH_LOCALS(&rmesa->radeon);
116
117 assert(!(primitive & R200_VF_PRIM_WALK_IND));
118
119 radeonEmitState(&rmesa->radeon);
120
121 if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
122 fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__,
123 rmesa->store.cmd_used/4, primitive, vertex_nr);
124
125 BEGIN_BATCH(3);
126 OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
127 OUT_BATCH(primitive | R200_VF_PRIM_WALK_LIST | R200_VF_COLOR_ORDER_RGBA |
128 (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
129 END_BATCH();
130 }
131
132 static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type)
133 {
134 BATCH_LOCALS(&rmesa->radeon);
135
136 if (vertex_count > 0) {
137 BEGIN_BATCH(8+2);
138 OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_INDX_2, 0);
139 OUT_BATCH(R200_VF_PRIM_WALK_IND |
140 R200_VF_COLOR_ORDER_RGBA |
141 ((vertex_count + 0) << 16) |
142 type);
143
144 if (!rmesa->radeon.radeonScreen->kernel_mm) {
145 OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
146 OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
147 OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
148 rmesa->radeon.tcl.elt_dma_bo,
149 rmesa->radeon.tcl.elt_dma_offset,
150 RADEON_GEM_DOMAIN_GTT, 0, 0);
151 OUT_BATCH(vertex_count/2);
152 } else {
153 OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
154 OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
155 OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
156 OUT_BATCH(vertex_count/2);
157 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
158 rmesa->radeon.tcl.elt_dma_bo,
159 RADEON_GEM_DOMAIN_GTT, 0, 0);
160 }
161 END_BATCH();
162 }
163 }
164
165 void r200FlushElts(GLcontext *ctx)
166 {
167 r200ContextPtr rmesa = R200_CONTEXT(ctx);
168 int nr, elt_used = rmesa->tcl.elt_used;
169
170 if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
171 fprintf(stderr, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used);
172
173 assert( rmesa->radeon.dma.flush == r200FlushElts );
174 rmesa->radeon.dma.flush = NULL;
175
176 elt_used = (elt_used + 2) & ~2;
177
178 nr = elt_used / 2;
179
180 radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
181
182 r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive);
183
184 radeon_bo_unref(rmesa->radeon.tcl.elt_dma_bo);
185 rmesa->radeon.tcl.elt_dma_bo = NULL;
186
187 if (R200_DEBUG & DEBUG_SYNC) {
188 fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
189 radeonFinish( rmesa->radeon.glCtx );
190 }
191 }
192
193
194 GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
195 GLuint primitive,
196 GLuint min_nr )
197 {
198 GLushort *retval;
199
200 if (R200_DEBUG & DEBUG_IOCTL)
201 fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
202
203 assert((primitive & R200_VF_PRIM_WALK_IND));
204
205 radeonEmitState(&rmesa->radeon);
206
207 rmesa->radeon.tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
208 0, R200_ELT_BUF_SZ, 4,
209 RADEON_GEM_DOMAIN_GTT, 0);
210 rmesa->radeon.tcl.elt_dma_offset = 0;
211 rmesa->tcl.elt_used = min_nr * 2;
212
213 radeon_validate_bo(&rmesa->radeon, rmesa->radeon.tcl.elt_dma_bo,
214 RADEON_GEM_DOMAIN_GTT, 0);
215 if (radeon_revalidate_bos(rmesa->radeon.glCtx) == GL_FALSE)
216 fprintf(stderr,"failure to revalidate BOs - badness\n");
217
218 radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
219 retval = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
220
221
222 if (R200_DEBUG & DEBUG_PRIMS)
223 fprintf(stderr, "%s: header prim %x \n",
224 __FUNCTION__, primitive);
225
226 assert(!rmesa->radeon.dma.flush);
227 rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
228 rmesa->radeon.dma.flush = r200FlushElts;
229
230 return retval;
231 }
232
233
234
235 void r200EmitVertexAOS( r200ContextPtr rmesa,
236 GLuint vertex_size,
237 struct radeon_bo *bo,
238 GLuint offset )
239 {
240 BATCH_LOCALS(&rmesa->radeon);
241
242 if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
243 fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n",
244 __FUNCTION__, vertex_size, offset);
245
246
247 BEGIN_BATCH(7);
248 OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2);
249 OUT_BATCH(1);
250 OUT_BATCH(vertex_size | (vertex_size << 8));
251 OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
252 END_BATCH();
253 }
254
255 void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset)
256 {
257 BATCH_LOCALS(&rmesa->radeon);
258 uint32_t voffset;
259 int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
260 int i;
261
262 if (RADEON_DEBUG & DEBUG_VERTS)
263 fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
264 offset);
265
266 BEGIN_BATCH(sz+2+ (nr*2));
267 OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1);
268 OUT_BATCH(nr);
269
270
271 if (!rmesa->radeon.radeonScreen->kernel_mm) {
272 for (i = 0; i + 1 < nr; i += 2) {
273 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
274 (rmesa->radeon.tcl.aos[i].stride << 8) |
275 (rmesa->radeon.tcl.aos[i + 1].components << 16) |
276 (rmesa->radeon.tcl.aos[i + 1].stride << 24));
277
278 voffset = rmesa->radeon.tcl.aos[i + 0].offset +
279 offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
280 OUT_BATCH_RELOC(voffset,
281 rmesa->radeon.tcl.aos[i].bo,
282 voffset,
283 RADEON_GEM_DOMAIN_GTT,
284 0, 0);
285 voffset = rmesa->radeon.tcl.aos[i + 1].offset +
286 offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
287 OUT_BATCH_RELOC(voffset,
288 rmesa->radeon.tcl.aos[i+1].bo,
289 voffset,
290 RADEON_GEM_DOMAIN_GTT,
291 0, 0);
292 }
293
294 if (nr & 1) {
295 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
296 (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
297 voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
298 offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
299 OUT_BATCH_RELOC(voffset,
300 rmesa->radeon.tcl.aos[nr - 1].bo,
301 voffset,
302 RADEON_GEM_DOMAIN_GTT,
303 0, 0);
304 }
305 } else {
306 for (i = 0; i + 1 < nr; i += 2) {
307 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
308 (rmesa->radeon.tcl.aos[i].stride << 8) |
309 (rmesa->radeon.tcl.aos[i + 1].components << 16) |
310 (rmesa->radeon.tcl.aos[i + 1].stride << 24));
311
312 voffset = rmesa->radeon.tcl.aos[i + 0].offset +
313 offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
314 OUT_BATCH(voffset);
315 voffset = rmesa->radeon.tcl.aos[i + 1].offset +
316 offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
317 OUT_BATCH(voffset);
318 }
319
320 if (nr & 1) {
321 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
322 (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
323 voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
324 offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
325 OUT_BATCH(voffset);
326 }
327 for (i = 0; i + 1 < nr; i += 2) {
328 voffset = rmesa->radeon.tcl.aos[i + 0].offset +
329 offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
330 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
331 rmesa->radeon.tcl.aos[i+0].bo,
332 RADEON_GEM_DOMAIN_GTT,
333 0, 0);
334 voffset = rmesa->radeon.tcl.aos[i + 1].offset +
335 offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
336 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
337 rmesa->radeon.tcl.aos[i+1].bo,
338 RADEON_GEM_DOMAIN_GTT,
339 0, 0);
340 }
341 if (nr & 1) {
342 voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
343 offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
344 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
345 rmesa->radeon.tcl.aos[nr-1].bo,
346 RADEON_GEM_DOMAIN_GTT,
347 0, 0);
348 }
349 }
350 END_BATCH();
351 }