eba5d6e0198cfeebfafe384907440d9da6ad920f
[mesa.git] / src / mesa / drivers / dri / r200 / r200_cmdbuf.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 */
28
29 /*
30 * Authors:
31 * Keith Whitwell <keith@tungstengraphics.com>
32 */
33
34 #include "main/glheader.h"
35 #include "main/imports.h"
36 #include "main/macros.h"
37 #include "main/context.h"
38 #include "swrast/swrast.h"
39 #include "main/simple_list.h"
40
41 #include "radeon_cs.h"
42 #include "r200_context.h"
43 #include "common_cmdbuf.h"
44 #include "r200_state.h"
45 #include "r200_ioctl.h"
46 #include "r200_tcl.h"
47 #include "r200_sanity.h"
48 #include "radeon_reg.h"
49
50 #define DEBUG_CMDBUF 0
51
52 /* The state atoms will be emitted in the order they appear in the atom list,
53 * so this step is important.
54 */
55 void r200SetUpAtomList( r200ContextPtr rmesa )
56 {
57 int i, mtu;
58
59 mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
60
61 make_empty_list(&rmesa->hw.atomlist);
62 rmesa->hw.atomlist.name = "atom-list";
63
64 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ctx );
65 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.set );
66 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lin );
67 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msk );
68 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpt );
69 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vtx );
70 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vap );
71 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vte );
72 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msc );
73 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cst );
74 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.zbs );
75 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcl );
76 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msl );
77 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcg );
78 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.grd );
79 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog );
80 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam );
81 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf );
82 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf );
83 for (i = 0; i < mtu; ++i)
84 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] );
85 for (i = 0; i < mtu; ++i)
86 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] );
87 for (i = 0; i < 6; ++i)
88 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] );
89 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] );
90 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] );
91 for (i = 0; i < 8; ++i)
92 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] );
93 for (i = 0; i < 3 + mtu; ++i)
94 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mat[i] );
95 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.eye );
96 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.glt );
97 for (i = 0; i < 2; ++i)
98 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mtl[i] );
99 for (i = 0; i < 6; ++i)
100 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ucp[i] );
101 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.spr );
102 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ptp );
103 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.prf );
104 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pvs );
105 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[0] );
106 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[1] );
107 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[0] );
108 insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[1] );
109 }
110
111 static void r200SaveHwState( r200ContextPtr rmesa )
112 {
113 struct radeon_state_atom *atom;
114 char * dest = rmesa->backup_store.cmd_buf;
115 uint32_t dwords;
116 if (R200_DEBUG & DEBUG_STATE)
117 fprintf(stderr, "%s\n", __FUNCTION__);
118
119 rmesa->backup_store.cmd_used = 0;
120
121 foreach( atom, &rmesa->hw.atomlist ) {
122 dwords = atom->check( rmesa->radeon.glCtx, atom );
123 if ( dwords ) {
124 int size = atom->cmd_size * 4;
125
126 if (atom->emit) {
127 (*atom->emit)(rmesa->radeon.glCtx, atom);
128 } else {
129 memcpy( dest, atom->cmd, size);
130 dest += size;
131 rmesa->backup_store.cmd_used += size;
132 }
133 if (R200_DEBUG & DEBUG_STATE)
134 radeon_print_state_atom( atom );
135 }
136 }
137
138 assert( rmesa->backup_store.cmd_used <= R200_CMD_BUF_SZ );
139 if (R200_DEBUG & DEBUG_STATE)
140 fprintf(stderr, "Returning to r200EmitState\n");
141 }
142
143 static INLINE void r200EmitAtoms(r200ContextPtr r200, GLboolean dirty)
144 {
145 BATCH_LOCALS(&r200->radeon);
146 struct radeon_state_atom *atom;
147 int dwords;
148
149 /* Emit actual atoms */
150 foreach(atom, &r200->hw.atomlist) {
151 if ((atom->dirty || r200->hw.all_dirty) == dirty) {
152 dwords = (*atom->check) (r200->radeon.glCtx, atom);
153 if (dwords) {
154 if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
155 radeon_print_state_atom(atom);
156 }
157 if (atom->emit) {
158 (*atom->emit)(r200->radeon.glCtx, atom);
159 } else {
160 BEGIN_BATCH_NO_AUTOSTATE(dwords);
161 OUT_BATCH_TABLE(atom->cmd, dwords);
162 END_BATCH();
163 }
164 atom->dirty = GL_FALSE;
165 } else {
166 if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
167 fprintf(stderr, " skip state %s\n",
168 atom->name);
169 }
170 }
171 }
172 }
173
174 COMMIT_BATCH();
175 }
176
177 void r200EmitState( r200ContextPtr rmesa )
178 {
179 char *dest;
180 int mtu;
181 struct radeon_state_atom *atom;
182 uint32_t dwords;
183
184 if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
185 fprintf(stderr, "%s\n", __FUNCTION__);
186
187 if (rmesa->save_on_next_emit) {
188 r200SaveHwState(rmesa);
189 rmesa->save_on_next_emit = GL_FALSE;
190 }
191
192 if (rmesa->radeon.cmdbuf.cs->cdw && !rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
193 return;
194
195 mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
196
197 /* To avoid going across the entire set of states multiple times, just check
198 * for enough space for the case of emitting all state, and inline the
199 * r200AllocCmdBuf code here without all the checks.
200 */
201 rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->hw.max_state_size, __FUNCTION__);
202
203 if (!rmesa->radeon.cmdbuf.cs->cdw) {
204 if (RADEON_DEBUG & DEBUG_STATE)
205 fprintf(stderr, "Begin reemit state\n");
206
207 r200EmitAtoms(rmesa, GL_FALSE);
208 }
209
210 if (RADEON_DEBUG & DEBUG_STATE)
211 fprintf(stderr, "Begin dirty state\n");
212
213 r200EmitAtoms(rmesa, GL_TRUE);
214 rmesa->hw.is_dirty = GL_FALSE;
215 rmesa->hw.all_dirty = GL_FALSE;
216 }
217
218 /* Fire a section of the retained (indexed_verts) buffer as a regular
219 * primtive.
220 */
221 void r200EmitVbufPrim( r200ContextPtr rmesa,
222 GLuint primitive,
223 GLuint vertex_nr )
224 {
225 drm_radeon_cmd_header_t *cmd;
226 BATCH_LOCALS(&rmesa->radeon);
227
228 assert(!(primitive & R200_VF_PRIM_WALK_IND));
229
230 r200EmitState( rmesa );
231
232 if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
233 fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__,
234 rmesa->store.cmd_used/4, primitive, vertex_nr);
235
236 BEGIN_BATCH(3);
237 OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
238 OUT_BATCH(primitive | R200_VF_PRIM_WALK_LIST | R200_VF_COLOR_ORDER_RGBA |
239 (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
240 END_BATCH();
241 }
242
243 static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type)
244 {
245 BATCH_LOCALS(&rmesa->radeon);
246
247 if (vertex_count > 0) {
248 BEGIN_BATCH(8);
249 OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_INDX_2, 0);
250 OUT_BATCH(R200_VF_PRIM_WALK_IND |
251 ((vertex_count + 0) << 16) |
252 type);
253
254 if (!rmesa->radeon.radeonScreen->kernel_mm) {
255 OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
256 OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
257 OUT_BATCH_RELOC(rmesa->tcl.elt_dma_offset,
258 rmesa->tcl.elt_dma_bo,
259 rmesa->tcl.elt_dma_offset,
260 RADEON_GEM_DOMAIN_GTT, 0, 0);
261 OUT_BATCH(vertex_count/2);
262 } else {
263 OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
264 OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
265 OUT_BATCH(rmesa->tcl.elt_dma_offset);
266 OUT_BATCH(vertex_count/2);
267 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
268 rmesa->tcl.elt_dma_bo,
269 RADEON_GEM_DOMAIN_GTT, 0, 0);
270 }
271 END_BATCH();
272 }
273 }
274
275 void r200FlushElts(GLcontext *ctx)
276 {
277 r200ContextPtr rmesa = R200_CONTEXT(ctx);
278 int dwords;
279 int nr, elt_used = rmesa->tcl.elt_used;
280
281 if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
282 fprintf(stderr, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used);
283
284 assert( rmesa->radeon.dma.flush == r200FlushElts );
285 rmesa->radeon.dma.flush = NULL;
286
287 elt_used = (elt_used + 2) & ~2;
288
289 nr = elt_used / 2;
290
291 radeon_bo_unmap(rmesa->tcl.elt_dma_bo);
292
293 r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive);
294
295 radeon_bo_unref(rmesa->tcl.elt_dma_bo);
296 rmesa->tcl.elt_dma_bo = NULL;
297
298 if (R200_DEBUG & DEBUG_SYNC) {
299 fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
300 r200Finish( rmesa->radeon.glCtx );
301 }
302 }
303
304
305 GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
306 GLuint primitive,
307 GLuint min_nr )
308 {
309 GLushort *retval;
310
311 if (R200_DEBUG & DEBUG_IOCTL)
312 fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
313
314 assert((primitive & R200_VF_PRIM_WALK_IND));
315
316 r200EmitState( rmesa );
317
318 rmesa->tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
319 0, R200_ELT_BUF_SZ, 4,
320 RADEON_GEM_DOMAIN_GTT, 0);
321 rmesa->tcl.elt_dma_offset = 0;
322 rmesa->tcl.elt_used = min_nr * 2;
323
324 radeon_bo_map(rmesa->tcl.elt_dma_bo, 1);
325 retval = rmesa->tcl.elt_dma_bo->ptr + rmesa->tcl.elt_dma_offset;
326
327
328 if (R200_DEBUG & DEBUG_PRIMS)
329 fprintf(stderr, "%s: header prim %x \n",
330 __FUNCTION__, primitive);
331
332 assert(!rmesa->radeon.dma.flush);
333 rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
334 rmesa->radeon.dma.flush = r200FlushElts;
335
336 return retval;
337 }
338
339
340
341 void r200EmitVertexAOS( r200ContextPtr rmesa,
342 GLuint vertex_size,
343 struct radeon_bo *bo,
344 GLuint offset )
345 {
346 BATCH_LOCALS(&rmesa->radeon);
347
348 if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
349 fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n",
350 __FUNCTION__, vertex_size, offset);
351
352
353 BEGIN_BATCH(5);
354 OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2);
355 OUT_BATCH(1);
356 OUT_BATCH(vertex_size | (vertex_size << 8));
357 OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
358 END_BATCH();
359 }
360
361 void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset)
362 {
363 BATCH_LOCALS(&rmesa->radeon);
364 uint32_t voffset;
365 int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
366 int i;
367
368 if (RADEON_DEBUG & DEBUG_VERTS)
369 fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
370 offset);
371
372 BEGIN_BATCH(sz+2);
373 OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1);
374 OUT_BATCH(nr);
375
376
377 if (!rmesa->radeon.radeonScreen->kernel_mm) {
378 for (i = 0; i + 1 < nr; i += 2) {
379 OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
380 (rmesa->tcl.aos[i].stride << 8) |
381 (rmesa->tcl.aos[i + 1].components << 16) |
382 (rmesa->tcl.aos[i + 1].stride << 24));
383
384 voffset = rmesa->tcl.aos[i + 0].offset +
385 offset * 4 * rmesa->tcl.aos[i + 0].stride;
386 OUT_BATCH_RELOC(voffset,
387 rmesa->tcl.aos[i].bo,
388 voffset,
389 RADEON_GEM_DOMAIN_GTT,
390 0, 0);
391 voffset = rmesa->tcl.aos[i + 1].offset +
392 offset * 4 * rmesa->tcl.aos[i + 1].stride;
393 OUT_BATCH_RELOC(voffset,
394 rmesa->tcl.aos[i+1].bo,
395 voffset,
396 RADEON_GEM_DOMAIN_GTT,
397 0, 0);
398 }
399
400 if (nr & 1) {
401 OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
402 (rmesa->tcl.aos[nr - 1].stride << 8));
403 voffset = rmesa->tcl.aos[nr - 1].offset +
404 offset * 4 * rmesa->tcl.aos[nr - 1].stride;
405 OUT_BATCH_RELOC(voffset,
406 rmesa->tcl.aos[nr - 1].bo,
407 voffset,
408 RADEON_GEM_DOMAIN_GTT,
409 0, 0);
410 }
411 } else {
412 for (i = 0; i + 1 < nr; i += 2) {
413 OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
414 (rmesa->tcl.aos[i].stride << 8) |
415 (rmesa->tcl.aos[i + 1].components << 16) |
416 (rmesa->tcl.aos[i + 1].stride << 24));
417
418 voffset = rmesa->tcl.aos[i + 0].offset +
419 offset * 4 * rmesa->tcl.aos[i + 0].stride;
420 OUT_BATCH(voffset);
421 voffset = rmesa->tcl.aos[i + 1].offset +
422 offset * 4 * rmesa->tcl.aos[i + 1].stride;
423 OUT_BATCH(voffset);
424 }
425
426 if (nr & 1) {
427 OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
428 (rmesa->tcl.aos[nr - 1].stride << 8));
429 voffset = rmesa->tcl.aos[nr - 1].offset +
430 offset * 4 * rmesa->tcl.aos[nr - 1].stride;
431 OUT_BATCH(voffset);
432 }
433 for (i = 0; i + 1 < nr; i += 2) {
434 voffset = rmesa->tcl.aos[i + 0].offset +
435 offset * 4 * rmesa->tcl.aos[i + 0].stride;
436 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
437 rmesa->tcl.aos[i+0].bo,
438 RADEON_GEM_DOMAIN_GTT,
439 0, 0);
440 voffset = rmesa->tcl.aos[i + 1].offset +
441 offset * 4 * rmesa->tcl.aos[i + 1].stride;
442 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
443 rmesa->tcl.aos[i+1].bo,
444 RADEON_GEM_DOMAIN_GTT,
445 0, 0);
446 }
447 if (nr & 1) {
448 voffset = rmesa->tcl.aos[nr - 1].offset +
449 offset * 4 * rmesa->tcl.aos[nr - 1].stride;
450 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
451 rmesa->tcl.aos[nr-1].bo,
452 RADEON_GEM_DOMAIN_GTT,
453 0, 0);
454 }
455 }
456 END_BATCH();
457 }
458
459 void r200FireAOS(r200ContextPtr rmesa, int vertex_count, int type)
460 {
461 BATCH_LOCALS(&rmesa->radeon);
462
463 BEGIN_BATCH(3);
464 OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
465 OUT_BATCH(R200_VF_PRIM_WALK_LIST | (vertex_count << 16) | type);
466 END_BATCH();
467 }
468