/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c,v 1.11 2003/01/29 22:04:59 dawes Exp $ */
/**************************************************************************

Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
                     VA Linux Systems Inc., Fremont, California.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Kevin E. Martin <martin@valinux.com>
 *   Gareth Hughes <gareth@valinux.com>
 *   Keith Whitwell <keith@tungstengraphics.com>
 */

#include <sched.h>
#include <errno.h>

#include "glheader.h"
#include "imports.h"
#include "simple_list.h"
#include "swrast/swrast.h"

#include "radeon_context.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
#include "radeon_tcl.h"
#include "radeon_sanity.h"

#define STANDALONE_MMIO
#include "radeon_macros.h"  /* for INREG() */

#include "vblank.h"

#define RADEON_TIMEOUT     512
#define RADEON_IDLE_RETRY   16


static void radeonWaitForIdle( radeonContextPtr rmesa );

/* =============================================================
 * Kernel command buffer handling
 */

static void print_state_atom( struct radeon_state_atom *state )
{
   int i;

   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);

   if (RADEON_DEBUG & DEBUG_VERBOSE)
      for (i = 0 ; i < state->cmd_size ; i++)
         fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
}

static void radeon_emit_state_list( radeonContextPtr rmesa,
                                    struct radeon_state_atom *list )
{
   struct radeon_state_atom *state, *tmp;
   char *dest;
   int i, size, texunits;

   /* It appears that some permutations of state atoms lock up the
    * chip.  Therefore we make sure that state atoms are emitted in a
    * fixed order.  First mark all dirty state atoms and then go
    * through all state atoms in a well defined order and emit only
    * the marked ones.
    * FIXME: This requires knowledge of which state atoms exist.
    * FIXME: Is the zbs hack below still needed?
    */
   size = 0;
   foreach_s( state, tmp, list ) {
      if (state->check( rmesa->glCtx )) {
         size += state->cmd_size;
         state->dirty = GL_TRUE;
         move_to_head( &(rmesa->hw.clean), state );
         if (RADEON_DEBUG & DEBUG_STATE)
            print_state_atom( state );
      }
      else if (RADEON_DEBUG & DEBUG_STATE)
         fprintf(stderr, "skip state %s\n", state->name);
   }
   /* short cut */
   if (!size)
      return;

   dest = radeonAllocCmdBuf( rmesa, size * 4, __FUNCTION__);
   texunits = rmesa->glCtx->Const.MaxTextureUnits;

#define EMIT_ATOM(ATOM) \
do { \
   if (rmesa->hw.ATOM.dirty) { \
      rmesa->hw.ATOM.dirty = GL_FALSE; \
      memcpy( dest, rmesa->hw.ATOM.cmd, rmesa->hw.ATOM.cmd_size * 4); \
      dest += rmesa->hw.ATOM.cmd_size * 4; \
   } \
} while (0)
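
/* For reference, EMIT_ATOM(ctx) expands to roughly the following
 * (an illustrative sketch of the macro above, not extra driver code):
 *
 *    if (rmesa->hw.ctx.dirty) {
 *       rmesa->hw.ctx.dirty = GL_FALSE;
 *       memcpy( dest, rmesa->hw.ctx.cmd, rmesa->hw.ctx.cmd_size * 4);
 *       dest += rmesa->hw.ctx.cmd_size * 4;
 *    }
 *
 * i.e. each dirty atom's command dwords are copied into the command
 * buffer in the fixed order given below, and the atom is marked clean.
 */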

   EMIT_ATOM (ctx);
   EMIT_ATOM (set);
   EMIT_ATOM (lin);
   EMIT_ATOM (msk);
   EMIT_ATOM (vpt);
   EMIT_ATOM (tcl);
   EMIT_ATOM (msc);
   for (i = 0; i < texunits; ++i) {
      EMIT_ATOM (tex[i]);
      EMIT_ATOM (txr[i]);
   }
   EMIT_ATOM (zbs);
   EMIT_ATOM (mtl);
   for (i = 0; i < 3 + texunits; ++i)
      EMIT_ATOM (mat[i]);
   for (i = 0; i < 8; ++i)
      EMIT_ATOM (lit[i]);
   for (i = 0; i < 6; ++i)
      EMIT_ATOM (ucp[i]);
   EMIT_ATOM (eye);
   EMIT_ATOM (grd);
   EMIT_ATOM (fog);
   EMIT_ATOM (glt);

#undef EMIT_ATOM
}

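
/* Copy all state atoms marked dirty (or everything, after a lost
 * context) into the command buffer, honouring the fixed emit order
 * above.  TCL atoms are skipped on non-TCL chips.
 */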
void radeonEmitState( radeonContextPtr rmesa )
{
   struct radeon_state_atom *state, *tmp;

   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
      fprintf(stderr, "%s\n", __FUNCTION__);

   /* Somewhat overkill:
    */
   if (rmesa->lost_context) {
      if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
         fprintf(stderr, "%s - lost context\n", __FUNCTION__);

      foreach_s( state, tmp, &(rmesa->hw.clean) )
         move_to_tail(&(rmesa->hw.dirty), state );

      rmesa->lost_context = 0;
   }
   else if (1) {
      /* This is a dastardly kludge to work around a lockup that I
       * haven't otherwise figured out.
       */
      move_to_tail(&(rmesa->hw.dirty), &(rmesa->hw.zbs) );
   }

   if (!(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)) {
      foreach_s( state, tmp, &(rmesa->hw.dirty) ) {
         if (state->is_tcl) {
            move_to_head( &(rmesa->hw.clean), state );
         }
      }
   }

   radeon_emit_state_list( rmesa, &rmesa->hw.dirty );
}


/* Fire a section of the retained (indexed_verts) buffer as a regular
 * primitive.
 */
extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
                                GLuint vertex_format,
                                GLuint primitive,
                                GLuint vertex_nr )
{
   drm_radeon_cmd_header_t *cmd;

   assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));

   radeonEmitState( rmesa );

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
              rmesa->store.cmd_used/4);

   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ,
                                                       __FUNCTION__ );
#if RADEON_OLD_PACKETS
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
   cmd[2].i = rmesa->ioctl.vertex_offset;
   cmd[3].i = vertex_nr;
   cmd[4].i = vertex_format;
   cmd[5].i = (primitive |
               RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
               RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
               RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
               (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));

   if (RADEON_DEBUG & DEBUG_PRIMS)
      fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
              __FUNCTION__,
              cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
#else
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
   cmd[2].i = vertex_format;
   cmd[3].i = (primitive |
               RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
               RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
               RADEON_CP_VC_CNTL_MAOS_ENABLE |
               RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
               (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));

   if (RADEON_DEBUG & DEBUG_PRIMS)
      fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
              __FUNCTION__,
              cmd[1].i, cmd[2].i, cmd[3].i);
#endif
}

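
/* Close out an open-ended indexed primitive: back-patch the packet
 * header with the final dword count and the vertex-control dword with
 * the final index count, after padding to an even number of indices.
 */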
void radeonFlushElts( radeonContextPtr rmesa )
{
   int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
   int dwords;
#if RADEON_OLD_PACKETS
   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2;
#else
   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2;
#endif

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s\n", __FUNCTION__);

   assert( rmesa->dma.flush == radeonFlushElts );
   rmesa->dma.flush = 0;

   /* Cope with odd number of elts:
    */
   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
   dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;

#if RADEON_OLD_PACKETS
   cmd[1] |= (dwords - 3) << 16;
   cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
#else
   cmd[1] |= (dwords - 3) << 16;
   cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
#endif
}

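
/* Begin an open-ended indexed primitive and return a pointer into the
 * command buffer where at least min_nr GLushort indices can be
 * written.  radeonFlushElts() (installed as rmesa->dma.flush) patches
 * the header once the caller knows how many indices were emitted.
 */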
GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
                                    GLuint vertex_format,
                                    GLuint primitive,
                                    GLuint min_nr )
{
   drm_radeon_cmd_header_t *cmd;
   GLushort *retval;

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr);

   assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));

   radeonEmitState( rmesa );

   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa,
                                                       ELTS_BUFSZ(min_nr),
                                                       __FUNCTION__ );
#if RADEON_OLD_PACKETS
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
   cmd[2].i = rmesa->ioctl.vertex_offset;
   cmd[3].i = 0xffff;
   cmd[4].i = vertex_format;
   cmd[5].i = (primitive |
               RADEON_CP_VC_CNTL_PRIM_WALK_IND |
               RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
               RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);

   retval = (GLushort *)(cmd+6);
#else
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
   cmd[2].i = vertex_format;
   cmd[3].i = (primitive |
               RADEON_CP_VC_CNTL_PRIM_WALK_IND |
               RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
               RADEON_CP_VC_CNTL_MAOS_ENABLE |
               RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);

   retval = (GLushort *)(cmd+4);
#endif

   if (RADEON_DEBUG & DEBUG_PRIMS)
      fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
              __FUNCTION__,
              cmd[1].i, vertex_format, primitive);

   assert(!rmesa->dma.flush);
   rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
   rmesa->dma.flush = radeonFlushElts;

   rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;

   return retval;
}


void radeonEmitVertexAOS( radeonContextPtr rmesa,
                          GLuint vertex_size,
                          GLuint offset )
{
#if RADEON_OLD_PACKETS
   rmesa->ioctl.vertex_size = vertex_size;
   rmesa->ioctl.vertex_offset = offset;
#else
   drm_radeon_cmd_header_t *cmd;

   if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
      fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n",
              __FUNCTION__, vertex_size, offset);

   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
                                                       __FUNCTION__ );

   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
   cmd[2].i = 1;
   cmd[3].i = vertex_size | (vertex_size << 8);
   cmd[4].i = offset;
#endif
}

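
/* Emit a 3D_LOAD_VBPNTR packet describing nr vertex arrays
 * (arrays-of-structures).  Two arrays share one control dword holding
 * their size/stride fields, followed by one address dword per array.
 */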
void radeonEmitAOS( radeonContextPtr rmesa,
                    struct radeon_dma_region **component,
                    GLuint nr,
                    GLuint offset )
{
#if RADEON_OLD_PACKETS
   assert( nr == 1 );
   assert( component[0]->aos_size == component[0]->aos_stride );
   rmesa->ioctl.vertex_size = component[0]->aos_size;
   rmesa->ioctl.vertex_offset =
      (component[0]->aos_start + offset * component[0]->aos_stride * 4);
#else
   drm_radeon_cmd_header_t *cmd;
   int sz = AOS_BUFSZ;
   int i;
   int *tmp;

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s\n", __FUNCTION__);

   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sz,
                                                       __FUNCTION__ );
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (((sz / sizeof(int))-3) << 16);
   cmd[2].i = nr;
   tmp = &cmd[0].i;
   cmd += 3;

   for (i = 0 ; i < nr ; i++) {
      if (i & 1) {
         cmd[0].i |= ((component[i]->aos_stride << 24) |
                      (component[i]->aos_size << 16));
         cmd[2].i = (component[i]->aos_start +
                     offset * component[i]->aos_stride * 4);
         cmd += 3;
      }
      else {
         cmd[0].i = ((component[i]->aos_stride << 8) |
                     (component[i]->aos_size << 0));
         cmd[1].i = (component[i]->aos_start +
                     offset * component[i]->aos_stride * 4);
      }
   }

   if (RADEON_DEBUG & DEBUG_VERTS) {
      fprintf(stderr, "%s:\n", __FUNCTION__);
      for (i = 0 ; i < sz ; i++)
         fprintf(stderr, "   %d: %x\n", i, tmp[i]);
   }
#endif
}

/* using already shifted color_fmt! */
void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is required? */
                     GLuint color_fmt,
                     GLuint src_pitch,
                     GLuint src_offset,
                     GLuint dst_pitch,
                     GLuint dst_offset,
                     GLint srcx, GLint srcy,
                     GLint dstx, GLint dsty,
                     GLuint w, GLuint h )
{
   drm_radeon_cmd_header_t *cmd;

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
              __FUNCTION__,
              src_pitch, src_offset, srcx, srcy,
              dst_pitch, dst_offset, dstx, dsty,
              w, h);

   assert( (src_pitch & 63) == 0 );
   assert( (dst_pitch & 63) == 0 );
   assert( (src_offset & 1023) == 0 );
   assert( (dst_offset & 1023) == 0 );
   assert( w < (1<<16) );
   assert( h < (1<<16) );

   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int),
                                                       __FUNCTION__ );

   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
   cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16);
   cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
               RADEON_GMC_DST_PITCH_OFFSET_CNTL |
               RADEON_GMC_BRUSH_NONE |
               color_fmt |
               RADEON_GMC_SRC_DATATYPE_COLOR |
               RADEON_ROP3_S |
               RADEON_DP_SRC_SOURCE_MEMORY |
               RADEON_GMC_CLR_CMP_CNTL_DIS |
               RADEON_GMC_WR_MSK_DIS );

   cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
   cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
   cmd[5].i = (srcx << 16) | srcy;
   cmd[6].i = (dstx << 16) | dsty; /* dst */
   cmd[7].i = (w << 16) | h;
}

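
/* Queue a wait command so the CP blocks until outstanding 2D/3D
 * engine activity has drained.  Requires drm minor >= 6; on older
 * drms this is silently a no-op.
 */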
void radeonEmitWait( radeonContextPtr rmesa, GLuint flags )
{
   if (rmesa->dri.drmMinor >= 6) {
      drm_radeon_cmd_header_t *cmd;

      assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );

      cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int),
                                                          __FUNCTION__ );
      cmd[0].i = 0;
      cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
      cmd[0].wait.flags = flags;
   }
}

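
/* Submit the accumulated command buffer to the kernel via the
 * DRM_RADEON_CMDBUF ioctl, using the scissor cliprects when scissoring
 * is enabled.  Must be called with the hardware lock held; resets the
 * buffer even on failure so callers never resubmit stale data.
 */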
static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
                                    const char * caller )
{
   int ret, i;
   drm_radeon_cmd_buffer_t cmd;

   if (RADEON_DEBUG & DEBUG_IOCTL) {
      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);

      if (RADEON_DEBUG & DEBUG_VERBOSE)
         for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
            fprintf(stderr, "%d: %x\n", i/4,
                    *(int *)(&rmesa->store.cmd_buf[i]));
   }

   if (RADEON_DEBUG & DEBUG_DMA)
      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
              rmesa->dma.nr_released_bufs);

   if (RADEON_DEBUG & DEBUG_SANITY) {
      if (rmesa->state.scissor.enabled)
         ret = radeonSanityCmdBuffer( rmesa,
                                      rmesa->state.scissor.numClipRects,
                                      rmesa->state.scissor.pClipRects);
      else
         ret = radeonSanityCmdBuffer( rmesa,
                                      rmesa->numClipRects,
                                      rmesa->pClipRects);
      if (ret) {
         fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);
         goto out;
      }
   }

   cmd.bufsz = rmesa->store.cmd_used;
   cmd.buf = rmesa->store.cmd_buf;

   if (rmesa->state.scissor.enabled) {
      cmd.nbox = rmesa->state.scissor.numClipRects;
      cmd.boxes = rmesa->state.scissor.pClipRects;
   } else {
      cmd.nbox = rmesa->numClipRects;
      cmd.boxes = rmesa->pClipRects;
   }

   ret = drmCommandWrite( rmesa->dri.fd,
                          DRM_RADEON_CMDBUF,
                          &cmd, sizeof(cmd) );

   if (ret)
      fprintf(stderr, "drmCommandWrite: %d\n", ret);

 out:
   rmesa->store.primnr = 0;
   rmesa->store.statenr = 0;
   rmesa->store.cmd_used = 0;
   rmesa->dma.nr_released_bufs = 0;
   return ret;
}


/* Note: does not emit any commands to avoid recursion on
 * radeonAllocCmdBuf.
 */
void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller )
{
   int ret;

   LOCK_HARDWARE( rmesa );

   ret = radeonFlushCmdBufLocked( rmesa, caller );

   UNLOCK_HARDWARE( rmesa );

   if (ret) {
      fprintf(stderr, "drm_radeon_cmd_buffer_t: %d (exiting)\n", ret);
      exit(ret);
   }
}

/* =============================================================
 * Hardware vertex buffer handling
 */

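
/* Obtain a fresh DMA buffer from the kernel for rmesa->dma.current,
 * flushing pending elts and releasing the previous buffer first.  If
 * no buffer is immediately available, flush the command buffer, wait
 * for idle and retry once before giving up.
 */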
void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa )
{
   struct radeon_dma_buffer *dmabuf;
   int fd = rmesa->dri.fd;
   int index = 0;
   int size = 0;
   drmDMAReq dma;
   int ret;

   if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
      fprintf(stderr, "%s\n", __FUNCTION__);

   if (rmesa->dma.flush) {
      rmesa->dma.flush( rmesa );
   }

   if (rmesa->dma.current.buf)
      radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );

   if (rmesa->dma.nr_released_bufs > 4)
      radeonFlushCmdBuf( rmesa, __FUNCTION__ );

   dma.context = rmesa->dri.hwContext;
   dma.send_count = 0;
   dma.send_list = NULL;
   dma.send_sizes = NULL;
   dma.flags = 0;
   dma.request_count = 1;
   dma.request_size = RADEON_BUFFER_SIZE;
   dma.request_list = &index;
   dma.request_sizes = &size;
   dma.granted_count = 0;

   LOCK_HARDWARE(rmesa); /* no need to validate */

   ret = drmDMA( fd, &dma );

   if (ret != 0) {
      /* Free some up this way?
       */
      if (rmesa->dma.nr_released_bufs) {
         radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
      }

      if (RADEON_DEBUG & DEBUG_DMA)
         fprintf(stderr, "Waiting for buffers\n");

      radeonWaitForIdleLocked( rmesa );
      ret = drmDMA( fd, &dma );

      if ( ret != 0 ) {
         UNLOCK_HARDWARE( rmesa );
         fprintf( stderr, "Error: Could not get dma buffer... exiting\n" );
         exit( -1 );
      }
   }

   UNLOCK_HARDWARE(rmesa);

   if (RADEON_DEBUG & DEBUG_DMA)
      fprintf(stderr, "Allocated buffer %d\n", index);

   dmabuf = CALLOC_STRUCT( radeon_dma_buffer );
   dmabuf->buf = &rmesa->radeonScreen->buffers->list[index];
   dmabuf->refcount = 1;

   rmesa->dma.current.buf = dmabuf;
   rmesa->dma.current.address = dmabuf->buf->address;
   rmesa->dma.current.end = dmabuf->buf->total;
   rmesa->dma.current.start = 0;
   rmesa->dma.current.ptr = 0;

   rmesa->c_vertexBuffers++;
}

void radeonReleaseDmaRegion( radeonContextPtr rmesa,
                             struct radeon_dma_region *region,
                             const char *caller )
{
   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);

   if (!region->buf)
      return;

   if (rmesa->dma.flush)
      rmesa->dma.flush( rmesa );

   if (--region->buf->refcount == 0) {
      drm_radeon_cmd_header_t *cmd;

      if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
         fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
                 region->buf->buf->idx);

      cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sizeof(*cmd),
                                                          __FUNCTION__ );
      cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
      cmd->dma.buf_idx = region->buf->buf->idx;
      FREE(region->buf);
      rmesa->dma.nr_released_bufs++;
   }

   region->buf = 0;
   region->start = 0;
}

/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current)
 */
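/* A minimal usage sketch (hypothetical caller, not driver code):
 *
 *    struct radeon_dma_region region;
 *    radeonAllocDmaRegion( rmesa, &region, bytes, 4 );
 *    memcpy( region.address + region.start, data, bytes );
 *    radeonReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
 *
 * Each region holds a reference on the underlying kernel DMA buffer,
 * so the DISCARD command is only queued once the last region using
 * that buffer has been released.
 */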
void radeonAllocDmaRegion( radeonContextPtr rmesa,
                           struct radeon_dma_region *region,
                           int bytes,
                           int alignment )
{
   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

   if (rmesa->dma.flush)
      rmesa->dma.flush( rmesa );

   if (region->buf)
      radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ );

   alignment--;
   rmesa->dma.current.start = rmesa->dma.current.ptr =
      (rmesa->dma.current.ptr + alignment) & ~alignment;

   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
      radeonRefillCurrentDmaRegion( rmesa );

   region->start = rmesa->dma.current.start;
   region->ptr = rmesa->dma.current.start;
   region->end = rmesa->dma.current.start + bytes;
   region->address = rmesa->dma.current.address;
   region->buf = rmesa->dma.current.buf;
   region->buf->refcount++;

   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
   rmesa->dma.current.start =
      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
}

void radeonAllocDmaRegionVerts( radeonContextPtr rmesa,
                                struct radeon_dma_region *region,
                                int numverts,
                                int vertsize,
                                int alignment )
{
   radeonAllocDmaRegion( rmesa, region, vertsize * numverts, alignment );
}

/* ================================================================
 * SwapBuffers with client-side throttling
 */

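
/* Read the last completed frame counter: via the
 * RADEON_PARAM_LAST_FRAME getparam on drm minor >= 4, otherwise by
 * reading RADEON_LAST_FRAME_REG directly through the mmio mapping.
 */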
static uint32_t radeonGetLastFrame (radeonContextPtr rmesa)
{
   unsigned char *RADEONMMIO = rmesa->radeonScreen->mmio.map;
   int ret;
   uint32_t frame;

   if (rmesa->dri.screen->drmMinor >= 4) {
      drm_radeon_getparam_t gp;

      gp.param = RADEON_PARAM_LAST_FRAME;
      gp.value = (int *)&frame;
      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
                                 &gp, sizeof(gp) );
   }
   else
      ret = -EINVAL;

   if ( ret == -EINVAL ) {
      frame = INREG( RADEON_LAST_FRAME_REG );
      ret = 0;
   }
   if ( ret ) {
      fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret );
      exit(1);
   }

   return frame;
}

static void radeonEmitIrqLocked( radeonContextPtr rmesa )
{
   drm_radeon_irq_emit_t ie;
   int ret;

   ie.irq_seq = &rmesa->iw.irq_seq;
   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT,
                              &ie, sizeof(ie) );
   if ( ret ) {
      fprintf( stderr, "%s: drm_radeon_irq_emit_t: %d\n", __FUNCTION__, ret );
      exit(1);
   }
}


static void radeonWaitIrq( radeonContextPtr rmesa )
{
   int ret;

   do {
      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
                             &rmesa->iw, sizeof(rmesa->iw) );
   } while (ret && (errno == EINTR || errno == EAGAIN));

   if ( ret ) {
      fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
      exit(1);
   }
}

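
/* Throttle against the hardware: block (via irq wait when available,
 * otherwise by polling the frame counter) until the previously queued
 * swap has been retired.  Called with the hardware lock held; may
 * drop and re-take it while sleeping.
 */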
static void radeonWaitForFrameCompletion( radeonContextPtr rmesa )
{
   drm_radeon_sarea_t *sarea = rmesa->sarea;

   if (rmesa->do_irqs) {
      if (radeonGetLastFrame(rmesa) < sarea->last_frame) {
         if (!rmesa->irqsEmitted) {
            while (radeonGetLastFrame (rmesa) < sarea->last_frame)
               ;
         }
         else {
            UNLOCK_HARDWARE( rmesa );
            radeonWaitIrq( rmesa );
            LOCK_HARDWARE( rmesa );
         }
         rmesa->irqsEmitted = 10;
      }

      if (rmesa->irqsEmitted) {
         radeonEmitIrqLocked( rmesa );
         rmesa->irqsEmitted--;
      }
   }
   else {
      while (radeonGetLastFrame (rmesa) < sarea->last_frame) {
         UNLOCK_HARDWARE( rmesa );
         if (rmesa->do_usleeps)
            DO_USLEEP( 1 );
         LOCK_HARDWARE( rmesa );
      }
   }
}

/* Copy the back color buffer to the front color buffer.
 */
void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
{
   radeonContextPtr rmesa;
   GLint nbox, i, ret;
   GLboolean missed_target;
   int64_t ust;

   assert(dPriv);
   assert(dPriv->driContextPriv);
   assert(dPriv->driContextPriv->driverPrivate);

   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;

   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
   }

   RADEON_FIREVERTICES( rmesa );
   LOCK_HARDWARE( rmesa );

   /* Throttle the frame rate -- only allow one pending swap buffers
    * request at a time.
    */
   radeonWaitForFrameCompletion( rmesa );
   UNLOCK_HARDWARE( rmesa );
   driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
   LOCK_HARDWARE( rmesa );

   nbox = dPriv->numClipRects; /* must be in locked region */

   for ( i = 0 ; i < nbox ; ) {
      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
      drm_clip_rect_t *box = dPriv->pClipRects;
      drm_clip_rect_t *b = rmesa->sarea->boxes;
      GLint n = 0;

      for ( ; i < nr ; i++ ) {
         *b++ = box[i];
         n++;
      }
      rmesa->sarea->nbox = n;

      ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );

      if ( ret ) {
         fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
         UNLOCK_HARDWARE( rmesa );
         exit( 1 );
      }
   }

   UNLOCK_HARDWARE( rmesa );
   rmesa->swap_count++;
   (*rmesa->get_ust)( & ust );
   if ( missed_target ) {
      rmesa->swap_missed_count++;
      rmesa->swap_missed_ust = ust - rmesa->swap_ust;
   }

   rmesa->swap_ust = ust;
}

void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
{
   radeonContextPtr rmesa;
   GLint ret;
   GLboolean missed_target;

   assert(dPriv);
   assert(dPriv->driContextPriv);
   assert(dPriv->driContextPriv->driverPrivate);

   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;

   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
      fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
              rmesa->sarea->pfCurrentPage);
   }

   RADEON_FIREVERTICES( rmesa );
   LOCK_HARDWARE( rmesa );

   /* Need to do this for the perf box placement:
    */
   if (dPriv->numClipRects)
   {
      drm_clip_rect_t *box = dPriv->pClipRects;
      drm_clip_rect_t *b = rmesa->sarea->boxes;
      b[0] = box[0];
      rmesa->sarea->nbox = 1;
   }

   /* Throttle the frame rate -- only allow a few pending swap buffers
    * requests at a time.
    */
   radeonWaitForFrameCompletion( rmesa );
   UNLOCK_HARDWARE( rmesa );
   driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
   if ( missed_target ) {
      rmesa->swap_missed_count++;
      (void) (*rmesa->get_ust)( & rmesa->swap_missed_ust );
   }
   LOCK_HARDWARE( rmesa );

   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );

   UNLOCK_HARDWARE( rmesa );

   if ( ret ) {
      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
      exit( 1 );
   }

   rmesa->swap_count++;
   (void) (*rmesa->get_ust)( & rmesa->swap_ust );

   if ( rmesa->sarea->pfCurrentPage == 1 ) {
      rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
      rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
   } else {
      rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
      rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
   }

   RADEON_STATECHANGE( rmesa, ctx );
   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset
                                           + rmesa->radeonScreen->fbLocation;
   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
}


/* ================================================================
 * Buffer clear
 */
#define RADEON_MAX_CLEARS 256

static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
                         GLint cx, GLint cy, GLint cw, GLint ch )
{
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
   drm_radeon_sarea_t *sarea = rmesa->sarea;
   unsigned char *RADEONMMIO = rmesa->radeonScreen->mmio.map;
   uint32_t clear;
   GLuint flags = 0;
   GLuint color_mask = 0;
   GLint ret, i;

   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
      fprintf( stderr, "%s: all=%d cx=%d cy=%d cw=%d ch=%d\n",
               __FUNCTION__, all, cx, cy, cw, ch );
   }

   /* Need to cope with lost context here as the kernel relies on
    * some residual state:
    */
   RADEON_FIREVERTICES( rmesa );

   if ( mask & DD_FRONT_LEFT_BIT ) {
      flags |= RADEON_FRONT;
      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
      mask &= ~DD_FRONT_LEFT_BIT;
   }

   if ( mask & DD_BACK_LEFT_BIT ) {
      flags |= RADEON_BACK;
      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
      mask &= ~DD_BACK_LEFT_BIT;
   }

   if ( mask & DD_DEPTH_BIT ) {
      if ( ctx->Depth.Mask ) flags |= RADEON_DEPTH; /* FIXME: ??? */
      mask &= ~DD_DEPTH_BIT;
   }

   if ( (mask & DD_STENCIL_BIT) && rmesa->state.stencil.hwBuffer ) {
      flags |= RADEON_STENCIL;
      mask &= ~DD_STENCIL_BIT;
   }

   if ( mask ) {
      if (RADEON_DEBUG & DEBUG_FALLBACKS)
         fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
      _swrast_Clear( ctx, mask, all, cx, cy, cw, ch );
   }

   if ( !flags )
      return;


   /* Flip top to bottom */
   cx += dPriv->x;
   cy  = dPriv->y + dPriv->h - cy - ch;

   LOCK_HARDWARE( rmesa );

   /* Throttle the number of clear ioctls we do.
    */
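   /* The loop below reads the last completed clear count (via getparam
    * on drm minor >= 4, otherwise RADEON_LAST_CLEAR_REG) and sleeps
    * until no more than RADEON_MAX_CLEARS clears are outstanding.
    */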
   while ( 1 ) {
      int ret;

      if (rmesa->dri.screen->drmMinor >= 4) {
         drm_radeon_getparam_t gp;

         gp.param = RADEON_PARAM_LAST_CLEAR;
         gp.value = (int *)&clear;
         ret = drmCommandWriteRead( rmesa->dri.fd,
                                    DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
      } else
         ret = -EINVAL;

      if ( ret == -EINVAL ) {
         clear = INREG( RADEON_LAST_CLEAR_REG );
         ret = 0;
      }
      if ( ret ) {
         fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret );
         exit(1);
      }
      if ( RADEON_DEBUG & DEBUG_IOCTL ) {
         fprintf( stderr, "%s( %d )\n", __FUNCTION__, (int)clear );
         if ( ret ) fprintf( stderr, " ( RADEON_LAST_CLEAR register read directly )\n" );
      }

      if ( sarea->last_clear - clear <= RADEON_MAX_CLEARS ) {
         break;
      }

      if ( rmesa->do_usleeps ) {
         UNLOCK_HARDWARE( rmesa );
         DO_USLEEP( 1 );
         LOCK_HARDWARE( rmesa );
      }
   }

   for ( i = 0 ; i < dPriv->numClipRects ; ) {
      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
      drm_clip_rect_t *box = dPriv->pClipRects;
      drm_clip_rect_t *b = rmesa->sarea->boxes;
      drm_radeon_clear_t clear;
      drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
      GLint n = 0;

      if ( !all ) {
         for ( ; i < nr ; i++ ) {
            GLint x = box[i].x1;
            GLint y = box[i].y1;
            GLint w = box[i].x2 - x;
            GLint h = box[i].y2 - y;

            if ( x < cx ) w -= cx - x, x = cx;
            if ( y < cy ) h -= cy - y, y = cy;
            if ( x + w > cx + cw ) w = cx + cw - x;
            if ( y + h > cy + ch ) h = cy + ch - y;
            if ( w <= 0 ) continue;
            if ( h <= 0 ) continue;

            b->x1 = x;
            b->y1 = y;
            b->x2 = x + w;
            b->y2 = y + h;
            b++;
            n++;
         }
      } else {
         for ( ; i < nr ; i++ ) {
            *b++ = box[i];
            n++;
         }
      }

      rmesa->sarea->nbox = n;

      clear.flags = flags;
      clear.clear_color = rmesa->state.color.clear;
      clear.clear_depth = rmesa->state.depth.clear;
      clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
      clear.depth_mask  = rmesa->state.stencil.clear;
      clear.depth_boxes = depth_boxes;

      n--;
      b = rmesa->sarea->boxes;
      for ( ; n >= 0 ; n-- ) {
         depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
         depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
         depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2;
         depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2;
         depth_boxes[n].f[CLEAR_DEPTH] =
            (float)rmesa->state.depth.clear;
      }

      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
                             &clear, sizeof(drm_radeon_clear_t));

      if ( ret ) {
         UNLOCK_HARDWARE( rmesa );
         fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
         exit( 1 );
      }
   }

   UNLOCK_HARDWARE( rmesa );
}

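
/* Spin on the CP_IDLE ioctl until the command processor drains, with
 * bounded retries.  Must be called with the hardware lock held; exits
 * the process if the chip never goes idle.
 */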
void radeonWaitForIdleLocked( radeonContextPtr rmesa )
{
   int fd = rmesa->dri.fd;
   int to = 0;
   int ret, i = 0;

   rmesa->c_drawWaits++;

   do {
      do {
         ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE);
      } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY );
   } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) );

   if ( ret < 0 ) {
      UNLOCK_HARDWARE( rmesa );
      fprintf( stderr, "Error: Radeon timed out... exiting\n" );
      exit( -1 );
   }
}


static void radeonWaitForIdle( radeonContextPtr rmesa )
{
   LOCK_HARDWARE(rmesa);
   radeonWaitForIdleLocked( rmesa );
   UNLOCK_HARDWARE(rmesa);
}

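
/* Flush everything queued in userspace: pending elts, dirty state
 * atoms, and finally the command buffer itself.
 */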
void radeonFlush( GLcontext *ctx )
{
   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s\n", __FUNCTION__);

   if (rmesa->dma.flush)
      rmesa->dma.flush( rmesa );

   if (!is_empty_list(&rmesa->hw.dirty))
      radeonEmitState( rmesa );

   if (rmesa->store.cmd_used)
      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
}

/* Make sure all commands have been sent to the hardware and have
 * completed processing.
 */
void radeonFinish( GLcontext *ctx )
{
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
   radeonFlush( ctx );

   if (rmesa->do_irqs) {
      LOCK_HARDWARE( rmesa );
      radeonEmitIrqLocked( rmesa );
      UNLOCK_HARDWARE( rmesa );
      radeonWaitIrq( rmesa );
   }
   else
      radeonWaitForIdle( rmesa );
}


void radeonInitIoctlFuncs( GLcontext *ctx )
{
   ctx->Driver.Clear = radeonClear;
   ctx->Driver.Finish = radeonFinish;
   ctx->Driver.Flush = radeonFlush;
}