radeon: Fix legacy bo to not reuse DMA buffers until their refcount is 1.
src/mesa/drivers/dri/radeon/radeon_dma.c
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

/* COPY_DWORDS( dst, src, nr ): copy nr 32-bit words from src to dst,
 * leaving dst advanced past the copied data. */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
    int __tmp;                                                          \
    __asm__ __volatile__( "rep ; movsl"                                 \
                          : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)     \
                          : "0" (nr),                                   \
                            "D" ((long)dst),                            \
                            "S" ((long)src) );                          \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )     \
do {                                    \
    int j;                              \
    for ( j = 0 ; j < nr ; j++ )        \
        dst[j] = ((int *)src)[j];       \
    dst += nr;                          \
} while (0)
#endif
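
/*
 * Illustrative note (added for clarity, not from the original source):
 * because both variants advance `dst`, a caller can stream consecutive
 * attributes through one destination pointer, e.g.
 *
 *     uint32_t *dst = ...;        // some mapped DMA destination
 *     COPY_DWORDS(dst, pos, 3);   // dst now points just past the 3 words
 *     COPY_DWORDS(dst, uv, 2);
 */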

void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 4)
        COPY_DWORDS(out, data, count);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out++;
            data += stride;
        }
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 8)
        COPY_DWORDS(out, data, count * 2);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out += 2;
            data += stride;
        }
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 12) {
        COPY_DWORDS(out, data, count * 3);
    }
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out += 3;
            data += stride;
        }
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 16)
        COPY_DWORDS(out, data, count * 4);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out[3] = *(int *)(data + 12);
            out += 4;
            data += stride;
        }
}
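
/*
 * Usage sketch for the radeonEmitVec* helpers (hypothetical, for
 * illustration only): pulling vec3 positions out of an interleaved client
 * array.  With stride == 12 the fast COPY_DWORDS path is taken; any other
 * stride falls back to the gather loop.
 *
 *     struct vert { float pos[3]; float other[5]; };  // stride == 32
 *     struct vert verts[64];
 *     radeonEmitVec12(out, verts[0].pos, sizeof(struct vert), 64);
 */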

void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
                         const GLvoid * data, int size, int stride, int count)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    uint32_t *out;

    if (stride == 0) {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
        count = 1;
        aos->stride = 0;
    } else {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
        aos->stride = size;
    }

    aos->components = size;
    aos->count = count;

    out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
    switch (size) {
    case 1: radeonEmitVec4(out, data, stride, count); break;
    case 2: radeonEmitVec8(out, data, stride, count); break;
    case 3: radeonEmitVec12(out, data, stride, count); break;
    case 4: radeonEmitVec16(out, data, stride, count); break;
    default:
        assert(0);
        break;
    }
}
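
/*
 * Caller sketch (hypothetical, mirroring how a TCL setup path might use
 * this): one array-of-structs stream is emitted per enabled vertex
 * attribute, each getting its own DMA region and holding a reference on
 * its bo until radeonReleaseArrays() below drops it.
 *
 *     rcommon_emit_vector(ctx, &rmesa->tcl.aos[0],
 *                         VB->AttribPtr[_TNL_ATTRIB_POS]->data,
 *                         4, VB->AttribPtr[_TNL_ATTRIB_POS]->stride,
 *                         VB->Count);
 */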

void radeon_init_dma(radeonContextPtr rmesa)
{
    make_empty_list(&rmesa->dma.free);
    make_empty_list(&rmesa->dma.wait);
    make_empty_list(&rmesa->dma.reserved);
    rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}
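
/*
 * Summary of the buffer life cycle implemented below (a restatement for
 * clarity, not new behaviour):
 *
 *     reserved -- the bo currently mapped and being filled;
 *     wait     -- recently flushed bos, parked so the GPU has time to
 *                 consume their data before they are reused;
 *     free     -- idle bos ready for reuse; freed for good once their
 *                 expire_counter lags DMA_BO_FREE_TIME release cycles.
 */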

void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
    struct radeon_dma_bo *dma_bo = NULL;

    /* Raise the minimum size to at least the requested size,
     * aligned up to the next 16 bytes. */
    if (size > rmesa->dma.minimum_size)
        rmesa->dma.minimum_size = (size + 15) & (~15);

    radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
                 __FUNCTION__, size, rmesa->dma.minimum_size);

    /* unmap the old reserved bo */
    if (!is_empty_list(&rmesa->dma.reserved))
        radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

    if (is_empty_list(&rmesa->dma.free)
        || last_elem(&rmesa->dma.free)->bo->size < size) {
        dma_bo = CALLOC_STRUCT(radeon_dma_bo);
        assert(dma_bo);

again_alloc:
        dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
                                    0, rmesa->dma.minimum_size, 4,
                                    RADEON_GEM_DOMAIN_GTT, 0);

        if (!dma_bo->bo) {
            rcommonFlushCmdBuf(rmesa, __FUNCTION__);
            goto again_alloc;
        }
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    } else {
        /* Buffers are pushed and popped at the end of the list, so the
         * expire counter on unused buffers stays meaningful and they can
         * later be freed from the beginning of the list. */
        dma_bo = last_elem(&rmesa->dma.free);
        remove_from_list(dma_bo);
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    }

    rmesa->dma.current_used = 0;
    rmesa->dma.current_vertexptr = 0;

    if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
                                      first_elem(&rmesa->dma.reserved)->bo,
                                      RADEON_GEM_DOMAIN_GTT, 0))
        fprintf(stderr, "failure to revalidate BOs - badness\n");

    if (is_empty_list(&rmesa->dma.reserved)) {
        /* The command buffer was flushed in radeon_revalidate_bos,
         * which released the reserved bo; allocate a fresh one. */
        goto again_alloc;
    }

    radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
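
/*
 * Note on the again_alloc path above (summary added for clarity): if
 * radeon_bo_open() fails, flushing the command buffer lets the kernel
 * release bo memory before the retry; radeon_cs_space_check_with_bo() can
 * likewise trigger a flush, which migrates the reserved bo to the wait
 * list and leaves the reserved list empty, hence the second goto.
 */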

/* Allocate a region from the current reserved DMA buffer.  If there isn't
 * enough space left, grab a new buffer (and discard what was left of the
 * current one).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
                          struct radeon_bo **pbo, int *poffset,
                          int bytes, int alignment)
{
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

    if (rmesa->dma.flush)
        rmesa->dma.flush(rmesa->glCtx);

    assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

    alignment--;
    rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
        radeonRefillCurrentDmaRegion(rmesa, bytes);

    *poffset = rmesa->dma.current_used;
    *pbo = first_elem(&rmesa->dma.reserved)->bo;
    radeon_bo_ref(*pbo);

    /* Always align the next allocation to at least 16 bytes */
    rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;

    assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}
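
/*
 * Usage sketch (illustrative; `src` and the relocation step are
 * placeholders): upload 16 dwords of data through the DMA pool.
 *
 *     struct radeon_bo *bo;
 *     int offset;
 *     radeonAllocDmaRegion(rmesa, &bo, &offset, 16 * 4, 32);
 *     memcpy((char *)bo->ptr + offset, src, 16 * 4);
 *     ...emit a relocation for bo at offset, then...
 *     radeon_bo_unref(bo);
 *
 * The bo comes back referenced on the caller's behalf (and the reserved
 * bo is already mapped, so bo->ptr is valid), which is why callers must
 * unref it when they are done.
 */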

void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;

    if (RADEON_DEBUG & RADEON_DMA)
        fprintf(stderr, "%s\n", __FUNCTION__);

    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        remove_from_list(dma_bo);
        radeon_bo_unmap(dma_bo->bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
    if (is_empty_list(&rmesa->dma.reserved))
        return;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);

    rmesa->dma.current_used -= return_bytes;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

static int radeon_bo_is_idle(struct radeon_bo* bo)
{
    uint32_t domain;
    int ret = radeon_bo_is_busy(bo, &domain);
    if (ret == -EINVAL) {
        WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
                  "This may cause a small performance drop.\n");
    }
    /* Protect against a bug in legacy bo handling that can leave bos
     * referenced even after they should have been freed: never report a
     * bo as idle while someone else still holds a reference to it. */
    if (bo->cref != 1)
        return 0;
    return ret != -EBUSY;
}

void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;
    const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
    const int time = rmesa->dma.free.expire_counter;

    if (RADEON_DEBUG & RADEON_DMA) {
        size_t free = 0,
               wait = 0,
               reserved = 0;
        foreach(dma_bo, &rmesa->dma.free)
            ++free;

        foreach(dma_bo, &rmesa->dma.wait)
            ++wait;

        foreach(dma_bo, &rmesa->dma.reserved)
            ++reserved;

        fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
                __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
    }

    if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
        /* request updated cs processing information from the kernel */
        legacy_track_pending(rmesa->radeonScreen->bom, 0);
    }

    /* Move waiting bos to the free list.  The wait list gives the GPU
     * time to finish with the data before the buffer is reused. */
    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        if (dma_bo->expire_counter == time) {
            WARN_ONCE("Leaking dma buffer object!\n");
            /* Force the buffer free so we don't really start
             * leaking memory now. */
            while ((dma_bo->bo = radeon_bo_unref(dma_bo->bo))) {}
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        /* free objects that are too small to be used because of a large request */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        if (!radeon_bo_is_idle(dma_bo->bo))
            continue;
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.free, dma_bo);
    }

    /* unmap the last dma region */
    if (!is_empty_list(&rmesa->dma.reserved))
        radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

    /* move reserved bos to the wait list */
    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        /* free objects that are too small to be used because of a large request */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.wait, dma_bo);
    }

    /* free bos that have been unused for some time */
    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        if (dma_bo->expire_counter != time)
            break;
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

/* Flush the vertices in the current DMA region.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    struct radeon_dma *dma = &rmesa->dma;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);
    dma->flush = NULL;

    if (!is_empty_list(&dma->reserved)) {
        GLuint current_offset = dma->current_used;

        assert (dma->current_used +
                rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
                dma->current_vertexptr);

        if (dma->current_used != dma->current_vertexptr) {
            dma->current_used = dma->current_vertexptr;

            rmesa->vtbl.swtcl_flush(ctx, current_offset);
        }
        rmesa->swtcl.numverts = 0;
    }
}

/* Allocate space in the current DMA region.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
    GLuint bytes = vsize * nverts;
    void *head;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
        if (rmesa->dma.flush) {
            rmesa->dma.flush(rmesa->glCtx);
        }

        radeonRefillCurrentDmaRegion(rmesa, bytes);

        return NULL;
    }

    if (!rmesa->dma.flush) {
        /* make sure pending vertices are flushed if the cmdbuf is
         * flushed and DMA restarts */
        rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
        rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
    }

    ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
    ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
    ASSERT( rmesa->dma.current_used +
            rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
            rmesa->dma.current_vertexptr );

    head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
    rmesa->dma.current_vertexptr += bytes;
    rmesa->swtcl.numverts += nverts;
    return head;
}
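
/*
 * Caller sketch for rcommonAllocDmaLowVerts (hypothetical): a NULL return
 * means the region was flushed and refilled, not that allocation failed,
 * so a retry is expected to succeed.
 *
 *     GLfloat *v = rcommonAllocDmaLowVerts(rmesa, nr, rmesa->swtcl.vertex_size * 4);
 *     if (!v)
 *         v = rcommonAllocDmaLowVerts(rmesa, nr, rmesa->swtcl.vertex_size * 4);
 */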

void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
    radeonContextPtr radeon = RADEON_CONTEXT( ctx );
    int i;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (radeon->dma.flush) {
        radeon->dma.flush(radeon->glCtx);
    }
    for (i = 0; i < radeon->tcl.aos_count; i++) {
        if (radeon->tcl.aos[i].bo) {
            radeon_bo_unref(radeon->tcl.aos[i].bo);
            radeon->tcl.aos[i].bo = NULL;
        }
    }
}