/* src/mesa/drivers/dri/radeon/radeon_dma.c */
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

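/* Copy `nr' 32-bit words from `src' to `dst', advancing `dst' past the
 * copied words.  On x86 this collapses to a single "rep movsl"; elsewhere
 * it is a plain loop.
 */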
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
			      : "0" (nr),				\
				"D" ((long)dst),			\
				"S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int j;								\
	for ( j = 0 ; j < nr ; j++ )					\
		dst[j] = ((int *)src)[j];				\
	dst += nr;							\
} while (0)
#endif

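/* radeonEmitVec{4,8,12,16}: gather `count' vertex elements of one, two,
 * three or four dwords from `data', which may have an arbitrary byte
 * `stride', into the tightly packed buffer at `out'.  When the input is
 * already packed (stride equals the element size) the copy takes the
 * COPY_DWORDS fast path.
 */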
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12) {
		COPY_DWORDS(out, data, count * 3);
	}
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

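/* Upload one vertex attribute array into a freshly allocated DMA region
 * and describe it in `aos'.  A stride of 0 means a constant attribute:
 * only a single element is copied and aos->stride stays 0.
 *
 * A minimal usage sketch (hypothetical caller; the `pos' array and the
 * choice of aos slot are illustrative, not code from this driver):
 *
 *   GLfloat pos[6] = { 0.0f, 0.0f, 0.0f,  1.0f, 0.0f, 0.0f };
 *   rcommon_emit_vector(ctx, &rmesa->tcl.aos[0], pos,
 *                       3, 3 * sizeof(GLfloat), 2);
 */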
void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
}

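/* Initialize the three DMA buffer-object lists (free, wait, reserved)
 * and the default minimum buffer size.
 */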
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

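/* Make a mapped buffer of at least `size' bytes the current reserved
 * buffer: unmap the old one, then either recycle a large-enough BO from
 * the free list or open a new one in GTT, retrying after a command-buffer
 * flush if the allocation fails.
 */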
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;

	/* Raise the minimum size to at least the requested size,
	   aligned up to the next 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
		     __FUNCTION__, size, rmesa->dma.minimum_size);

	/* unmap old reserved bo */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	if (is_empty_list(&rmesa->dma.free)
	    || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers from the end of the list so we
		   can keep a counter of unused buffers and later free them
		   from the beginning of the list. */
		dma_bo = last_elem(&rmesa->dma.free);
		assert(dma_bo->bo->cref == 1);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr, "failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* The command buffer was flushed while revalidating;
		   allocate a fresh buffer. */
		goto again_alloc;
	}

	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocate a region from the current reserved DMA buffer.  If there isn't
 * enough space left, grab a new buffer (discarding whatever was left of
 * the current one).  See rcommon_emit_vector above for a typical caller.
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

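/* Release every DMA buffer object on the free, wait and reserved lists,
 * unmapping the reserved one; typically called at context teardown.
 */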
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;

	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unmap(dma_bo->bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

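/* Give back the last `return_bytes' of the current reserved buffer,
 * rewinding both the used and vertex pointers.
 */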
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

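/* Returns nonzero once the GPU is no longer using `bo'.  A kernel or
 * libdrm without busy-query support (-EINVAL) is treated as idle, at the
 * cost of possibly reusing a buffer slightly too early.
 */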
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			  "This may cause a small performance drop.\n");
	}
	return ret != -EBUSY;
}

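/* Age the DMA buffer lists: reserved buffers are unmapped and moved to
 * the wait list, waiting buffers whose GPU work has finished move to the
 * free list, and free buffers that have sat unused for DMA_BO_FREE_TIME
 * cycles (or are now smaller than the current minimum size) are freed.
 */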
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
			__FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
		/* request updated cs processing information from kernel */
		legacy_track_pending(rmesa->radeonScreen->bom, 0);
	}

	/* Move idle bos from the wait list to the free list; the wait list
	   gives the GPU time to finish with the data before reuse. */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* Free objects that are too small to satisfy the current
		   (large) minimum request size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		if (!radeon_bo_is_idle(dma_bo->bo))
			continue;
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* unmap the last dma region */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	/* move reserved to wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* Free objects that are too small to satisfy the current
		   (large) minimum request size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

/* Flush the software-TCL vertices accumulated in the current dma region.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	dma->flush = NULL;

	if (!is_empty_list(&dma->reserved)) {
		GLuint current_offset = dma->current_used;

		assert (dma->current_used +
			rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
			dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
}

/* Allocate space for `nverts' software-TCL vertices of `vsize' bytes each
 * in the current dma region, refilling the region first if necessary.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		if (rmesa->dma.flush) {
			rmesa->dma.flush(rmesa->glCtx);
		}

		radeonRefillCurrentDmaRegion(rmesa, bytes);

		return NULL;
	}

	if (!rmesa->dma.flush) {
		/* Arrange for the stored vertices to be flushed if the
		   command buffer is flushed (which restarts the DMA region). */
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	ASSERT( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}

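/* Flush any pending software-TCL vertices and drop the references on the
 * vertex AOS buffer objects.  The `newinputs' mask is not consulted in
 * this common implementation.
 */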
void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
	radeonContextPtr radeon = RADEON_CONTEXT( ctx );
	int i;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (radeon->dma.flush) {
		radeon->dma.flush(radeon->glCtx);
	}
	for (i = 0; i < radeon->tcl.aos_count; i++) {
		if (radeon->tcl.aos[i].bo) {
			radeon_bo_unref(radeon->tcl.aos[i].bo);
			radeon->tcl.aos[i].bo = NULL;
		}
	}
}