/* src/mesa/drivers/dri/radeon/radeon_dma.c */
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include "radeon_common.h"
#include "main/simple_list.h"

#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
    int __tmp;                                                          \
    __asm__ __volatile__( "rep ; movsl"                                 \
                          : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)     \
                          : "0" (nr),                                   \
                            "D" ((long)dst),                            \
                            "S" ((long)src) );                          \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
    int j;                                                              \
    for ( j = 0 ; j < nr ; j++ )                                        \
        dst[j] = ((int *)src)[j];                                       \
    dst += nr;                                                          \
} while (0)
#endif
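
/* "rep ; movsl" copies ECX dwords from [ESI] to [EDI], advancing both
 * pointers (the direction flag is assumed clear, as the ABI requires).
 * The constraints load nr into ECX and the pointers into EDI/ESI; the
 * "=D" output leaves the advanced destination pointer in dst, mirroring
 * the "dst += nr" of the C fallback.
 */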

void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & DEBUG_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 4)
        COPY_DWORDS(out, data, count);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out++;
            data += stride;
        }
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & DEBUG_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 8)
        COPY_DWORDS(out, data, count * 2);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out += 2;
            data += stride;
        }
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & DEBUG_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 12) {
        COPY_DWORDS(out, data, count * 3);
    }
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out += 3;
            data += stride;
        }
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & DEBUG_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 16)
        COPY_DWORDS(out, data, count * 4);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out[3] = *(int *)(data + 12);
            out += 4;
            data += stride;
        }
}

void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
                         const GLvoid * data, int size, int stride, int count)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    uint32_t *out;

    if (stride == 0) {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
        count = 1;
        aos->stride = 0;
    } else {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
        aos->stride = size;
    }

    aos->components = size;
    aos->count = count;

    out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
    switch (size) {
    case 1: radeonEmitVec4(out, data, stride, count); break;
    case 2: radeonEmitVec8(out, data, stride, count); break;
    case 3: radeonEmitVec12(out, data, stride, count); break;
    case 4: radeonEmitVec16(out, data, stride, count); break;
    default:
        assert(0);
        break;
    }
}
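
/* A minimal usage sketch (illustrative, not part of the driver): a TCL
 * backend typically builds one AOS per vertex attribute.  Uploading a
 * tightly packed 4-component float position array might look like the
 * following; the example_* name is hypothetical.
 */
#if 0
static void example_emit_position(GLcontext *ctx, struct radeon_aos *aos,
                                  const GLfloat *positions, int nverts)
{
    /* size = 4 components, stride = 16 bytes (4 packed floats) */
    rcommon_emit_vector(ctx, aos, positions, 4, 16, nverts);
}
#endif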

void radeon_init_dma(radeonContextPtr rmesa)
{
    make_empty_list(&rmesa->dma.free);
    make_empty_list(&rmesa->dma.wait);
    make_empty_list(&rmesa->dma.reserved);
    rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}
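
/* Buffer objects cycle through the three lists initialized above (see
 * radeonReleaseDmaRegions): the head of "reserved" is the mapped buffer
 * currently being filled; on release, reserved buffers move to "wait",
 * which gives the GPU time to consume their data; once idle they move to
 * "free" for reuse, and are destroyed after sitting unused on the free
 * list for DMA_BO_FREE_TIME release cycles.
 */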

void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
    /* Raise the minimum buffer size to at least the requested size,
     * aligned up to the next multiple of 16 bytes. */
    if (size > rmesa->dma.minimum_size)
        rmesa->dma.minimum_size = (size + 15) & (~15);

    if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (rmesa->dma.flush) {
        rmesa->dma.flush(rmesa->glCtx);
    }

    /* unmap old reserved bo */
    if (!is_empty_list(&rmesa->dma.reserved))
        radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

alloc_bo:
    if (is_empty_list(&rmesa->dma.free)
        || last_elem(&rmesa->dma.free)->bo->size < size) {
        struct radeon_dma_bo *dma_bo = CALLOC(sizeof(struct radeon_dma_bo));
        assert(dma_bo);

again_alloc:
        dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
                                    0, rmesa->dma.minimum_size, 4,
                                    RADEON_GEM_DOMAIN_GTT, 0);

        if (!dma_bo->bo) {
            /* out of memory: flush queued commands and retry */
            rcommonFlushCmdBuf(rmesa, __FUNCTION__);
            goto again_alloc;
        }
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    } else {
        /* reuse the most recently freed buffer */
        struct radeon_dma_bo *dma_bo = last_elem(&rmesa->dma.free);
        assert(dma_bo->bo->cref == 1);
        remove_from_list(dma_bo);
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    }

    rmesa->dma.current_used = 0;
    rmesa->dma.current_vertexptr = 0;

    if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
                                      first_elem(&rmesa->dma.reserved)->bo,
                                      RADEON_GEM_DOMAIN_GTT, 0))
        fprintf(stderr, "failure to revalidate BOs - badness\n");

    if (is_empty_list(&rmesa->dma.reserved)) {
        /* The space check flushed the command buffer and released the
         * reserved list, so start over with a fresh buffer. */
        goto alloc_bo;
    }

    radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocate a region from the current DMA buffer (the head of
 * rmesa->dma.reserved).  If there isn't enough space there, grab a new
 * buffer (and discard what was left of the current one).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
                          struct radeon_bo **pbo, int *poffset,
                          int bytes, int alignment)
{
    if (RADEON_DEBUG & DEBUG_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

    if (rmesa->dma.flush)
        rmesa->dma.flush(rmesa->glCtx);

    assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

    alignment--;
    rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
        radeonRefillCurrentDmaRegion(rmesa, bytes);

    *poffset = rmesa->dma.current_used;
    *pbo = first_elem(&rmesa->dma.reserved)->bo;
    radeon_bo_ref(*pbo);

    /* Always align to at least 16 bytes */
    rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;

    assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}
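
/* A minimal usage sketch (illustrative, not part of the driver): the
 * allocator returns a referenced, already-mapped BO plus an offset into
 * it.  A hypothetical caller uploading raw data could do the following
 * (memcpy requires <string.h>):
 */
#if 0
static void example_upload(radeonContextPtr rmesa, const void *src, int bytes)
{
    struct radeon_bo *bo = NULL;
    int offset = 0;

    radeonAllocDmaRegion(rmesa, &bo, &offset, bytes, 32);
    memcpy((char *)bo->ptr + offset, src, bytes);
    radeon_bo_unref(bo);    /* drop the reference the allocator took for us */
}
#endif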

void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;

    if (RADEON_DEBUG & DEBUG_DMA)
        fprintf(stderr, "%s\n", __FUNCTION__);

    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        remove_from_list(dma_bo);
        radeon_bo_unmap(dma_bo->bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
    if (is_empty_list(&rmesa->dma.reserved))
        return;

    if (RADEON_DEBUG & DEBUG_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);

    rmesa->dma.current_used -= return_bytes;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

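/* A buffer whose reference count has dropped back to one is held only by
 * our lists; the command stream has released it, so the GPU is assumed
 * to be done with its data.
 */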
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
    return bo->cref == 1;
}

void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;
    const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
    const int time = rmesa->dma.free.expire_counter;

    if (RADEON_DEBUG & DEBUG_DMA) {
        size_t free = 0,
               wait = 0,
               reserved = 0;
        foreach(dma_bo, &rmesa->dma.free)
            ++free;

        foreach(dma_bo, &rmesa->dma.wait)
            ++wait;

        foreach(dma_bo, &rmesa->dma.reserved)
            ++reserved;

        fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %u\n",
                __FUNCTION__, free, wait, reserved,
                (unsigned)rmesa->dma.minimum_size);
    }

    if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
        /* request updated cs processing information from kernel */
        legacy_track_pending(rmesa->radeonScreen->bom, 0);
    }

    /* Move waiting bos to the free list.  The wait list gives the GPU
     * time to finish with the data before a buffer is reused. */
    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        if (dma_bo->expire_counter == time) {
            WARN_ONCE("Leaking dma buffer object!\n");
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        /* free objects that are too small to satisfy the current
         * minimum size request */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        if (!radeon_bo_is_idle(dma_bo->bo))
            continue;
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.free, dma_bo);
    }

    /* unmap the last dma region */
    if (!is_empty_list(&rmesa->dma.reserved))
        radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

    /* move reserved bos to the wait list */
    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        /* free objects that are too small to satisfy the current
         * minimum size request */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.wait, dma_bo);
    }

    /* free bos that have been unused for some time */
    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        if (dma_bo->expire_counter != time)
            break;
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    struct radeon_dma *dma = &rmesa->dma;

    if (RADEON_DEBUG & DEBUG_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    dma->flush = NULL;

    if (!is_empty_list(&dma->reserved)) {
        GLuint current_offset = dma->current_used;

        assert(dma->current_used +
               rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
               dma->current_vertexptr);

        if (dma->current_used != dma->current_vertexptr) {
            dma->current_used = dma->current_vertexptr;

            rmesa->vtbl.swtcl_flush(ctx, current_offset);
        }
        rmesa->swtcl.numverts = 0;
    }
}

/* Allocate space in the current dma region.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
    GLuint bytes = vsize * nverts;
    void *head;

restart:
    if (RADEON_DEBUG & DEBUG_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
        radeonRefillCurrentDmaRegion(rmesa, bytes);
    }

    if (!rmesa->dma.flush) {
        /* make sure we have enough space to use this in cmdbuf */
        rcommonEnsureCmdBufSpace(rmesa,
                                 rmesa->hw.max_state_size + (20*sizeof(int)),
                                 __FUNCTION__);
        /* if the cmdbuf was flushed, the DMA region is gone; restart */
        if (is_empty_list(&rmesa->dma.reserved))
            goto restart;
        rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
        rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
    }

    ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
    ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
    ASSERT( rmesa->dma.current_used +
            rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
            rmesa->dma.current_vertexptr );

    head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
    rmesa->dma.current_vertexptr += bytes;
    rmesa->swtcl.numverts += nverts;
    return head;
}
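
/* A minimal usage sketch (illustrative, not part of the driver): the
 * swtcl path asks for space for nverts vertices and writes them straight
 * into the returned pointer; vsize must equal rmesa->swtcl.vertex_size * 4
 * (memcpy requires <string.h>):
 */
#if 0
static void example_emit_vertex(radeonContextPtr rmesa, const GLfloat *v)
{
    int vsize = rmesa->swtcl.vertex_size * 4;   /* bytes per vertex */
    GLfloat *dst = (GLfloat *) rcommonAllocDmaLowVerts(rmesa, 1, vsize);

    memcpy(dst, v, vsize);
}
#endif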

void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
    radeonContextPtr radeon = RADEON_CONTEXT( ctx );
    int i;

    if (RADEON_DEBUG & DEBUG_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (radeon->dma.flush) {
        radeon->dma.flush(radeon->glCtx);
    }
    for (i = 0; i < radeon->tcl.aos_count; i++) {
        if (radeon->tcl.aos[i].bo) {
            radeon_bo_unref(radeon->tcl.aos[i].bo);
            radeon->tcl.aos[i].bo = NULL;
        }
    }
}