/* mesa.git: src/mesa/drivers/dri/radeon/radeon_dma.c */
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

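/*
 * Copy 'nr' 32-bit words from 'src' to 'dst', advancing 'dst' past the
 * copied data.  On x86 this compiles to a single "rep movsl" (ECX = count,
 * ESI = source, EDI = destination); elsewhere a plain dword loop is used.
 */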
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
    do {                                                                \
        int __tmp;                                                      \
        __asm__ __volatile__( "rep ; movsl"                             \
                              : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
                              : "0" (nr),                               \
                                "D" ((long)dst),                        \
                                "S" ((long)src) );                      \
    } while (0)
#else
#define COPY_DWORDS( dst, src, nr )                                     \
    do {                                                                \
        int j;                                                          \
        for ( j = 0 ; j < nr ; j++ )                                    \
            dst[j] = ((int *)src)[j];                                   \
        dst += nr;                                                      \
    } while (0)
#endif

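/*
 * radeonEmitVec{4,8,12,16} gather 'count' vertex elements of 1, 2, 3 or 4
 * dwords each from a source array with byte stride 'stride' into a packed
 * destination buffer.  When the source is already packed, the whole copy
 * is done with a single COPY_DWORDS.
 */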
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 4)
        COPY_DWORDS(out, data, count);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out++;
            data += stride;
        }
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 8)
        COPY_DWORDS(out, data, count * 2);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out += 2;
            data += stride;
        }
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 12) {
        COPY_DWORDS(out, data, count * 3);
    }
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out += 3;
            data += stride;
        }
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 16)
        COPY_DWORDS(out, data, count * 4);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out[3] = *(int *)(data + 12);
            out += 4;
            data += stride;
        }
}

void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
                         const GLvoid * data, int size, int stride, int count)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    uint32_t *out;

    if (stride == 0) {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
        count = 1;
        aos->stride = 0;
    } else {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
        aos->stride = size;
    }

    aos->components = size;
    aos->count = count;

    radeon_bo_map(aos->bo, 1);
    out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
    switch (size) {
    case 1: radeonEmitVec4(out, data, stride, count); break;
    case 2: radeonEmitVec8(out, data, stride, count); break;
    case 3: radeonEmitVec12(out, data, stride, count); break;
    case 4: radeonEmitVec16(out, data, stride, count); break;
    default:
        assert(0);
        break;
    }
    radeon_bo_unmap(aos->bo);
}
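
/*
 * Illustrative sketch only (not part of the driver): how a TCL setup path
 * might use rcommon_emit_vector() to upload a tightly packed vec3 position
 * array into AOS slot 0.  The 'positions' array, the slot index, and the
 * helper name are assumptions made for the example.
 */
#if 0
static void example_emit_positions(GLcontext *ctx, const GLfloat *positions,
                                   int count)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);

    /* size = 3 components, stride = 12 bytes (tightly packed vec3) */
    rcommon_emit_vector(ctx, &rmesa->tcl.aos[0], positions, 3, 12, count);
    rmesa->tcl.aos_count = 1;
}
#endif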

void radeon_init_dma(radeonContextPtr rmesa)
{
    make_empty_list(&rmesa->dma.free);
    make_empty_list(&rmesa->dma.wait);
    make_empty_list(&rmesa->dma.reserved);
    rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}
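
/*
 * DMA buffers cycle through three lists: 'reserved' (currently being
 * filled), 'wait' (handed to the GPU; waiting for it to finish reading),
 * and 'free' (idle and reusable until they expire).  See
 * radeonReleaseDmaRegions() for the transitions.
 */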

void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
    struct radeon_dma_bo *dma_bo = NULL;
    /* Raise the minimum size to at least the requested size,
       aligned up to the next 16 bytes. */
    if (size > rmesa->dma.minimum_size)
        rmesa->dma.minimum_size = (size + 15) & (~15);

    radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
                 __FUNCTION__, size, rmesa->dma.minimum_size);

    if (!is_empty_list(&rmesa->dma.reserved))
        radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

    if (is_empty_list(&rmesa->dma.free)
        || last_elem(&rmesa->dma.free)->bo->size < size) {
        dma_bo = CALLOC_STRUCT(radeon_dma_bo);
        assert(dma_bo);

again_alloc:
        dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
                                    0, rmesa->dma.minimum_size, 4,
                                    RADEON_GEM_DOMAIN_GTT, 0);

        if (!dma_bo->bo) {
            rcommonFlushCmdBuf(rmesa, __FUNCTION__);
            goto again_alloc;
        }
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    } else {
        /* We push and pop buffers at the end of the list, so we can keep
           a counter of unused buffers and later free them from the
           beginning of the list. */
        dma_bo = last_elem(&rmesa->dma.free);
        remove_from_list(dma_bo);
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    }

    rmesa->dma.current_used = 0;
    rmesa->dma.current_vertexptr = 0;

    if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
                                      first_elem(&rmesa->dma.reserved)->bo,
                                      RADEON_GEM_DOMAIN_GTT, 0))
        fprintf(stderr, "failure to revalidate BOs - badness\n");

    if (is_empty_list(&rmesa->dma.reserved)) {
        /* The command buffer has been flushed in radeon_revalidate_bos;
           start over with a fresh allocation. */
        goto again_alloc;
    }
    radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocate a region from the current reserved DMA buffer.  If there isn't
 * enough space left, grab a new buffer (and discard what was left of the
 * current one).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
                          struct radeon_bo **pbo, int *poffset,
                          int bytes, int alignment)
{
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

    if (rmesa->dma.flush)
        rmesa->dma.flush(rmesa->glCtx);

    assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

    alignment--;
    rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
        radeonRefillCurrentDmaRegion(rmesa, bytes);

    *poffset = rmesa->dma.current_used;
    *pbo = first_elem(&rmesa->dma.reserved)->bo;
    radeon_bo_ref(*pbo);

    /* Always align to at least 16 bytes */
    rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;

    assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}
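
/*
 * Illustrative sketch only (not part of the driver): allocate a 16-byte
 * aligned scratch region and fill it through a CPU mapping.  The 'payload'
 * buffer and the helper name are assumptions made for the example; the
 * final unref pairs with the reference radeonAllocDmaRegion() takes on the
 * caller's behalf (real code would hold it until the hardware command
 * referencing the region has been emitted).
 */
#if 0
static void example_upload_dwords(radeonContextPtr rmesa,
                                  const uint32_t *payload, int dwords)
{
    struct radeon_bo *bo = NULL;
    int offset = 0;
    uint32_t *out;

    radeonAllocDmaRegion(rmesa, &bo, &offset, dwords * 4, 16);

    radeon_bo_map(bo, 1);    /* 1 = map for writing */
    out = (uint32_t *)((char *)bo->ptr + offset);
    COPY_DWORDS(out, payload, dwords);
    radeon_bo_unmap(bo);

    radeon_bo_unref(bo);     /* done with our reference */
}
#endif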

void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;

    if (RADEON_DEBUG & RADEON_DMA)
        fprintf(stderr, "%s\n", __FUNCTION__);

    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
    if (is_empty_list(&rmesa->dma.reserved))
        return;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);

    rmesa->dma.current_used -= return_bytes;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}
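
/* Ask the kernel whether the GPU is still using 'bo'.  A kernel or libdrm
 * without busy-query support (-EINVAL) is treated as idle. */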
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
    uint32_t domain;
    int ret = radeon_bo_is_busy(bo, &domain);
    if (ret == -EINVAL) {
        WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
                  "This may cause a small performance drop.\n");
    }
    return ret != -EBUSY;
}

void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;
    const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
    const int time = rmesa->dma.free.expire_counter;

    if (RADEON_DEBUG & RADEON_DMA) {
        size_t free = 0,
               wait = 0,
               reserved = 0;
        foreach(dma_bo, &rmesa->dma.free)
            ++free;

        foreach(dma_bo, &rmesa->dma.wait)
            ++wait;

        foreach(dma_bo, &rmesa->dma.reserved)
            ++reserved;

        fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %d\n",
                __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
    }

    if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
        /* Request updated cs processing information from the kernel. */
        legacy_track_pending(rmesa->radeonScreen->bom, 0);
    }

    if (!is_empty_list(&rmesa->dma.reserved))
        radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

    /* Move waiting BOs to the free list.  The wait list gives the GPU
       time to finish with the data before the buffer is reused. */
    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        if (dma_bo->expire_counter == time) {
            WARN_ONCE("Leaking dma buffer object!\n");
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        /* Free objects that are too small to satisfy the current
           (large) request size. */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        if (!radeon_bo_is_idle(dma_bo->bo)) {
            if (rmesa->radeonScreen->driScreen->dri2.enabled)
                break;
            continue;
        }
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.free, dma_bo);
    }

    /* Move reserved BOs to the wait list. */
    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        /* Free objects that are too small to satisfy the current
           (large) request size. */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.wait, dma_bo);
    }

    /* Free BOs that have been unused for some time. */
    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        if (dma_bo->expire_counter != time)
            break;
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    struct radeon_dma *dma = &rmesa->dma;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    dma->flush = NULL;

    radeon_bo_unmap(rmesa->swtcl.bo);

    if (!is_empty_list(&dma->reserved)) {
        GLuint current_offset = dma->current_used;

        assert(dma->current_used +
               rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
               dma->current_vertexptr);

        if (dma->current_used != dma->current_vertexptr) {
            dma->current_used = dma->current_vertexptr;

            rmesa->vtbl.swtcl_flush(ctx, current_offset);
        }
        rmesa->swtcl.numverts = 0;
    }
    radeon_bo_unref(rmesa->swtcl.bo);
    rmesa->swtcl.bo = NULL;
}

/* Allocate space in the current dma region.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
    GLuint bytes = vsize * nverts;
    void *head;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
        if (rmesa->dma.flush) {
            rmesa->dma.flush(rmesa->glCtx);
        }

        radeonRefillCurrentDmaRegion(rmesa, bytes);

        return NULL;
    }

    if (!rmesa->dma.flush) {
        /* The command buffer was flushed, so DMA emission has to be
           restarted: re-arm the flush callback. */
        rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
        rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
    }

    ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
    ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
    ASSERT( rmesa->dma.current_used +
            rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
            rmesa->dma.current_vertexptr );

    if (!rmesa->swtcl.bo) {
        rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
        radeon_bo_ref(rmesa->swtcl.bo);
        radeon_bo_map(rmesa->swtcl.bo, 1);
    }

    head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
    rmesa->dma.current_vertexptr += bytes;
    rmesa->swtcl.numverts += nverts;
    return head;
}
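
/*
 * Illustrative sketch only (not part of the driver): how a software-TCL
 * caller might request vertex space, retrying once because
 * rcommonAllocDmaLowVerts() returns NULL when it had to refill the DMA
 * region.  The helper name is an assumption made for the example.
 */
#if 0
static void *example_get_verts(radeonContextPtr rmesa, int nverts)
{
    const int vsize = rmesa->swtcl.vertex_size * 4;  /* bytes per vertex */
    void *head = rcommonAllocDmaLowVerts(rmesa, nverts, vsize);

    if (!head)   /* region was refilled; fresh space should be available */
        head = rcommonAllocDmaLowVerts(rmesa, nverts, vsize);

    return head;
}
#endif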

void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
    radeonContextPtr radeon = RADEON_CONTEXT( ctx );
    int i;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (radeon->dma.flush) {
        radeon->dma.flush(radeon->glCtx);
    }
    for (i = 0; i < radeon->tcl.aos_count; i++) {
        if (radeon->tcl.aos[i].bo) {
            radeon_bo_unref(radeon->tcl.aos[i].bo);
            radeon->tcl.aos[i].bo = NULL;
        }
    }
}