radeon/r600: Fix remaining warnings when building 64 bit binary.
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_dma.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
5
6 The Weather Channel (TM) funded Tungsten Graphics to develop the
7 initial release of the Radeon 8500 driver under the XFree86 license.
8 This notice must be preserved.
9
10 All Rights Reserved.
11
12 Permission is hereby granted, free of charge, to any person obtaining a
13 copy of this software and associated documentation files (the "Software"),
14 to deal in the Software without restriction, including without limitation
15 on the rights to use, copy, modify, merge, publish, distribute, sub
16 license, and/or sell copies of the Software, and to permit persons to whom
17 the Software is furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice (including the next
20 paragraph) shall be included in all copies or substantial portions of the
21 Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
26 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
27 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
28 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
29 USE OR OTHER DEALINGS IN THE SOFTWARE.
30
31 **************************************************************************/
32
33 #include <errno.h>
34 #include "radeon_common.h"
35 #include "main/simple_list.h"
36
/* COPY_DWORDS(dst, src, nr): copy 'nr' 32-bit words from 'src' to 'dst'.
 *
 * Side effect: 'dst' is advanced past the copied region in BOTH variants
 * (the x86 asm writes the final EDI back through the "=D"(dst) output;
 * the C fallback does an explicit "dst += nr").  'src' is not modified:
 * the asm discards the final ESI into the scratch '__tmp' output.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	/* rep movsl: ECX = count, EDI = dest, ESI = source */		\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
			      : "0" (nr),				\
				"D" ((long)dst),			\
				"S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int j;								\
	for ( j = 0 ; j < nr ; j++ )					\
		dst[j] = ((int *)src)[j];				\
	dst += nr;							\
} while (0)
#endif
56
57 void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
58 {
59 int i;
60
61 if (RADEON_DEBUG & DEBUG_VERTS)
62 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
63 __FUNCTION__, count, stride, (void *)out, (void *)data);
64
65 if (stride == 4)
66 COPY_DWORDS(out, data, count);
67 else
68 for (i = 0; i < count; i++) {
69 out[0] = *(int *)data;
70 out++;
71 data += stride;
72 }
73 }
74
75 void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
76 {
77 int i;
78
79 if (RADEON_DEBUG & DEBUG_VERTS)
80 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
81 __FUNCTION__, count, stride, (void *)out, (void *)data);
82
83 if (stride == 8)
84 COPY_DWORDS(out, data, count * 2);
85 else
86 for (i = 0; i < count; i++) {
87 out[0] = *(int *)data;
88 out[1] = *(int *)(data + 4);
89 out += 2;
90 data += stride;
91 }
92 }
93
94 void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
95 {
96 int i;
97
98 if (RADEON_DEBUG & DEBUG_VERTS)
99 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
100 __FUNCTION__, count, stride, (void *)out, (void *)data);
101
102 if (stride == 12) {
103 COPY_DWORDS(out, data, count * 3);
104 }
105 else
106 for (i = 0; i < count; i++) {
107 out[0] = *(int *)data;
108 out[1] = *(int *)(data + 4);
109 out[2] = *(int *)(data + 8);
110 out += 3;
111 data += stride;
112 }
113 }
114
115 void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
116 {
117 int i;
118
119 if (RADEON_DEBUG & DEBUG_VERTS)
120 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
121 __FUNCTION__, count, stride, (void *)out, (void *)data);
122
123 if (stride == 16)
124 COPY_DWORDS(out, data, count * 4);
125 else
126 for (i = 0; i < count; i++) {
127 out[0] = *(int *)data;
128 out[1] = *(int *)(data + 4);
129 out[2] = *(int *)(data + 8);
130 out[3] = *(int *)(data + 12);
131 out += 4;
132 data += stride;
133 }
134 }
135
136 void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
137 const GLvoid * data, int size, int stride, int count)
138 {
139 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
140 uint32_t *out;
141
142 if (stride == 0) {
143 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
144 count = 1;
145 aos->stride = 0;
146 } else {
147 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
148 aos->stride = size;
149 }
150
151 aos->components = size;
152 aos->count = count;
153
154 out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
155 switch (size) {
156 case 1: radeonEmitVec4(out, data, stride, count); break;
157 case 2: radeonEmitVec8(out, data, stride, count); break;
158 case 3: radeonEmitVec12(out, data, stride, count); break;
159 case 4: radeonEmitVec16(out, data, stride, count); break;
160 default:
161 assert(0);
162 break;
163 }
164 }
165
/* Initialize the DMA buffer-object machinery: three empty lists plus the
 * default allocation size.
 *   free     - idle buffers available for reuse
 *   wait     - recently retired buffers, possibly still busy on the GPU
 *   reserved - the buffer currently being filled
 */
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	/* grows on demand in radeonRefillCurrentDmaRegion for large requests */
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}
173
/* Retire the current reserved DMA buffer and install a new one of at least
 * 'size' bytes at the head of the reserved list, mapped and with the
 * allocation cursors reset to zero.  Reuses the newest adequately sized
 * buffer from the free list when possible, otherwise opens a new bo.
 */
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;
	/* we set minimum sizes to at least requested size
	   aligned to next 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
		fprintf(stderr, "%s\n", __FUNCTION__);

	/* push any buffered vertices out of the old region first */
	if (rmesa->dma.flush) {
		rmesa->dma.flush(rmesa->glCtx);
	}

	/* unmap old reserved bo */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	if (is_empty_list(&rmesa->dma.free)
	      || last_elem(&rmesa->dma.free)->bo->size < size) {
		/* nothing reusable on the free list: allocate a new bo */
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			/* presumably out of GTT space: flush the command
			   buffer so the kernel can reclaim bos, then retry */
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers from end of list so we can keep
		   counter on unused buffers for later freeing them from
		   begin of list */
		dma_bo = last_elem(&rmesa->dma.free);
		assert(dma_bo->bo->cref == 1);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	/* make sure the new bo fits in the command stream's space budget */
	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr,"failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* Cmd buff have been flushed in radeon_revalidate_bos */
		/* NOTE(review): this jump re-enters the allocation path with
		   'dma_bo' pointing at whatever was previously inserted into
		   the reserved list; confirm that dma_bo (and its ->bo) is
		   still valid when this path is taken. */
		goto again_alloc;
	}

	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
233
234 /* Allocates a region from rmesa->dma.current. If there isn't enough
235 * space in current, grab a new buffer (and discard what was left of current)
236 */
237 void radeonAllocDmaRegion(radeonContextPtr rmesa,
238 struct radeon_bo **pbo, int *poffset,
239 int bytes, int alignment)
240 {
241 if (RADEON_DEBUG & DEBUG_IOCTL)
242 fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
243
244 if (rmesa->dma.flush)
245 rmesa->dma.flush(rmesa->glCtx);
246
247 assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
248
249 alignment--;
250 rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
251
252 if (is_empty_list(&rmesa->dma.reserved)
253 || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
254 radeonRefillCurrentDmaRegion(rmesa, bytes);
255
256 *poffset = rmesa->dma.current_used;
257 *pbo = first_elem(&rmesa->dma.reserved)->bo;
258 radeon_bo_ref(*pbo);
259
260 /* Always align to at least 16 bytes */
261 rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
262 rmesa->dma.current_vertexptr = rmesa->dma.current_used;
263
264 assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
265 }
266
267 void radeonFreeDmaRegions(radeonContextPtr rmesa)
268 {
269 struct radeon_dma_bo *dma_bo = CALLOC_STRUCT(radeon_dma_bo);
270 struct radeon_dma_bo *temp;
271 if (RADEON_DEBUG & DEBUG_DMA)
272 fprintf(stderr, "%s\n", __FUNCTION__);
273
274 foreach_s(dma_bo, temp, &rmesa->dma.free) {
275 remove_from_list(dma_bo);
276 radeon_bo_unref(dma_bo->bo);
277 FREE(dma_bo);
278 }
279
280 foreach_s(dma_bo, temp, &rmesa->dma.wait) {
281 remove_from_list(dma_bo);
282 radeon_bo_unref(dma_bo->bo);
283 FREE(dma_bo);
284 }
285
286 foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
287 remove_from_list(dma_bo);
288 radeon_bo_unmap(dma_bo->bo);
289 radeon_bo_unref(dma_bo->bo);
290 FREE(dma_bo);
291 }
292 }
293
/* Give back the last 'return_bytes' of space taken from the current
 * reserved DMA region, rewinding both allocation cursors.  No-op when
 * nothing is reserved.
 */
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}
304
/* Return non-zero when the GPU is no longer using 'bo'.
 *
 * Anything other than an explicit -EBUSY from the busy query is treated
 * as idle; -EINVAL (busy query unsupported by libdrm/kernel) additionally
 * triggers a one-time warning.
 */
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	const int busy = radeon_bo_is_busy(bo, &domain);

	if (busy == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			  "This may cause small performance drop for you.\n");
	}
	return busy != -EBUSY;
}
315
/* Age the DMA buffer lists one step:
 *   wait -> free      (once the GPU is done with a buffer)
 *   reserved -> wait  (the just-finished buffer starts its grace period)
 *   free -> destroyed (buffers unused for DMA_BO_FREE_TIME steps)
 * Buffers smaller than the current minimum_size are destroyed immediately
 * since they can no longer satisfy requests.
 */
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	/* the free-list head's expire_counter apparently doubles as a
	   monotonic tick; buffers released now expire DMA_BO_FREE_TIME
	   ticks in the future -- TODO confirm against radeon_dma_bo decl */
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & DEBUG_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
			__FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
		/* request updated cs processing information from kernel */
		legacy_track_pending(rmesa->radeonScreen->bom, 0);
	}
	/* move waiting bos to free list.
	   wait list provides gpu time to handle data before reuse */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			/* still busy after its whole grace period: give up
			   tracking it rather than reuse a busy buffer */
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* not idle yet: leave it on the wait list for another round */
		if (!radeon_bo_is_idle(dma_bo->bo))
			continue;
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* unmap the last dma region */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
	/* move reserved to wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time; expired entries
	   sit at the head because insertion above is at the tail */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

}
395
396
397 /* Flush vertices in the current dma region.
398 */
399 void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
400 {
401 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
402 struct radeon_dma *dma = &rmesa->dma;
403
404
405 if (RADEON_DEBUG & DEBUG_IOCTL)
406 fprintf(stderr, "%s\n", __FUNCTION__);
407 dma->flush = NULL;
408
409 if (!is_empty_list(&dma->reserved)) {
410 GLuint current_offset = dma->current_used;
411
412 assert (dma->current_used +
413 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
414 dma->current_vertexptr);
415
416 if (dma->current_used != dma->current_vertexptr) {
417 dma->current_used = dma->current_vertexptr;
418
419 rmesa->vtbl.swtcl_flush(ctx, current_offset);
420 }
421 rmesa->swtcl.numverts = 0;
422 }
423 }
/* Alloc space in the current dma region.
 *
 * Reserves room for 'nverts' software-TCL vertices of 'vsize' bytes each
 * in the reserved DMA buffer, registering rcommon_flush_last_swtcl_prim
 * as the flush callback on first use.  Returns a pointer into the mapped
 * buffer where the caller writes the vertex data.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
restart:
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	/* not enough room left in the current buffer: swap in a fresh one */
	if (is_empty_list(&rmesa->dma.reserved)
	      || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		radeonRefillCurrentDmaRegion(rmesa, bytes);
	}

	if (!rmesa->dma.flush) {
		/* make sure we have enough space to use this in cmdbuf */
		rcommonEnsureCmdBufSpace(rmesa,
			  radeonCountStateEmitSize( rmesa ) + (20*sizeof(int)),
			  __FUNCTION__);
		/* if cmdbuf flushed DMA restart */
		if (is_empty_list(&rmesa->dma.reserved))
			goto restart;
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	/* invariant: vertexptr trails current_used by exactly the queued
	   (not yet flushed) vertices */
	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	ASSERT( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}
462
463 void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
464 {
465 radeonContextPtr radeon = RADEON_CONTEXT( ctx );
466 int i;
467 if (RADEON_DEBUG & DEBUG_IOCTL)
468 fprintf(stderr, "%s\n", __FUNCTION__);
469
470 if (radeon->dma.flush) {
471 radeon->dma.flush(radeon->glCtx);
472 }
473 for (i = 0; i < radeon->tcl.aos_count; i++) {
474 if (radeon->tcl.aos[i].bo) {
475 radeon_bo_unref(radeon->tcl.aos[i].bo);
476 radeon->tcl.aos[i].bo = NULL;
477
478 }
479 }
480 }