/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

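/* COPY_DWORDS copies `nr` 32-bit words from `src` to `dst` and advances
 * `dst` past the copied data; the x86 variant is a single `rep movsl`.
 */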
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
    int __tmp;                                                          \
    __asm__ __volatile__( "rep ; movsl"                                 \
                          : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)     \
                          : "0" (nr),                                   \
                            "D" ((long)dst),                            \
                            "S" ((long)src) );                          \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
    int j;                                                              \
    for ( j = 0 ; j < nr ; j++ )                                        \
        dst[j] = ((int *)src)[j];                                       \
    dst += nr;                                                          \
} while (0)
#endif

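/* radeonEmitVec{4,8,12,16} gather `count` elements of 4/8/12/16 bytes each
 * from `data`, which may use an arbitrary byte `stride`, into the tightly
 * packed `out` buffer; a matching stride degenerates to a straight dword copy.
 */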
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 4)
        COPY_DWORDS(out, data, count);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out++;
            data += stride;
        }
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 8)
        COPY_DWORDS(out, data, count * 2);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out += 2;
            data += stride;
        }
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 12)
        COPY_DWORDS(out, data, count * 3);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out += 3;
            data += stride;
        }
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 16)
        COPY_DWORDS(out, data, count * 4);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out[3] = *(int *)(data + 12);
            out += 4;
            data += stride;
        }
}

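/* Upload a (possibly strided) client vertex array into a freshly allocated
 * DMA region and fill in the radeon_aos descriptor for it.  `size` is the
 * number of dwords per element; a stride of 0 uploads a single element.
 */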
void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
                         const GLvoid * data, int size, int stride, int count)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    uint32_t *out;

    if (stride == 0) {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
        count = 1;
        aos->stride = 0;
    } else {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
        aos->stride = size;
    }

    aos->components = size;
    aos->count = count;

    radeon_bo_map(aos->bo, 1);
    out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
    switch (size) {
    case 1: radeonEmitVec4(out, data, stride, count); break;
    case 2: radeonEmitVec8(out, data, stride, count); break;
    case 3: radeonEmitVec12(out, data, stride, count); break;
    case 4: radeonEmitVec16(out, data, stride, count); break;
    default:
        assert(0);
        break;
    }
    radeon_bo_unmap(aos->bo);
}

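/* Initialize the three DMA buffer-object lists (free, wait, reserved) and
 * the default minimum allocation size.
 */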
void radeon_init_dma(radeonContextPtr rmesa)
{
    make_empty_list(&rmesa->dma.free);
    make_empty_list(&rmesa->dma.wait);
    make_empty_list(&rmesa->dma.reserved);
    rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

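/* Make a buffer of at least `size` bytes current: reuse the last buffer on
 * the free list when it is large enough, otherwise allocate a new BO in the
 * GTT domain (flushing the command buffer and retrying if allocation fails).
 * The chosen buffer is moved to the head of the reserved list and mapped.
 */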
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
    struct radeon_dma_bo *dma_bo = NULL;
    /* Raise the minimum size to at least the requested size,
       rounded up to the next multiple of 16 bytes. */
    if (size > rmesa->dma.minimum_size)
        rmesa->dma.minimum_size = (size + 15) & (~15);

    radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zu\n",
                 __FUNCTION__, size, rmesa->dma.minimum_size);

    if (is_empty_list(&rmesa->dma.free)
        || last_elem(&rmesa->dma.free)->bo->size < size) {
        dma_bo = CALLOC_STRUCT(radeon_dma_bo);
        assert(dma_bo);

again_alloc:
        dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
                                    0, rmesa->dma.minimum_size, 4,
                                    RADEON_GEM_DOMAIN_GTT, 0);

        if (!dma_bo->bo) {
            rcommonFlushCmdBuf(rmesa, __FUNCTION__);
            goto again_alloc;
        }
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    } else {
        /* Buffers are pushed and popped at the end of the list, so
           unused buffers accumulate at the beginning, where they can
           later be freed. */
        dma_bo = last_elem(&rmesa->dma.free);
        remove_from_list(dma_bo);
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    }

    rmesa->dma.current_used = 0;
    rmesa->dma.current_vertexptr = 0;

    if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
                                      first_elem(&rmesa->dma.reserved)->bo,
                                      RADEON_GEM_DOMAIN_GTT, 0))
        fprintf(stderr, "failure to revalidate BOs - badness\n");

    if (is_empty_list(&rmesa->dma.reserved)) {
        /* The command buffer was flushed in radeon_revalidate_bos. */
        goto again_alloc;
    }
    radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
                          struct radeon_bo **pbo, int *poffset,
                          int bytes, int alignment)
{
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

    if (rmesa->dma.flush)
        rmesa->dma.flush(rmesa->glCtx);

    assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

    alignment--;
    rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
        radeonRefillCurrentDmaRegion(rmesa, bytes);

    *poffset = rmesa->dma.current_used;
    *pbo = first_elem(&rmesa->dma.reserved)->bo;
    radeon_bo_ref(*pbo);

    /* Always align to at least 16 bytes */
    rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;

    assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

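/* Unreference and free every buffer on the free, wait and reserved lists,
 * e.g. at context teardown.
 */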
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;
    if (RADEON_DEBUG & RADEON_DMA)
        fprintf(stderr, "%s\n", __FUNCTION__);

    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

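/* Give back the last `return_bytes` bytes of the current reserved buffer
 * so they can be handed out again by the next allocation.
 */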
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
    if (is_empty_list(&rmesa->dma.reserved))
        return;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
    rmesa->dma.current_used -= return_bytes;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

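/* Returns nonzero when the GPU is no longer using the buffer object.
 * If the kernel/libdrm lacks the busy query, the BO is treated as idle.
 */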
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
    uint32_t domain;
    int ret = radeon_bo_is_busy(bo, &domain);
    if (ret == -EINVAL) {
        WARN_ONCE("Your libdrm or kernel doesn't support the busy query.\n"
                  "This may cause a small performance drop for you.\n");
    }
    return ret != -EBUSY;
}

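/* Age the DMA buffer lists: reserved buffers are unmapped and moved to the
 * wait list, waiting buffers whose GPU work has finished move to the free
 * list, and free buffers that have expired (or are too small for the
 * current minimum size) are destroyed.
 */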
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;
    const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
    const int time = rmesa->dma.free.expire_counter;

    if (RADEON_DEBUG & RADEON_DMA) {
        size_t free = 0,
               wait = 0,
               reserved = 0;
        foreach(dma_bo, &rmesa->dma.free)
            ++free;

        foreach(dma_bo, &rmesa->dma.wait)
            ++wait;

        foreach(dma_bo, &rmesa->dma.reserved)
            ++reserved;

        fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
                __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
    }

    if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
        /* Request updated cs processing information from the kernel. */
        legacy_track_pending(rmesa->radeonScreen->bom, 0);
    }

    /* Move waiting BOs to the free list.  The wait list gives the GPU
       time to finish with the data before the buffer is reused. */
    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        if (dma_bo->expire_counter == time) {
            WARN_ONCE("Leaking dma buffer object!\n");
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        /* Free objects that are too small to be reused after a large request. */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        if (!radeon_bo_is_idle(dma_bo->bo)) {
            if (rmesa->radeonScreen->driScreen->dri2.enabled)
                break;
            continue;
        }
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.free, dma_bo);
    }

    /* Move reserved buffers to the wait list. */
    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        radeon_bo_unmap(dma_bo->bo);
        /* Free objects that are too small to be reused after a large request. */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.wait, dma_bo);
    }

    /* Free buffers that have been unused for some time. */
    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        if (dma_bo->expire_counter != time)
            break;
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim( struct gl_context *ctx )
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    struct radeon_dma *dma = &rmesa->dma;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);
    dma->flush = NULL;

    radeon_bo_unmap(rmesa->swtcl.bo);

    if (!is_empty_list(&dma->reserved)) {
        GLuint current_offset = dma->current_used;

        assert(dma->current_used +
               rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
               dma->current_vertexptr);

        if (dma->current_used != dma->current_vertexptr) {
            dma->current_used = dma->current_vertexptr;

            rmesa->vtbl.swtcl_flush(ctx, current_offset);
        }
        rmesa->swtcl.numverts = 0;
    }
    radeon_bo_unref(rmesa->swtcl.bo);
    rmesa->swtcl.bo = NULL;
}

/* Alloc space in the current dma region.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
    GLuint bytes = vsize * nverts;
    void *head;
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
        if (rmesa->dma.flush) {
            rmesa->dma.flush(rmesa->glCtx);
        }

        radeonRefillCurrentDmaRegion(rmesa, bytes);

        return NULL;
    }

    if (!rmesa->dma.flush) {
        /* Make sure the accumulated vertices are flushed along with the
           command buffer, and restart DMA accumulation afterwards. */
        rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
        rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
    }

    ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
    ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
    ASSERT( rmesa->dma.current_used +
            rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
            rmesa->dma.current_vertexptr );

    if (!rmesa->swtcl.bo) {
        rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
        radeon_bo_ref(rmesa->swtcl.bo);
        radeon_bo_map(rmesa->swtcl.bo, 1);
    }

    head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
    rmesa->dma.current_vertexptr += bytes;
    rmesa->swtcl.numverts += nverts;
    return head;
}

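/* Flush any pending software-TCL vertices and drop the references to the
 * array-of-structures BOs set up by rcommon_emit_vector.
 */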
void radeonReleaseArrays( struct gl_context *ctx, GLuint newinputs )
{
    radeonContextPtr radeon = RADEON_CONTEXT( ctx );
    int i;
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (radeon->dma.flush) {
        radeon->dma.flush(radeon->glCtx);
    }
    for (i = 0; i < radeon->tcl.aos_count; i++) {
        if (radeon->tcl.aos[i].bo) {
            radeon_bo_unref(radeon->tcl.aos[i].bo);
            radeon->tcl.aos[i].bo = NULL;
        }
    }
}