winsys/amdgpu: add slab entry structures to amdgpu_winsys_bo
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 */

#include "amdgpu_cs.h"

#include "os/os_time.h"
#include "state_tracker/drm_driver.h"
#include <amdgpu_drm.h>
#include <xf86drm.h>
#include <stdio.h>
#include <inttypes.h>

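/* Wait until the buffer is idle or the timeout expires. A timeout of 0 only
 * checks whether the buffer is busy. Returns true if the buffer is idle.
 */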
static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_winsys *ws = bo->ws;
   int64_t abs_timeout;

   if (timeout == 0) {
      if (p_atomic_read(&bo->num_active_ioctls))
         return false;

   } else {
      abs_timeout = os_time_get_absolute_timeout(timeout);

      /* Wait if any ioctl is being submitted with this buffer. */
      if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
         return false;
   }

   if (bo->is_shared) {
      /* We can't use user fences for shared buffers, because user fences
       * are local to this process only. If we want to wait for all buffer
       * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
       */
      bool buffer_busy = true;
      int r;

      r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
      if (r)
         fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
                 r);
      return !buffer_busy;
   }

   if (timeout == 0) {
      unsigned idle_fences;
      bool buffer_idle;

      pipe_mutex_lock(ws->bo_fence_lock);

      for (idle_fences = 0; idle_fences < bo->num_fences; ++idle_fences) {
         if (!amdgpu_fence_wait(bo->fences[idle_fences], 0, false))
            break;
      }

      /* Release the idle fences to avoid checking them again later. */
      for (unsigned i = 0; i < idle_fences; ++i)
         amdgpu_fence_reference(&bo->fences[i], NULL);

      memmove(&bo->fences[0], &bo->fences[idle_fences],
              (bo->num_fences - idle_fences) * sizeof(*bo->fences));
      bo->num_fences -= idle_fences;

      buffer_idle = !bo->num_fences;
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   } else {
      bool buffer_idle = true;

      pipe_mutex_lock(ws->bo_fence_lock);
      while (bo->num_fences && buffer_idle) {
         struct pipe_fence_handle *fence = NULL;
         bool fence_idle = false;

         amdgpu_fence_reference(&fence, bo->fences[0]);

         /* Wait for the fence. */
         pipe_mutex_unlock(ws->bo_fence_lock);
         if (amdgpu_fence_wait(fence, abs_timeout, true))
            fence_idle = true;
         else
            buffer_idle = false;
         pipe_mutex_lock(ws->bo_fence_lock);

         /* Release an idle fence to avoid checking it again later, keeping in
          * mind that the fence array may have been modified by other threads.
          */
         if (fence_idle && bo->num_fences && bo->fences[0] == fence) {
            amdgpu_fence_reference(&bo->fences[0], NULL);
            memmove(&bo->fences[0], &bo->fences[1],
                    (bo->num_fences - 1) * sizeof(*bo->fences));
            bo->num_fences--;
         }

         amdgpu_fence_reference(&fence, NULL);
      }
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   }
}

static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
      struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
}

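/* Drop all fence references held by the buffer and free its fence array. */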
static void amdgpu_bo_remove_fences(struct amdgpu_winsys_bo *bo)
{
   for (unsigned i = 0; i < bo->num_fences; ++i)
      amdgpu_fence_reference(&bo->fences[i], NULL);

   FREE(bo->fences);
   bo->num_fences = 0;
   bo->max_fences = 0;
}

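/* Free a real buffer: remove it from the global BO list, unmap its GPU
 * virtual address range, release the kernel handle, and update the
 * allocation/mapping statistics.
 */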
void amdgpu_bo_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   assert(bo->bo && "must not be called for slab entries");

   pipe_mutex_lock(bo->ws->global_bo_list_lock);
   LIST_DEL(&bo->u.real.global_list_item);
   bo->ws->num_buffers--;
   pipe_mutex_unlock(bo->ws->global_bo_list_lock);

   amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
   amdgpu_va_range_free(bo->u.real.va_handle);
   amdgpu_bo_free(bo->bo);

   amdgpu_bo_remove_fences(bo);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      bo->ws->allocated_gtt -= align64(bo->base.size, bo->ws->info.gart_page_size);

   if (bo->u.real.map_count >= 1) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->ws->mapped_vram -= bo->base.size;
      else if (bo->initial_domain & RADEON_DOMAIN_GTT)
         bo->ws->mapped_gtt -= bo->base.size;
   }

   FREE(bo);
}

static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   assert(bo->bo); /* slab buffers have a separate vtbl */

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(&bo->u.real.cache_entry);
   else
      amdgpu_bo_destroy(_buf);
}

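/* CPU-map the buffer. Unless PIPE_TRANSFER_UNSYNCHRONIZED is set, this
 * flushes and/or waits for GPU work that still uses the buffer first.
 * Slab entries are mapped through their backing (real) buffer.
 */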
static void *amdgpu_bo_map(struct pb_buffer *buf,
                           struct radeon_winsys_cs *rcs,
                           enum pipe_transfer_usage usage)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_winsys_bo *real;
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
   int r;
   void *cpu = NULL;
   uint64_t offset = 0;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_TRANSFER_DONTBLOCK) {
         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               cs->flush_cs(cs->flush_data, 0, NULL);
            } else {
               /* Try to avoid busy-waiting in amdgpu_bo_wait. */
               if (p_atomic_read(&bo->num_active_ioctls))
                  amdgpu_cs_sync_flush(rcs);
            }
            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (amdgpu_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data, 0, NULL);
               } else {
                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     amdgpu_cs_sync_flush(rcs);
               }
            }

            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->ws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   if (bo->bo) {
      real = bo;
   } else {
      real = bo->u.slab.real;
      offset = bo->va - real->va;
   }

   r = amdgpu_bo_cpu_map(real->bo, &cpu);
   if (r) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&real->ws->bo_cache);
      r = amdgpu_bo_cpu_map(real->bo, &cpu);
      if (r)
         return NULL;
   }

   if (p_atomic_inc_return(&real->u.real.map_count) == 1) {
      if (real->initial_domain & RADEON_DOMAIN_VRAM)
         real->ws->mapped_vram += real->base.size;
      else if (real->initial_domain & RADEON_DOMAIN_GTT)
         real->ws->mapped_gtt += real->base.size;
   }
   return (uint8_t*)cpu + offset;
}

static void amdgpu_bo_unmap(struct pb_buffer *buf)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_winsys_bo *real;

   if (bo->user_ptr)
      return;

   real = bo->bo ? bo : bo->u.slab.real;

   if (p_atomic_dec_zero(&real->u.real.map_count)) {
      if (real->initial_domain & RADEON_DOMAIN_VRAM)
         real->ws->mapped_vram -= real->base.size;
      else if (real->initial_domain & RADEON_DOMAIN_GTT)
         real->ws->mapped_gtt -= real->base.size;
   }

   amdgpu_bo_cpu_unmap(real->bo);
}

static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
   amdgpu_bo_destroy_or_cache
   /* other functions are never called */
};

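/* Register a real buffer in the winsys-wide list of all buffers. */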
static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
{
   struct amdgpu_winsys *ws = bo->ws;

   assert(bo->bo);

   pipe_mutex_lock(ws->global_bo_list_lock);
   LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list);
   ws->num_buffers++;
   pipe_mutex_unlock(ws->global_bo_list_lock);
}

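/* Allocate a new kernel BO, map it into the GPU virtual address space and
 * wrap it in an amdgpu_winsys_bo.
 */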
static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
                                                 uint64_t size,
                                                 unsigned alignment,
                                                 unsigned usage,
                                                 enum radeon_bo_domain initial_domain,
                                                 unsigned flags,
                                                 unsigned pb_cache_bucket)
{
   struct amdgpu_bo_alloc_request request = {0};
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   struct amdgpu_winsys_bo *bo;
   amdgpu_va_handle va_handle;
   unsigned va_gap_size;
   int r;

   assert(initial_domain & RADEON_DOMAIN_VRAM_GTT);
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                       pb_cache_bucket);
   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
      fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
      goto error_bo_alloc;
   }

   va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size + va_gap_size, alignment, 0, &va, &va_handle, 0);
   if (r)
      goto error_va_alloc;

   r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = alignment;
   bo->base.usage = usage;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->bo = buf_handle;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = initial_domain;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   if (initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(size, ws->info.gart_page_size);
   else if (initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   FREE(bo);
   return NULL;
}

bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
      return false;
   }

   return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
}

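/* Decode the TILE_SPLIT tiling field into a tile split size in bytes. */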
static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0: tile_split = 64; break;
   case 1: tile_split = 128; break;
   case 2: tile_split = 256; break;
   case 3: tile_split = 512; break;
   default:
   case 4: tile_split = 1024; break;
   case 5: tile_split = 2048; break;
   case 6: tile_split = 4096; break;
   }
   return tile_split;
}

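/* Inverse of eg_tile_split: encode a tile split size in bytes as the
 * TILE_SPLIT field value.
 */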
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64: return 0;
   case 128: return 1;
   case 256: return 2;
   case 512: return 3;
   default:
   case 1024: return 4;
   case 2048: return 5;
   case 4096: return 6;
   }
}

static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_info info = {0};
   uint32_t tiling_flags;
   int r;

   assert(bo->bo && "must not be called for slab entries");

   r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   tiling_flags = info.metadata.tiling_info;

   md->microtile = RADEON_LAYOUT_LINEAR;
   md->macrotile = RADEON_LAYOUT_LINEAR;

   if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4)  /* 2D_TILED_THIN1 */
      md->macrotile = RADEON_LAYOUT_TILED;
   else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
      md->microtile = RADEON_LAYOUT_TILED;

   md->pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
   md->bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
   md->bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
   md->tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
   md->mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
   md->num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
   md->scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_metadata metadata = {0};
   uint32_t tiling_flags = 0;

   assert(bo->bo && "must not be called for slab entries");

   if (md->macrotile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
   else if (md->microtile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
   else
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

   tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->pipe_config);
   tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->bankw));
   tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->bankh));
   if (md->tile_split)
      tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(md->tile_split));
   tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->mtilea));
   tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->num_banks)-1);

   if (md->scanout)
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
   else
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

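/* winsys buffer_create entry point: reuse a buffer from the pb_cache when
 * possible, otherwise allocate a new one.
 */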
static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
                 uint64_t size,
                 unsigned alignment,
                 enum radeon_bo_domain domain,
                 enum radeon_bo_flag flags)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   unsigned usage = 0, pb_cache_bucket;

   /* This flag is irrelevant for the cache. */
   flags &= ~RADEON_FLAG_HANDLE;

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align64(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);

   /* Only set one usage bit each for domains and flags, or the cache manager
    * might consider different sets of domains / flags compatible
    */
   if (domain == RADEON_DOMAIN_VRAM_GTT)
      usage = 1 << 2;
   else
      usage = domain >> 1;
   assert(flags < sizeof(usage) * 8 - 3);
   usage |= 1 << (flags + 3);

   /* Determine the pb_cache bucket for minimizing pb_cache misses. */
   pb_cache_bucket = 0;
   if (size <= 4096) /* small buffers */
      pb_cache_bucket += 1;
   if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
      pb_cache_bucket += 2;
   if (flags == RADEON_FLAG_GTT_WC) /* WC */
      pb_cache_bucket += 4;
   assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));

   /* Get a buffer from the cache. */
   bo = (struct amdgpu_winsys_bo*)
        pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
                                pb_cache_bucket);
   if (bo)
      return &bo->base;

   /* Create a new one. */
   bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                         pb_cache_bucket);
   if (!bo) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                            pb_cache_bucket);
      if (!bo)
         return NULL;
   }

   bo->u.real.use_reusable_pool = true;
   return &bo->base;
}

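/* Import a buffer shared by another process via a GEM flink name or
 * dma-buf file descriptor.
 */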
static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
                                               struct winsys_handle *whandle,
                                               unsigned *stride,
                                               unsigned *offset)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   enum amdgpu_bo_handle_type type;
   struct amdgpu_bo_import_result result = {0};
   uint64_t va;
   amdgpu_va_handle va_handle;
   struct amdgpu_bo_info info = {0};
   enum radeon_bo_domain initial = 0;
   int r;

   /* Initialize the structure. */
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   default:
      goto error; /* unknown handle type; don't leak the allocated bo */
   }

   r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
   if (r)
      goto error;

   /* Get initial domains. */
   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r)
      goto error_query;

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             result.alloc_size, 1 << 20, 0, &va, &va_handle, 0);
   if (r)
      goto error_query;

   r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = info.phys_alignment;
   bo->bo = result.buf_handle;
   bo->base.size = result.alloc_size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = initial;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
   bo->is_shared = true;

   if (stride)
      *stride = whandle->stride;
   if (offset)
      *offset = whandle->offset;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return &bo->base;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return NULL;
}

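/* Export a buffer as a GEM flink name, KMS handle, or dma-buf fd so it can
 * be shared. Exported buffers are excluded from the reusable pool.
 */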
static bool amdgpu_bo_get_handle(struct pb_buffer *buffer,
                                 unsigned stride, unsigned offset,
                                 unsigned slice_size,
                                 struct winsys_handle *whandle)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
   enum amdgpu_bo_handle_type type;
   int r;

   if (!bo->bo) {
      offset += bo->va - bo->u.slab.real->va;
      bo = bo->u.slab.real;
   }

   bo->u.real.use_reusable_pool = false;

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   case DRM_API_HANDLE_TYPE_KMS:
      type = amdgpu_bo_handle_type_kms;
      break;
   default:
      return false;
   }

   r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
   if (r)
      return false;

   whandle->stride = stride;
   whandle->offset = offset;
   whandle->offset += slice_size * whandle->layer;
   bo->is_shared = true;
   return true;
}

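/* Create a buffer backed by application-provided (userptr) memory. The
 * resulting BO lives in the GTT domain.
 */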
static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
                                            void *pointer, uint64_t size)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   amdgpu_bo_handle buf_handle;
   struct amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;

   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
      goto error;

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, 1 << 12, 0, &va, &va_handle, 0))
      goto error_va_alloc;

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
      goto error_va_map;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->bo = buf_handle;
   bo->base.alignment = 0;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->user_ptr = pointer;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return (struct pb_buffer*)bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return NULL;
}

static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;
}

static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->va;
}

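/* Plug the buffer functions into the winsys vtable. */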
void amdgpu_bo_init_functions(struct amdgpu_winsys *ws)
{
   ws->base.buffer_set_metadata = amdgpu_buffer_set_metadata;
   ws->base.buffer_get_metadata = amdgpu_buffer_get_metadata;
   ws->base.buffer_map = amdgpu_bo_map;
   ws->base.buffer_unmap = amdgpu_bo_unmap;
   ws->base.buffer_wait = amdgpu_bo_wait;
   ws->base.buffer_create = amdgpu_bo_create;
   ws->base.buffer_from_handle = amdgpu_bo_from_handle;
   ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
   ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
   ws->base.buffer_get_handle = amdgpu_bo_get_handle;
   ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
   ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
}