winsys/amdgpu: use pb_cache buckets for fewer pb_cache misses
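Instead of putting every buffer into a single pb_cache bucket, amdgpu_bo_create() below derives a bucket index from three buffer properties (small vs. large, VRAM vs. GTT-only, write-combined GTT vs. not), giving eight possible bucket indices, so a reclaim only has to look at buffers with matching properties. A minimal sketch restating that selection (bucket_index is an illustrative helper, not part of the patch; the index is computed inline in amdgpu_bo_create):

   /* Illustrative only: restates the inline bucket selection in
    * amdgpu_bo_create(). Bit 0 = small buffer, bit 1 = VRAM (or VRAM+GTT),
    * bit 2 = write-combined GTT, giving indices 0..7. */
   static unsigned bucket_index(uint64_t size, enum radeon_bo_domain domain,
                                enum radeon_bo_flag flags)
   {
      unsigned bucket = 0;

      if (size <= 4096)                /* small buffers */
         bucket += 1;
      if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
         bucket += 2;
      if (flags == RADEON_FLAG_GTT_WC) /* GTT_WC and no other flags */
         bucket += 4;
      return bucket;                   /* index into ws->bo_cache.buckets */
   }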
[mesa.git] / src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 */

#include "amdgpu_cs.h"

#include "os/os_time.h"
#include "state_tracker/drm_driver.h"
#include <amdgpu_drm.h>
#include <xf86drm.h>
#include <stdio.h>
#include <inttypes.h>

static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_winsys *ws = bo->ws;
   int64_t abs_timeout;
   int i;

   if (timeout == 0) {
      if (p_atomic_read(&bo->num_active_ioctls))
         return false;

   } else {
      abs_timeout = os_time_get_absolute_timeout(timeout);

      /* Wait if any ioctl is being submitted with this buffer. */
      if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
         return false;
   }

   if (bo->is_shared) {
      /* We can't use user fences for shared buffers, because user fences
       * are local to this process only. If we want to wait for all buffer
       * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
       */
      bool buffer_busy = true;
      int r;

      r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
      if (r)
         fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
                 r);
      return !buffer_busy;
   }

   if (timeout == 0) {
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++)
         if (bo->fence[i]) {
            if (amdgpu_fence_wait(bo->fence[i], 0, false)) {
               /* Release the idle fence to avoid checking it again later. */
               amdgpu_fence_reference(&bo->fence[i], NULL);
            } else {
               pipe_mutex_unlock(ws->bo_fence_lock);
               return false;
            }
         }
      pipe_mutex_unlock(ws->bo_fence_lock);
      return true;

   } else {
      struct pipe_fence_handle *fence[RING_LAST] = {};
      bool fence_idle[RING_LAST] = {};
      bool buffer_idle = true;

      /* Take references to all fences, so that we can wait for them
       * without the lock. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++)
         amdgpu_fence_reference(&fence[i], bo->fence[i]);
      pipe_mutex_unlock(ws->bo_fence_lock);

      /* Now wait for the fences. */
      for (i = 0; i < RING_LAST; i++) {
         if (fence[i]) {
            if (amdgpu_fence_wait(fence[i], abs_timeout, true))
               fence_idle[i] = true;
            else
               buffer_idle = false;
         }
      }

      /* Release idle fences to avoid checking them again later. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++) {
         if (fence[i] == bo->fence[i] && fence_idle[i])
            amdgpu_fence_reference(&bo->fence[i], NULL);

         amdgpu_fence_reference(&fence[i], NULL);
      }
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   }
}

static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
      struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
}

void amdgpu_bo_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   int i;

   pipe_mutex_lock(bo->ws->global_bo_list_lock);
   LIST_DEL(&bo->global_list_item);
   bo->ws->num_buffers--;
   pipe_mutex_unlock(bo->ws->global_bo_list_lock);

   amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
   amdgpu_va_range_free(bo->va_handle);
   amdgpu_bo_free(bo->bo);

   for (i = 0; i < RING_LAST; i++)
      amdgpu_fence_reference(&bo->fence[i], NULL);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      bo->ws->allocated_gtt -= align64(bo->base.size, bo->ws->info.gart_page_size);
   FREE(bo);
}

static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   if (bo->use_reusable_pool)
      pb_cache_add_buffer(&bo->cache_entry);
   else
      amdgpu_bo_destroy(_buf);
}

static void *amdgpu_bo_map(struct pb_buffer *buf,
                           struct radeon_winsys_cs *rcs,
                           enum pipe_transfer_usage usage)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
   int r;
   void *cpu = NULL;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_TRANSFER_DONTBLOCK) {
         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               cs->flush_cs(cs->flush_data, 0, NULL);
            } else {
               /* Try to avoid busy-waiting in amdgpu_bo_wait. */
               if (p_atomic_read(&bo->num_active_ioctls))
                  amdgpu_cs_sync_flush(rcs);
            }
            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (amdgpu_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data, 0, NULL);
               } else {
                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     amdgpu_cs_sync_flush(rcs);
               }
            }

            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->ws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   r = amdgpu_bo_cpu_map(bo->bo, &cpu);
   if (r) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&bo->ws->bo_cache);
      r = amdgpu_bo_cpu_map(bo->bo, &cpu);
   }
   return r ? NULL : cpu;
}

static void amdgpu_bo_unmap(struct pb_buffer *buf)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;

   amdgpu_bo_cpu_unmap(bo->bo);
}

static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
   amdgpu_bo_destroy_or_cache
   /* other functions are never called */
};

static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
{
   struct amdgpu_winsys *ws = bo->ws;

   pipe_mutex_lock(ws->global_bo_list_lock);
   LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list);
   ws->num_buffers++;
   pipe_mutex_unlock(ws->global_bo_list_lock);
}

static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
                                                 uint64_t size,
                                                 unsigned alignment,
                                                 unsigned usage,
                                                 enum radeon_bo_domain initial_domain,
                                                 unsigned flags,
                                                 unsigned pb_cache_bucket)
{
   struct amdgpu_bo_alloc_request request = {0};
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   struct amdgpu_winsys_bo *bo;
   amdgpu_va_handle va_handle;
   unsigned va_gap_size;
   int r;

   assert(initial_domain & RADEON_DOMAIN_VRAM_GTT);
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   pb_cache_init_entry(&ws->bo_cache, &bo->cache_entry, &bo->base,
                       pb_cache_bucket);
   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
      fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
      goto error_bo_alloc;
   }

   va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size + va_gap_size, alignment, 0, &va, &va_handle, 0);
   if (r)
      goto error_va_alloc;

   r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = alignment;
   bo->base.usage = usage;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->bo = buf_handle;
   bo->va = va;
   bo->va_handle = va_handle;
   bo->initial_domain = initial_domain;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   if (initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(size, ws->info.gart_page_size);
   else if (initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   FREE(bo);
   return NULL;
}

bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
      return false;
   }

   return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
}

static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0: tile_split = 64;   break;
   case 1: tile_split = 128;  break;
   case 2: tile_split = 256;  break;
   case 3: tile_split = 512;  break;
   default:
   case 4: tile_split = 1024; break;
   case 5: tile_split = 2048; break;
   case 6: tile_split = 4096; break;
   }
   return tile_split;
}

static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:   return 0;
   case 128:  return 1;
   case 256:  return 2;
   case 512:  return 3;
   default:
   case 1024: return 4;
   case 2048: return 5;
   case 4096: return 6;
   }
}

static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_info info = {0};
   uint32_t tiling_flags;
   int r;

   r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   tiling_flags = info.metadata.tiling_info;

   md->microtile = RADEON_LAYOUT_LINEAR;
   md->macrotile = RADEON_LAYOUT_LINEAR;

   if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4)  /* 2D_TILED_THIN1 */
      md->macrotile = RADEON_LAYOUT_TILED;
   else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
      md->microtile = RADEON_LAYOUT_TILED;

   md->pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
   md->bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
   md->bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
   md->tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
   md->mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
   md->num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
   md->scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_metadata metadata = {0};
   uint32_t tiling_flags = 0;

   if (md->macrotile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
   else if (md->microtile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
   else
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

   tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->pipe_config);
   tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->bankw));
   tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->bankh));
   if (md->tile_split)
      tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(md->tile_split));
   tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->mtilea));
   tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->num_banks)-1);

   if (md->scanout)
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
   else
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
                 uint64_t size,
                 unsigned alignment,
                 enum radeon_bo_domain domain,
                 enum radeon_bo_flag flags)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   unsigned usage = 0, pb_cache_bucket;

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align64(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);

   /* Only set one usage bit each for domains and flags, or the cache manager
    * might consider different sets of domains / flags compatible
    */
   if (domain == RADEON_DOMAIN_VRAM_GTT)
      usage = 1 << 2;
   else
      usage = domain >> 1;
   assert(flags < sizeof(usage) * 8 - 3);
   usage |= 1 << (flags + 3);

   /* Determine the pb_cache bucket for minimizing pb_cache misses. */
   pb_cache_bucket = 0;
   if (size <= 4096) /* small buffers */
      pb_cache_bucket += 1;
   if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
      pb_cache_bucket += 2;
   if (flags == RADEON_FLAG_GTT_WC) /* WC */
      pb_cache_bucket += 4;
   assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));

   /* Get a buffer from the cache. */
   bo = (struct amdgpu_winsys_bo*)
        pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
                                pb_cache_bucket);
   if (bo)
      return &bo->base;

   /* Create a new one. */
   bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                         pb_cache_bucket);
   if (!bo) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                            pb_cache_bucket);
      if (!bo)
         return NULL;
   }

   bo->use_reusable_pool = true;
   return &bo->base;
}

static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
                                               struct winsys_handle *whandle,
                                               unsigned *stride,
                                               unsigned *offset)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   enum amdgpu_bo_handle_type type;
   struct amdgpu_bo_import_result result = {0};
   uint64_t va;
   amdgpu_va_handle va_handle;
   struct amdgpu_bo_info info = {0};
   enum radeon_bo_domain initial = 0;
   int r;

   /* Initialize the structure. */
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   default:
      return NULL;
   }

   r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
   if (r)
      goto error;

   /* Get initial domains. */
   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r)
      goto error_query;

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             result.alloc_size, 1 << 20, 0, &va, &va_handle, 0);
   if (r)
      goto error_query;

   r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = info.phys_alignment;
   bo->bo = result.buf_handle;
   bo->base.size = result.alloc_size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->va = va;
   bo->va_handle = va_handle;
   bo->initial_domain = initial;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
   bo->is_shared = true;

   if (stride)
      *stride = whandle->stride;
   if (offset)
      *offset = whandle->offset;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return &bo->base;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return NULL;
}

static bool amdgpu_bo_get_handle(struct pb_buffer *buffer,
                                 unsigned stride, unsigned offset,
                                 unsigned slice_size,
                                 struct winsys_handle *whandle)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
   enum amdgpu_bo_handle_type type;
   int r;

   bo->use_reusable_pool = false;

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   case DRM_API_HANDLE_TYPE_KMS:
      type = amdgpu_bo_handle_type_kms;
      break;
   default:
      return false;
   }

   r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
   if (r)
      return false;

   whandle->stride = stride;
   whandle->offset = offset;
   whandle->offset += slice_size * whandle->layer;
   bo->is_shared = true;
   return true;
}

static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
                                            void *pointer, uint64_t size)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   amdgpu_bo_handle buf_handle;
   struct amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;

   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
      goto error;

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, 1 << 12, 0, &va, &va_handle, 0))
      goto error_va_alloc;

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
      goto error_va_map;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->bo = buf_handle;
   bo->base.alignment = 0;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->user_ptr = pointer;
   bo->va = va;
   bo->va_handle = va_handle;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return (struct pb_buffer*)bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return NULL;
}

static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;
}

static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->va;
}

void amdgpu_bo_init_functions(struct amdgpu_winsys *ws)
{
   ws->base.buffer_set_metadata = amdgpu_buffer_set_metadata;
   ws->base.buffer_get_metadata = amdgpu_buffer_get_metadata;
   ws->base.buffer_map = amdgpu_bo_map;
   ws->base.buffer_unmap = amdgpu_bo_unmap;
   ws->base.buffer_wait = amdgpu_bo_wait;
   ws->base.buffer_create = amdgpu_bo_create;
   ws->base.buffer_from_handle = amdgpu_bo_from_handle;
   ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
   ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
   ws->base.buffer_get_handle = amdgpu_bo_get_handle;
   ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
   ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
}