mesa.git: src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 */

#include "amdgpu_cs.h"

#include "os/os_time.h"
#include "state_tracker/drm_driver.h"
#include <amdgpu_drm.h>
#include <xf86drm.h>
#include <stdio.h>
#include <inttypes.h>

static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_winsys *ws = bo->ws;
   int64_t abs_timeout;
   int i;

   if (timeout == 0) {
      if (p_atomic_read(&bo->num_active_ioctls))
         return false;

   } else {
      abs_timeout = os_time_get_absolute_timeout(timeout);

      /* Wait if any ioctl is being submitted with this buffer. */
      if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
         return false;
   }

   if (bo->is_shared) {
      /* We can't use user fences for shared buffers, because user fences
       * are local to this process only. If we want to wait for all buffer
       * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
       */
      bool buffer_busy = true;
      int r;

      r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
      if (r)
         fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
                 r);
      return !buffer_busy;
   }

   if (timeout == 0) {
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++)
         if (bo->fence[i]) {
            if (amdgpu_fence_wait(bo->fence[i], 0, false)) {
               /* Release the idle fence to avoid checking it again later. */
               amdgpu_fence_reference(&bo->fence[i], NULL);
            } else {
               pipe_mutex_unlock(ws->bo_fence_lock);
               return false;
            }
         }
      pipe_mutex_unlock(ws->bo_fence_lock);
      return true;

   } else {
      struct pipe_fence_handle *fence[RING_LAST] = {};
      bool fence_idle[RING_LAST] = {};
      bool buffer_idle = true;

      /* Take references to all fences, so that we can wait for them
       * without the lock. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++)
         amdgpu_fence_reference(&fence[i], bo->fence[i]);
      pipe_mutex_unlock(ws->bo_fence_lock);

      /* Now wait for the fences. */
      for (i = 0; i < RING_LAST; i++) {
         if (fence[i]) {
            if (amdgpu_fence_wait(fence[i], abs_timeout, true))
               fence_idle[i] = true;
            else
               buffer_idle = false;
         }
      }

      /* Release idle fences to avoid checking them again later. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++) {
         if (fence[i] == bo->fence[i] && fence_idle[i])
            amdgpu_fence_reference(&bo->fence[i], NULL);

         amdgpu_fence_reference(&fence[i], NULL);
      }
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   }
}

static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
      struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
}

void amdgpu_bo_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   int i;

   pipe_mutex_lock(bo->ws->global_bo_list_lock);
   LIST_DEL(&bo->global_list_item);
   bo->ws->num_buffers--;
   pipe_mutex_unlock(bo->ws->global_bo_list_lock);

   amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
   amdgpu_va_range_free(bo->va_handle);
   amdgpu_bo_free(bo->bo);

   for (i = 0; i < RING_LAST; i++)
      amdgpu_fence_reference(&bo->fence[i], NULL);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      bo->ws->allocated_gtt -= align64(bo->base.size, bo->ws->info.gart_page_size);
   FREE(bo);
}

static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   if (bo->use_reusable_pool)
      pb_cache_add_buffer(&bo->cache_entry);
   else
      amdgpu_bo_destroy(_buf);
}

static void *amdgpu_bo_map(struct pb_buffer *buf,
                           struct radeon_winsys_cs *rcs,
                           enum pipe_transfer_usage usage)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
   int r;
   void *cpu = NULL;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_TRANSFER_DONTBLOCK) {
         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               cs->flush_cs(cs->flush_data, 0, NULL);
            } else {
               /* Try to avoid busy-waiting in amdgpu_bo_wait. */
               if (p_atomic_read(&bo->num_active_ioctls))
                  amdgpu_cs_sync_flush(rcs);
            }
            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (amdgpu_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data, 0, NULL);
               } else {
                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     amdgpu_cs_sync_flush(rcs);
               }
            }

            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->ws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   r = amdgpu_bo_cpu_map(bo->bo, &cpu);
   if (r) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&bo->ws->bo_cache);
      r = amdgpu_bo_cpu_map(bo->bo, &cpu);
      if (r)
         return NULL;
   }

   if (p_atomic_inc_return(&bo->map_count) == 1) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->ws->mapped_vram += bo->base.size;
      else
         bo->ws->mapped_gtt += bo->base.size;
   }
   return cpu;
}

static void amdgpu_bo_unmap(struct pb_buffer *buf)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;

   if (bo->user_ptr)
      return;

   if (p_atomic_dec_zero(&bo->map_count)) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->ws->mapped_vram -= bo->base.size;
      else
         bo->ws->mapped_gtt -= bo->base.size;
   }

   amdgpu_bo_cpu_unmap(bo->bo);
}

static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
   amdgpu_bo_destroy_or_cache
   /* other functions are never called */
};

static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
{
   struct amdgpu_winsys *ws = bo->ws;

   pipe_mutex_lock(ws->global_bo_list_lock);
   LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list);
   ws->num_buffers++;
   pipe_mutex_unlock(ws->global_bo_list_lock);
}

static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
                                                 uint64_t size,
                                                 unsigned alignment,
                                                 unsigned usage,
                                                 enum radeon_bo_domain initial_domain,
                                                 unsigned flags,
                                                 unsigned pb_cache_bucket)
{
   struct amdgpu_bo_alloc_request request = {0};
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   struct amdgpu_winsys_bo *bo;
   amdgpu_va_handle va_handle;
   unsigned va_gap_size;
   int r;

   assert(initial_domain & RADEON_DOMAIN_VRAM_GTT);
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   pb_cache_init_entry(&ws->bo_cache, &bo->cache_entry, &bo->base,
                       pb_cache_bucket);
   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
      fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
      goto error_bo_alloc;
   }

   va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size + va_gap_size, alignment, 0, &va, &va_handle, 0);
   if (r)
      goto error_va_alloc;

   r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = alignment;
   bo->base.usage = usage;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->bo = buf_handle;
   bo->va = va;
   bo->va_handle = va_handle;
   bo->initial_domain = initial_domain;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   if (initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(size, ws->info.gart_page_size);
   else if (initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   FREE(bo);
   return NULL;
}

bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
      return false;
   }

   return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
}

static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:  tile_split = 64;   break;
   case 1:  tile_split = 128;  break;
   case 2:  tile_split = 256;  break;
   case 3:  tile_split = 512;  break;
   default:
   case 4:  tile_split = 1024; break;
   case 5:  tile_split = 2048; break;
   case 6:  tile_split = 4096; break;
   }
   return tile_split;
}

static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:   return 0;
   case 128:  return 1;
   case 256:  return 2;
   case 512:  return 3;
   default:
   case 1024: return 4;
   case 2048: return 5;
   case 4096: return 6;
   }
}

static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_info info = {0};
   uint32_t tiling_flags;
   int r;

   r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   tiling_flags = info.metadata.tiling_info;

   md->microtile = RADEON_LAYOUT_LINEAR;
   md->macrotile = RADEON_LAYOUT_LINEAR;

   if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4)  /* 2D_TILED_THIN1 */
      md->macrotile = RADEON_LAYOUT_TILED;
   else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
      md->microtile = RADEON_LAYOUT_TILED;

   md->pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
   md->bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
   md->bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
   md->tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
   md->mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
   md->num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
   md->scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_metadata metadata = {0};
   uint32_t tiling_flags = 0;

   if (md->macrotile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
   else if (md->microtile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
   else
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

   tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->pipe_config);
   tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->bankw));
   tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->bankh));
   if (md->tile_split)
      tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(md->tile_split));
   tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->mtilea));
   tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->num_banks)-1);

   if (md->scanout)
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
   else
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
                 uint64_t size,
                 unsigned alignment,
                 enum radeon_bo_domain domain,
                 enum radeon_bo_flag flags)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   unsigned usage = 0, pb_cache_bucket;

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align64(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);
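   /* For example, assuming a 4096-byte GART page size, a 100-byte constant
    * buffer request is rounded up to a 4096-byte, page-aligned allocation,
    * which makes it far more likely to match and reuse a cached buffer. */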

   /* Only set one usage bit each for domains and flags, or the cache manager
    * might consider different sets of domains / flags compatible
    */
   if (domain == RADEON_DOMAIN_VRAM_GTT)
      usage = 1 << 2;
   else
      usage = domain >> 1;
   assert(flags < sizeof(usage) * 8 - 3);
   usage |= 1 << (flags + 3);
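   /* Worked example (illustrative, assuming the usual RADEON_DOMAIN_GTT = 2 /
    * RADEON_DOMAIN_VRAM = 4 encoding): a GTT-only request sets usage bit 0
    * (domain >> 1 == 1), a VRAM-only request sets bit 1, and the combined
    * VRAM_GTT domain gets its own bit 2; the flags value then selects exactly
    * one bit at position flags + 3, so distinct domain/flags combinations
    * never produce the same usage value for the cache manager. */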

   /* Determine the pb_cache bucket for minimizing pb_cache misses. */
   pb_cache_bucket = 0;
   if (size <= 4096) /* small buffers */
      pb_cache_bucket += 1;
   if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
      pb_cache_bucket += 2;
   if (flags == RADEON_FLAG_GTT_WC) /* WC */
      pb_cache_bucket += 4;
   assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
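   /* Worked example (illustrative): with the scheme above, a 4 KiB VRAM
    * buffer requested with exactly RADEON_FLAG_GTT_WC lands in bucket
    * 1 + 2 + 4 = 7, while a larger GTT buffer without WC lands in bucket 0,
    * so the two never compete for the same cache list. */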

   /* Get a buffer from the cache. */
   bo = (struct amdgpu_winsys_bo*)
        pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
                                pb_cache_bucket);
   if (bo)
      return &bo->base;

   /* Create a new one. */
   bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                         pb_cache_bucket);
   if (!bo) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                            pb_cache_bucket);
      if (!bo)
         return NULL;
   }

   bo->use_reusable_pool = true;
   return &bo->base;
}

static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
                                               struct winsys_handle *whandle,
                                               unsigned *stride,
                                               unsigned *offset)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   enum amdgpu_bo_handle_type type;
   struct amdgpu_bo_import_result result = {0};
   uint64_t va;
   amdgpu_va_handle va_handle;
   struct amdgpu_bo_info info = {0};
   enum radeon_bo_domain initial = 0;
   int r;

   /* Initialize the structure. */
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   default:
      /* Unknown handle type: free the partially initialized bo instead of
       * leaking it. */
      goto error;
   }

   r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
   if (r)
      goto error;

   /* Get initial domains. */
   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r)
      goto error_query;

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             result.alloc_size, 1 << 20, 0, &va, &va_handle, 0);
   if (r)
      goto error_query;

   r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;


   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = info.phys_alignment;
   bo->bo = result.buf_handle;
   bo->base.size = result.alloc_size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->va = va;
   bo->va_handle = va_handle;
   bo->initial_domain = initial;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
   bo->is_shared = true;

   if (stride)
      *stride = whandle->stride;
   if (offset)
      *offset = whandle->offset;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return &bo->base;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return NULL;
}

static bool amdgpu_bo_get_handle(struct pb_buffer *buffer,
                                 unsigned stride, unsigned offset,
                                 unsigned slice_size,
                                 struct winsys_handle *whandle)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
   enum amdgpu_bo_handle_type type;
   int r;

   bo->use_reusable_pool = false;

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   case DRM_API_HANDLE_TYPE_KMS:
      type = amdgpu_bo_handle_type_kms;
      break;
   default:
      return false;
   }

   r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
   if (r)
      return false;

   whandle->stride = stride;
   whandle->offset = offset;
   whandle->offset += slice_size * whandle->layer;
   bo->is_shared = true;
   return true;
}

static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
                                            void *pointer, uint64_t size)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   amdgpu_bo_handle buf_handle;
   struct amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;

   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
      goto error;

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, 1 << 12, 0, &va, &va_handle, 0))
      goto error_va_alloc;

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
      goto error_va_map;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->bo = buf_handle;
   bo->base.alignment = 0;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->user_ptr = pointer;
   bo->va = va;
   bo->va_handle = va_handle;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return (struct pb_buffer*)bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return NULL;
}

static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;
}

static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->va;
}

void amdgpu_bo_init_functions(struct amdgpu_winsys *ws)
{
   ws->base.buffer_set_metadata = amdgpu_buffer_set_metadata;
   ws->base.buffer_get_metadata = amdgpu_buffer_get_metadata;
   ws->base.buffer_map = amdgpu_bo_map;
   ws->base.buffer_unmap = amdgpu_bo_unmap;
   ws->base.buffer_wait = amdgpu_bo_wait;
   ws->base.buffer_create = amdgpu_bo_create;
   ws->base.buffer_from_handle = amdgpu_bo_from_handle;
   ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
   ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
   ws->base.buffer_get_handle = amdgpu_bo_get_handle;
   ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
   ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
}