src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include "drm-uapi/amdgpu_drm.h"
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>

#include "util/u_atomic.h"
#include "util/u_memory.h"
#include "util/u_math.h"

#define AMDGPU_TILING_SCANOUT_SHIFT 63
#define AMDGPU_TILING_SCANOUT_MASK 1

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);

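/* Map or unmap a range of "bo" in the GPU virtual address space at "addr".
 * The page-table flags are derived from the radeon_winsys flags in
 * "bo_flags"; "ops" is the AMDGPU_VA_OP_* operation to perform. */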
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
                     amdgpu_bo_handle bo,
                     uint64_t offset,
                     uint64_t size,
                     uint64_t addr,
                     uint32_t bo_flags,
                     uint32_t ops)
{
   uint64_t flags = AMDGPU_VM_PAGE_READABLE |
                    AMDGPU_VM_PAGE_EXECUTABLE;

   if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
      flags |= AMDGPU_VM_MTYPE_UC;

   if (!(bo_flags & RADEON_FLAG_READ_ONLY))
      flags |= AMDGPU_VM_PAGE_WRITEABLE;

   size = align64(size, getpagesize());

   return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
                              flags, ops);
}

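/* Map/unmap one backing range of a virtual (sparse) BO into the parent's VA
 * range. A range with a NULL backing BO is currently skipped (PRT mapping is
 * still a TODO). Mapping takes a reference on the backing BO; unmapping
 * releases it. */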
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
   assert(range->size);

   if (!range->bo)
      return; /* TODO: PRT mapping */

   p_atomic_inc(&range->bo->ref_count);
   int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
                                range->size, range->offset + bo->base.va,
                                0, AMDGPU_VA_OP_MAP);
   if (r)
      abort();
}

static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
   assert(range->size);

   if (!range->bo)
      return; /* TODO: PRT mapping */

   int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
                                range->size, range->offset + bo->base.va,
                                0, AMDGPU_VA_OP_UNMAP);
   if (r)
      abort();
   radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}

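/* Rebuild the deduplicated list of backing BOs of a virtual BO from its range
 * list: gather the backing BO of every bound range, sort the pointers and
 * drop duplicates. */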
static int bo_comparator(const void *ap, const void *bp) {
   struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
   struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
   return (a > b) ? 1 : (a < b) ? -1 : 0;
}

static void
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
   if (bo->bo_capacity < bo->range_count) {
      uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
      bo->bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
      bo->bo_capacity = new_count;
   }

   uint32_t temp_bo_count = 0;
   for (uint32_t i = 0; i < bo->range_count; ++i)
      if (bo->ranges[i].bo)
         bo->bos[temp_bo_count++] = bo->ranges[i].bo;

   qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

   /* If no range is backed by a BO, there is nothing to deduplicate. */
   if (!temp_bo_count) {
      bo->bo_count = 0;
      return;
   }

   uint32_t final_bo_count = 1;
   for (uint32_t i = 1; i < temp_bo_count; ++i)
      if (bo->bos[i] != bo->bos[i - 1])
         bo->bos[final_bo_count++] = bo->bos[i];

   bo->bo_count = final_bo_count;
}

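/* Bind "size" bytes of "_bo" (or nothing, if "_bo" is NULL) at "bo_offset"
 * into the parent virtual BO at "offset". Overlapping ranges are unmapped,
 * split or merged as needed, and the flat range list of the parent is kept
 * sorted by offset. */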
static void
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size,
                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
   struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
   struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
   int range_count_delta, new_idx;
   int first = 0, last;
   struct radv_amdgpu_map_range new_first, new_last;

   assert(parent->is_virtual);
   assert(!bo || !bo->is_virtual);

   if (!size)
      return;

   /* We have at most 2 new ranges (1 by the bind, and another one by
    * splitting a range that contains the newly bound range). */
   if (parent->range_capacity - parent->range_count < 2) {
      parent->range_capacity += 2;
      parent->ranges = realloc(parent->ranges,
                               parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
   }

   /*
    * [first, last] is exactly the range of ranges that either overlap the
    * newly bound range, or are adjacent to it. This corresponds to the bind
    * ranges that may change.
    */
   while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
      ++first;

   last = first;
   while (last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
      ++last;

   /* Whether the first or last range is going to be totally removed or just
    * resized/left alone. Note that in the case of first == last, we will split
    * this into a part before and after the new range. The remove flag is then
    * whether to not create the corresponding split part. */
   bool remove_first = parent->ranges[first].offset == offset;
   bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
   bool unmapped_first = false;

   assert(parent->ranges[first].offset <= offset);
   assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

   /* Try to merge the new range with the first range. */
   if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
      size += offset - parent->ranges[first].offset;
      offset = parent->ranges[first].offset;
      bo_offset = parent->ranges[first].bo_offset;
      remove_first = true;
   }

   /* Try to merge the new range with the last range. */
   if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
      size = parent->ranges[last].offset + parent->ranges[last].size - offset;
      remove_last = true;
   }

   range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
   new_idx = first + !remove_first;

   /* Any ranges strictly between first and last are entirely covered by the
    * new range, so just unmap them. */
   for (int i = first + 1; i < last; ++i)
      radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

   /* If the first/last range are not left alone we unmap them and optionally
    * map them again after modifications. Note that this implicitly does the
    * splitting if first == last. */
   new_first = parent->ranges[first];
   new_last = parent->ranges[last];

   if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
      radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
      unmapped_first = true;

      if (!remove_first) {
         new_first.size = offset - new_first.offset;
         radv_amdgpu_winsys_virtual_map(parent, &new_first);
      }
   }

   if (parent->ranges[last].offset < offset + size || remove_last) {
      if (first != last || !unmapped_first)
         radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

      if (!remove_last) {
         new_last.size -= offset + size - new_last.offset;
         new_last.offset = offset + size;
         radv_amdgpu_winsys_virtual_map(parent, &new_last);
      }
   }

   /* Move the range list after last to account for the changed number of ranges. */
   memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
           sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

   if (!remove_first)
      parent->ranges[first] = new_first;

   if (!remove_last)
      parent->ranges[new_idx + 1] = new_last;

   /* Actually set up the new range. */
   parent->ranges[new_idx].offset = offset;
   parent->ranges[new_idx].size = size;
   parent->ranges[new_idx].bo = bo;
   parent->ranges[new_idx].bo_offset = bo_offset;

   radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

   parent->range_count += range_count_delta;

   radv_amdgpu_winsys_rebuild_bo_list(parent);
}

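/* Drop a reference on a BO and free it once the refcount hits zero: unmap and
 * release the backing ranges of virtual BOs, unmap and free real BOs, and
 * update the per-heap memory accounting. */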
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct radv_amdgpu_winsys *ws = bo->ws;

   if (p_atomic_dec_return(&bo->ref_count))
      return;
   if (bo->is_virtual) {
      for (uint32_t i = 0; i < bo->range_count; ++i) {
         radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
      }
      free(bo->bos);
      free(bo->ranges);
   } else {
      if (bo->ws->debug_all_bos) {
         pthread_mutex_lock(&bo->ws->global_bo_list_lock);
         list_del(&bo->global_list_item);
         bo->ws->num_buffers--;
         pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
      }
      radv_amdgpu_bo_va_op(bo->ws, bo->bo, 0, bo->size, bo->base.va,
                           0, AMDGPU_VA_OP_UNMAP);
      amdgpu_bo_free(bo->bo);
   }

   if (bo->initial_domain & RADEON_DOMAIN_VRAM) {
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram,
                      -align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis,
                      -align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (bo->initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt,
                   -align64(bo->size, ws->info.gart_page_size));

   amdgpu_va_range_free(bo->va_handle);
   FREE(bo);
}

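/* When the winsys was created with debug_all_bos, keep every buffer on a
 * global list (protected by global_bo_list_lock). */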
static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
   struct radv_amdgpu_winsys *ws = bo->ws;

   if (bo->ws->debug_all_bos) {
      pthread_mutex_lock(&ws->global_bo_list_lock);
      list_addtail(&bo->global_list_item, &ws->global_bo_list);
      ws->num_buffers++;
      pthread_mutex_unlock(&ws->global_bo_list_lock);
   }
}

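/* Allocate a new BO: reserve a GPU VA range, and, unless RADEON_FLAG_VIRTUAL
 * is set, allocate backing memory in the requested domains, map it at the
 * reserved VA and account it against the matching heap counter. Virtual BOs
 * start with a single unbacked range covering the whole size. */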
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                             uint64_t size,
                             unsigned alignment,
                             enum radeon_bo_domain initial_domain,
                             unsigned flags,
                             unsigned priority)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   struct amdgpu_bo_alloc_request request = {0};
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   amdgpu_va_handle va_handle;
   int r;
   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   unsigned virt_alignment = alignment;
   if (size >= ws->info.pte_fragment_size)
      virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, virt_alignment, 0, &va, &va_handle,
                             (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
                             AMDGPU_VA_RANGE_HIGH);
   if (r)
      goto error_va_alloc;

   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->ws = ws;
   bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
   bo->ref_count = 1;

   if (flags & RADEON_FLAG_VIRTUAL) {
      bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
      bo->range_count = 1;
      bo->range_capacity = 1;

      bo->ranges[0].offset = 0;
      bo->ranges[0].size = size;
      bo->ranges[0].bo = NULL;
      bo->ranges[0].bo_offset = 0;

      radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
      return (struct radeon_winsys_bo *)bo;
   }

   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   if (initial_domain & RADEON_DOMAIN_GDS)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
   if (initial_domain & RADEON_DOMAIN_OA)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
      bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   }
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
   if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
      request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
   if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
       ws->info.has_local_buffers &&
       (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
      bo->base.is_local = true;
      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
   }

   /* This won't do anything on pre-4.9 kernels. */
   if (initial_domain & RADEON_DOMAIN_VRAM) {
      if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
         request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
   }

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
      fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
      goto error_bo_alloc;
   }

   r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags,
                            AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   bo->bo = buf_handle;
   bo->initial_domain = initial_domain;
   bo->is_shared = false;
   bo->priority = priority;

   r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
       * aren't mappable and they are counted as part of the VRAM
       * counter.
       *
       * Otherwise, buffers with the CPU_ACCESS flag or with neither
       * flag (imported buffers) are counted as part of the visible
       * VRAM counter because they can be mapped.
       */
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram,
                      align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis,
                      align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt,
                   align64(bo->size, ws->info.gart_page_size));

   radv_amdgpu_add_buffer_to_global_list(bo);
   return (struct radeon_winsys_bo *)bo;
error_va_map:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   FREE(bo);
   return NULL;
}

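/* CPU map/unmap of the whole BO, simply forwarded to libdrm_amdgpu. */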
static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   int ret;
   void *data;
   ret = amdgpu_bo_cpu_map(bo->bo, &data);
   if (ret)
      return NULL;
   return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   amdgpu_bo_cpu_unmap(bo->bo);
}

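/* Pick a VM alignment that lets the kernel use larger page-table fragments
 * and, on GFX9+, a power-of-two alignment derived from the size, both of
 * which speed up address translation. */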
static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
                                     uint64_t size, unsigned alignment)
{
   uint64_t vm_alignment = alignment;

   /* Increase the VM alignment for faster address translation. */
   if (size >= ws->info.pte_fragment_size)
      vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

   /* Gfx9: Increase the VM alignment to the most significant bit set
    * in the size for faster address translation.
    */
   if (ws->info.chip_class >= GFX9) {
      unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
      uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

      vm_alignment = MAX2(vm_alignment, msb_alignment);
   }
   return vm_alignment;
}

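/* Wrap an existing CPU allocation (userptr) in a winsys BO: register the user
 * memory with the kernel, reserve a VA range for it and map it. The resulting
 * BO is accounted as GTT. */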
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
                               void *pointer,
                               uint64_t size,
                               unsigned priority)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   amdgpu_bo_handle buf_handle;
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   uint64_t vm_alignment;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
      goto error;

   /* Using the optimal VM alignment also fixes GPU hangs for buffers that
    * are imported.
    */
   vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
                                                       ws->info.gart_page_size);

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, vm_alignment, 0, &va, &va_handle,
                             AMDGPU_VA_RANGE_HIGH))
      goto error_va_alloc;

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
      goto error_va_map;

   /* Initialize it */
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->ref_count = 1;
   bo->ws = ws;
   bo->bo = buf_handle;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->priority = priority;

   ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   p_atomic_add(&ws->allocated_gtt,
                align64(bo->size, ws->info.gart_page_size));

   radv_amdgpu_add_buffer_to_global_list(bo);
   return (struct radeon_winsys_bo *)bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return NULL;
}

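/* Import a BO from a dma-buf file descriptor: import the handle, query its
 * size and preferred heap, reserve a VA range and map it. The BO is marked
 * shared and accounted against the heap reported by the kernel. */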
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
                              int fd, unsigned priority,
                              uint64_t *alloc_size)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   struct amdgpu_bo_import_result result = {0};
   struct amdgpu_bo_info info = {0};
   enum radeon_bo_domain initial = 0;
   int r;
   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   r = amdgpu_bo_import(ws->dev, type, fd, &result);
   if (r)
      goto error;

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r)
      goto error_query;

   if (alloc_size) {
      *alloc_size = info.alloc_size;
   }

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             result.alloc_size, 1 << 20, 0, &va, &va_handle,
                             AMDGPU_VA_RANGE_HIGH);
   if (r)
      goto error_query;

   r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
                            va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;

   bo->bo = result.buf_handle;
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->initial_domain = initial;
   bo->size = result.alloc_size;
   bo->is_shared = true;
   bo->ws = ws;
   bo->priority = priority;
   bo->ref_count = 1;

   r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      p_atomic_add(&ws->allocated_vram,
                   align64(bo->size, ws->info.gart_page_size));
   if (bo->initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt,
                   align64(bo->size, ws->info.gart_page_size));

   radv_amdgpu_add_buffer_to_global_list(bo);
   return (struct radeon_winsys_bo *)bo;
error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return NULL;
}

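/* Export a BO as a dma-buf file descriptor and mark it as shared. */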
static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
                          struct radeon_winsys_bo *_bo,
                          int *fd)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   int r;
   unsigned handle;
   r = amdgpu_bo_export(bo->bo, type, &handle);
   if (r)
      return false;

   *fd = (int)handle;
   bo->is_shared = true;
   return true;
}

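/* Query the domains and allocation flags of a dma-buf without keeping the
 * import around, translating the AMDGPU_GEM_* bits back into radeon_winsys
 * enums. */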
static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd,
                                 enum radeon_bo_domain *domains,
                                 enum radeon_bo_flag *flags)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct amdgpu_bo_import_result result = {0};
   struct amdgpu_bo_info info = {0};
   int r;

   *domains = 0;
   *flags = 0;

   r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
   if (r)
      return false;

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   amdgpu_bo_free(result.buf_handle);
   if (r)
      return false;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      *domains |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      *domains |= RADEON_DOMAIN_GTT;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
      *domains |= RADEON_DOMAIN_GDS;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
      *domains |= RADEON_DOMAIN_OA;

   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
      *flags |= RADEON_FLAG_CPU_ACCESS;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
      *flags |= RADEON_FLAG_NO_CPU_ACCESS;
   if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
      *flags |= RADEON_FLAG_IMPLICIT_SYNC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
      *flags |= RADEON_FLAG_GTT_WC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
      *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
      *flags |= RADEON_FLAG_ZERO_VRAM;
   return true;
}

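/* Convert between the 3-bit TILE_SPLIT field of the tiling flags and the
 * tile-split size in bytes (64..4096), in both directions. */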
static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:  tile_split = 64;   break;
   case 1:  tile_split = 128;  break;
   case 2:  tile_split = 256;  break;
   case 3:  tile_split = 512;  break;
   default:
   case 4:  tile_split = 1024; break;
   case 5:  tile_split = 2048; break;
   case 6:  tile_split = 4096; break;
   }
   return tile_split;
}

static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:   return 0;
   case 128:  return 1;
   case 256:  return 2;
   case 512:  return 3;
   default:
   case 1024: return 4;
   case 2048: return 5;
   case 4096: return 6;
   }
}

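/* Pack the image layout description into amdgpu tiling flags and attach it to
 * the BO, so importers of the BO can reconstruct the layout. */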
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_metadata metadata = {0};
   uint64_t tiling_flags = 0;

   if (bo->ws->info.chip_class >= GFX9) {
      tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
      tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
   } else {
      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
      else
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

      tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
      tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
      tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
      if (md->u.legacy.tile_split)
         tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
      tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
      tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

      if (md->u.legacy.scanout)
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
      else
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
   }

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

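/* Read the tiling metadata back from the BO and unpack it into the
 * radeon_bo_metadata layout description. */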
static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_info info = {0};

   int r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   uint64_t tiling_flags = info.metadata.tiling_info;

   if (bo->ws->info.chip_class >= GFX9) {
      md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
      md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
   } else {
      md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
      md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

      if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
         md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
      else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
         md->u.legacy.microtile = RADEON_LAYOUT_TILED;

      md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
      md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
      md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
      md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
      md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
      md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
      md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
   }

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

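/* Plug the buffer functions into the winsys vtable. */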
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
   ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
   ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
   ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
   ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
   ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
   ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
   ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
   ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
   ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
   ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
   ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
}