radv/winsys: Finish mapping for sparse residency.
[mesa.git] src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include "drm-uapi/amdgpu_drm.h"
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>

#include "util/u_atomic.h"
#include "util/u_memory.h"
#include "util/u_math.h"

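/* Buffer-object (BO) backend of the radv amdgpu winsys: allocation, CPU
 * mapping, dma-buf and host-pointer import/export, tiling metadata, and the
 * virtual (sparse) BOs whose address space is stitched together from
 * per-range mappings of backing BOs.
 */
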
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);

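/* Thin wrapper around amdgpu_bo_va_op_raw(). For mappings with a backing BO
 * the VM protection flags are derived from the RADEON_FLAG_* bits; for
 * NULL-BO mappings (the sparse/PRT case below) the caller passes the kernel
 * flags directly through internal_flags. The size is page-aligned since the
 * kernel only maps whole pages.
 */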
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
		     amdgpu_bo_handle bo,
		     uint64_t offset,
		     uint64_t size,
		     uint64_t addr,
		     uint32_t bo_flags,
		     uint64_t internal_flags,
		     uint32_t ops)
{
	uint64_t flags = internal_flags;
	if (bo) {
		flags = AMDGPU_VM_PAGE_READABLE |
		        AMDGPU_VM_PAGE_EXECUTABLE;

		if ((bo_flags & RADEON_FLAG_VA_UNCACHED) &&
		    ws->info.chip_class >= GFX9)
			flags |= AMDGPU_VM_MTYPE_UC;

		if (!(bo_flags & RADEON_FLAG_READ_ONLY))
			flags |= AMDGPU_VM_PAGE_WRITEABLE;
	}

	size = align64(size, getpagesize());

	return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
				   flags, ops);
}

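/* Map a single range of a virtual (sparse) BO. A range with a backing BO gets
 * a regular VM mapping and takes a reference on that BO; a range without one
 * is mapped as a PRT range (if the kernel supports sparse VM mappings), so
 * accesses to unbound sparse pages behave predictably instead of faulting.
 */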
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
	uint64_t internal_flags = 0;
	assert(range->size);

	if (!range->bo) {
		if (!bo->ws->info.has_sparse_vm_mappings)
			return;

		internal_flags |= AMDGPU_VM_PAGE_PRT;
	} else
		p_atomic_inc(&range->bo->ref_count);

	int r = radv_amdgpu_bo_va_op(bo->ws, range->bo ? range->bo->bo : NULL,
				     range->bo_offset, range->size,
				     range->offset + bo->base.va, 0,
				     internal_flags, AMDGPU_VA_OP_MAP);
	if (r)
		abort();
}

static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
	uint64_t internal_flags = 0;
	assert(range->size);

	if (!range->bo) {
		if (!bo->ws->info.has_sparse_vm_mappings)
			return;

		/* Even though this is an unmap, if we don't set this flag,
		 * AMDGPU is going to complain about the missing buffer. */
		internal_flags |= AMDGPU_VM_PAGE_PRT;
	}

	int r = radv_amdgpu_bo_va_op(bo->ws, range->bo ? range->bo->bo : NULL,
				     range->bo_offset, range->size,
				     range->offset + bo->base.va, 0, internal_flags,
				     AMDGPU_VA_OP_UNMAP);
	if (r)
		abort();

	if (range->bo)
		radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}

static int bo_comparator(const void *ap, const void *bp) {
	struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
	struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
	return (a > b) ? 1 : (a < b) ? -1 : 0;
}

static void
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
	if (bo->bo_capacity < bo->range_count) {
		uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
		bo->bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
		bo->bo_capacity = new_count;
	}

	uint32_t temp_bo_count = 0;
	for (uint32_t i = 0; i < bo->range_count; ++i)
		if (bo->ranges[i].bo)
			bo->bos[temp_bo_count++] = bo->ranges[i].bo;

	qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

	uint32_t final_bo_count = temp_bo_count ? 1 : 0;
	for (uint32_t i = 1; i < temp_bo_count; ++i)
		if (bo->bos[i] != bo->bos[i - 1])
			bo->bos[final_bo_count++] = bo->bos[i];

	bo->bo_count = final_bo_count;
}

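/* Bind the range [offset, offset + size) of the virtual (sparse) BO "_parent"
 * to [bo_offset, bo_offset + size) of "_bo", or leave it without backing when
 * _bo is NULL. The parent keeps a sorted, non-overlapping list of ranges that
 * covers its whole VA space; a bind replaces the covered part of that list,
 * trimming, splitting or merging the neighbouring ranges as needed and
 * (re)mapping only what actually changed.
 *
 * Example (page-sized units): with ranges {[0,16)->NULL, [16,32)->A}, binding
 * [8,24) to B yields {[0,8)->NULL, [8,24)->B, [24,32)->A}: the NULL range is
 * trimmed from the back, the A range is trimmed from the front, and the new
 * range is inserted in between.
 */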
static void
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size,
                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
	struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
	struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
	int range_count_delta, new_idx;
	int first = 0, last;
	struct radv_amdgpu_map_range new_first, new_last;

	assert(parent->is_virtual);
	assert(!bo || !bo->is_virtual);

	if (!size)
		return;

	/* We have at most 2 new ranges (1 by the bind, and another one by splitting
	 * a range that contains the newly bound range). */
	if (parent->range_capacity - parent->range_count < 2) {
		parent->range_capacity += 2;
		parent->ranges = realloc(parent->ranges,
		                         parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
	}

	/*
	 * [first, last] is exactly the range of ranges that either overlap the
	 * new parent, or are adjacent to it. This corresponds to the bind ranges
	 * that may change.
	 */
	while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
		++first;

	last = first;
	while (last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
		++last;

	/* Whether the first or last range is going to be totally removed or just
	 * resized/left alone. Note that in the case of first == last, we will split
	 * this into a part before and after the new range. The remove flag then
	 * indicates that the corresponding split part should not be created. */
	bool remove_first = parent->ranges[first].offset == offset;
	bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
	bool unmapped_first = false;

	assert(parent->ranges[first].offset <= offset);
	assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

	/* Try to merge the new range with the first range. */
	if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
		size += offset - parent->ranges[first].offset;
		offset = parent->ranges[first].offset;
		bo_offset = parent->ranges[first].bo_offset;
		remove_first = true;
	}

	/* Try to merge the new range with the last range. */
	if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
		size = parent->ranges[last].offset + parent->ranges[last].size - offset;
		remove_last = true;
	}

	range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
	new_idx = first + !remove_first;

	/* Any ranges between first and last are going to be entirely covered by the new range, so just unmap them. */
	for (int i = first + 1; i < last; ++i)
		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

	/* If the first/last range is not left alone we unmap it and optionally map
	 * it again after modifications. Note that this implicitly can do the
	 * splitting if first == last. */
	new_first = parent->ranges[first];
	new_last = parent->ranges[last];

	if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
		unmapped_first = true;

		if (!remove_first) {
			new_first.size = offset - new_first.offset;
			radv_amdgpu_winsys_virtual_map(parent, &new_first);
		}
	}

	if (parent->ranges[last].offset < offset + size || remove_last) {
		if (first != last || !unmapped_first)
			radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

		if (!remove_last) {
			new_last.size -= offset + size - new_last.offset;
			if (new_last.bo)
				new_last.bo_offset += offset + size - new_last.offset;
			new_last.offset = offset + size;
			radv_amdgpu_winsys_virtual_map(parent, &new_last);
		}
	}

	/* Move the range list after last to account for the changed number of ranges. */
	memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
	        sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

	if (!remove_first)
		parent->ranges[first] = new_first;

	if (!remove_last)
		parent->ranges[new_idx + 1] = new_last;

	/* Actually set up the new range. */
	parent->ranges[new_idx].offset = offset;
	parent->ranges[new_idx].size = size;
	parent->ranges[new_idx].bo = bo;
	parent->ranges[new_idx].bo_offset = bo_offset;

	radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

	parent->range_count += range_count_delta;

	radv_amdgpu_winsys_rebuild_bo_list(parent);
}

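/* Illustrative call sequence (a sketch, not lifted from the actual radv call
 * sites): a sparse resource gets a virtual BO and is backed piecewise through
 * the winsys entry points installed in radv_amdgpu_bo_init_functions():
 *
 *    struct radeon_winsys_bo *virt =
 *       ws->base.buffer_create(&ws->base, resource_size, 4096, 0,
 *                              RADEON_FLAG_VIRTUAL, priority);
 *    // Back [bind_offset, bind_offset + bind_size) with "mem".
 *    ws->base.buffer_virtual_bind(virt, bind_offset, bind_size, mem, mem_offset);
 *    // Unbind again; the range goes back to being a PRT mapping.
 *    ws->base.buffer_virtual_bind(virt, bind_offset, bind_size, NULL, 0);
 */
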
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct radv_amdgpu_winsys *ws = bo->ws;

	if (p_atomic_dec_return(&bo->ref_count))
		return;
	if (bo->is_virtual) {
		for (uint32_t i = 0; i < bo->range_count; ++i) {
			radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
		}
		free(bo->bos);
		free(bo->ranges);
	} else {
		if (bo->ws->debug_all_bos) {
			pthread_mutex_lock(&bo->ws->global_bo_list_lock);
			list_del(&bo->global_list_item);
			bo->ws->num_buffers--;
			pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
		}
		radv_amdgpu_bo_va_op(bo->ws, bo->bo, 0, bo->size, bo->base.va,
				     0, 0, AMDGPU_VA_OP_UNMAP);
		amdgpu_bo_free(bo->bo);
	}

	if (bo->initial_domain & RADEON_DOMAIN_VRAM) {
		if (bo->base.vram_no_cpu_access) {
			p_atomic_add(&ws->allocated_vram,
				     -align64(bo->size, ws->info.gart_page_size));
		} else {
			p_atomic_add(&ws->allocated_vram_vis,
				     -align64(bo->size, ws->info.gart_page_size));
		}
	}

	if (bo->initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     -align64(bo->size, ws->info.gart_page_size));

	amdgpu_va_range_free(bo->va_handle);
	FREE(bo);
}

static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
	struct radv_amdgpu_winsys *ws = bo->ws;

	if (bo->ws->debug_all_bos) {
		pthread_mutex_lock(&ws->global_bo_list_lock);
		list_addtail(&bo->global_list_item, &ws->global_bo_list);
		ws->num_buffers++;
		pthread_mutex_unlock(&ws->global_bo_list_lock);
	}
}

static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
			     uint64_t size,
			     unsigned alignment,
			     enum radeon_bo_domain initial_domain,
			     unsigned flags,
			     unsigned priority)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_winsys_bo *bo;
	struct amdgpu_bo_alloc_request request = {0};
	amdgpu_bo_handle buf_handle;
	uint64_t va = 0;
	amdgpu_va_handle va_handle;
	int r;
	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo) {
		return NULL;
	}

	unsigned virt_alignment = alignment;
	if (size >= ws->info.pte_fragment_size)
		virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  size, virt_alignment, 0, &va, &va_handle,
				  (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
				  AMDGPU_VA_RANGE_HIGH);
	if (r)
		goto error_va_alloc;

	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->size = size;
	bo->ws = ws;
	bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
	bo->ref_count = 1;

	if (flags & RADEON_FLAG_VIRTUAL) {
		bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
		bo->range_count = 1;
		bo->range_capacity = 1;

		bo->ranges[0].offset = 0;
		bo->ranges[0].size = size;
		bo->ranges[0].bo = NULL;
		bo->ranges[0].bo_offset = 0;

		radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
		return (struct radeon_winsys_bo *)bo;
	}

	request.alloc_size = size;
	request.phys_alignment = alignment;

	if (initial_domain & RADEON_DOMAIN_VRAM)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
	if (initial_domain & RADEON_DOMAIN_GTT)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
	if (initial_domain & RADEON_DOMAIN_GDS)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
	if (initial_domain & RADEON_DOMAIN_OA)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

	if (flags & RADEON_FLAG_CPU_ACCESS)
		request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
		bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
		request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	}
	if (flags & RADEON_FLAG_GTT_WC)
		request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
		request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
	if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
	    ws->info.has_local_buffers &&
	    (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
		bo->base.is_local = true;
		request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
	}

	/* This won't do anything on pre-4.9 kernels. */
	if (initial_domain & RADEON_DOMAIN_VRAM) {
		if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
			request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
	}

	r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
	if (r) {
		fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
		fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
		fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
		fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
		goto error_bo_alloc;
	}

	r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0,
				 AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	bo->bo = buf_handle;
	bo->initial_domain = initial_domain;
	bo->is_shared = false;
	bo->priority = priority;

	r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	if (initial_domain & RADEON_DOMAIN_VRAM) {
		/* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
		 * aren't mappable and they are counted as part of the VRAM
		 * counter.
		 *
		 * Otherwise, buffers with the CPU_ACCESS flag or with neither
		 * flag set (imported buffers) are counted as part of the
		 * visible VRAM counter because they can be mapped.
		 */
		if (bo->base.vram_no_cpu_access) {
			p_atomic_add(&ws->allocated_vram,
				     align64(bo->size, ws->info.gart_page_size));
		} else {
			p_atomic_add(&ws->allocated_vram_vis,
				     align64(bo->size, ws->info.gart_page_size));
		}
	}

	if (initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;
error_va_map:
	amdgpu_bo_free(buf_handle);

error_bo_alloc:
	amdgpu_va_range_free(va_handle);

error_va_alloc:
	FREE(bo);
	return NULL;
}

static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	int ret;
	void *data;
	ret = amdgpu_bo_cpu_map(bo->bo, &data);
	if (ret)
		return NULL;
	return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	amdgpu_bo_cpu_unmap(bo->bo);
}

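/* Pick a VM alignment that lets the kernel use large PTE fragments and, on
 * GFX9+, a power-of-two base aligned to the top bit of the size, both of
 * which speed up address translation.
 *
 * Worked example (illustrative sizes): importing 144 KiB (0x24000 bytes) with
 * a 4 KiB GART page size and a 64 KiB pte_fragment_size on GFX9 gives
 * MAX2(4 KiB, 64 KiB, 1ull << (18 - 1) = 128 KiB) = 128 KiB.
 */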
static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
				     uint64_t size, unsigned alignment)
{
	uint64_t vm_alignment = alignment;

	/* Increase the VM alignment for faster address translation. */
	if (size >= ws->info.pte_fragment_size)
		vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

	/* Gfx9: Increase the VM alignment to the most significant bit set
	 * in the size for faster address translation.
	 */
	if (ws->info.chip_class >= GFX9) {
		unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
		uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

		vm_alignment = MAX2(vm_alignment, msb_alignment);
	}
	return vm_alignment;
}

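/* Wrap an existing, page-aligned CPU allocation into a GTT BO via
 * amdgpu_create_bo_from_user_mem() so the GPU can address it directly; in
 * radv this is what backs host-pointer imports (VK_EXT_external_memory_host).
 */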
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
                               void *pointer,
                               uint64_t size,
                               unsigned priority)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	amdgpu_bo_handle buf_handle;
	struct radv_amdgpu_winsys_bo *bo;
	uint64_t va;
	amdgpu_va_handle va_handle;
	uint64_t vm_alignment;

	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo)
		return NULL;

	if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
		goto error;

	/* Using the optimal VM alignment also fixes GPU hangs for buffers that
	 * are imported.
	 */
	vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
							    ws->info.gart_page_size);

	if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  size, vm_alignment, 0, &va, &va_handle,
				  AMDGPU_VA_RANGE_HIGH))
		goto error_va_alloc;

	if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
		goto error_va_map;

	/* Initialize it */
	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->size = size;
	bo->ref_count = 1;
	bo->ws = ws;
	bo->bo = buf_handle;
	bo->initial_domain = RADEON_DOMAIN_GTT;
	bo->priority = priority;

	ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	p_atomic_add(&ws->allocated_gtt,
		     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;

error_va_map:
	amdgpu_va_range_free(va_handle);

error_va_alloc:
	amdgpu_bo_free(buf_handle);

error:
	FREE(bo);
	return NULL;
}

static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
			      int fd, unsigned priority,
			      uint64_t *alloc_size)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_winsys_bo *bo;
	uint64_t va;
	amdgpu_va_handle va_handle;
	enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
	struct amdgpu_bo_import_result result = {0};
	struct amdgpu_bo_info info = {0};
	enum radeon_bo_domain initial = 0;
	int r;
	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo)
		return NULL;

	r = amdgpu_bo_import(ws->dev, type, fd, &result);
	if (r)
		goto error;

	r = amdgpu_bo_query_info(result.buf_handle, &info);
	if (r)
		goto error_query;

	if (alloc_size) {
		*alloc_size = info.alloc_size;
	}

	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  result.alloc_size, 1 << 20, 0, &va, &va_handle,
				  AMDGPU_VA_RANGE_HIGH);
	if (r)
		goto error_query;

	r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
				 va, 0, 0, AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
		initial |= RADEON_DOMAIN_VRAM;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
		initial |= RADEON_DOMAIN_GTT;

	bo->bo = result.buf_handle;
	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->initial_domain = initial;
	bo->size = result.alloc_size;
	bo->is_shared = true;
	bo->ws = ws;
	bo->priority = priority;
	bo->ref_count = 1;

	r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	if (bo->initial_domain & RADEON_DOMAIN_VRAM)
		p_atomic_add(&ws->allocated_vram,
			     align64(bo->size, ws->info.gart_page_size));
	if (bo->initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;
error_va_map:
	amdgpu_va_range_free(va_handle);

error_query:
	amdgpu_bo_free(result.buf_handle);

error:
	FREE(bo);
	return NULL;
}

static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
			  struct radeon_winsys_bo *_bo,
			  int *fd)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
	int r;
	unsigned handle;
	r = amdgpu_bo_export(bo->bo, type, &handle);
	if (r)
		return false;

	*fd = (int)handle;
	bo->is_shared = true;
	return true;
}

static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd,
                                 enum radeon_bo_domain *domains,
                                 enum radeon_bo_flag *flags)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct amdgpu_bo_import_result result = {0};
	struct amdgpu_bo_info info = {0};
	int r;

	*domains = 0;
	*flags = 0;

	r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
	if (r)
		return false;

	r = amdgpu_bo_query_info(result.buf_handle, &info);
	amdgpu_bo_free(result.buf_handle);
	if (r)
		return false;

	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
		*domains |= RADEON_DOMAIN_VRAM;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
		*domains |= RADEON_DOMAIN_GTT;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
		*domains |= RADEON_DOMAIN_GDS;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
		*domains |= RADEON_DOMAIN_OA;

	if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
		*flags |= RADEON_FLAG_CPU_ACCESS;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
		*flags |= RADEON_FLAG_NO_CPU_ACCESS;
	if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
		*flags |= RADEON_FLAG_IMPLICIT_SYNC;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
		*flags |= RADEON_FLAG_GTT_WC;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
		*flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
		*flags |= RADEON_FLAG_ZERO_VRAM;
	return true;
}

static unsigned eg_tile_split(unsigned tile_split)
{
	switch (tile_split) {
	case 0:  tile_split = 64;   break;
	case 1:  tile_split = 128;  break;
	case 2:  tile_split = 256;  break;
	case 3:  tile_split = 512;  break;
	default:
	case 4:  tile_split = 1024; break;
	case 5:  tile_split = 2048; break;
	case 6:  tile_split = 4096; break;
	}
	return tile_split;
}

static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
	switch (eg_tile_split) {
	case 64:   return 0;
	case 128:  return 1;
	case 256:  return 2;
	case 512:  return 3;
	default:
	case 1024: return 4;
	case 2048: return 5;
	case 4096: return 6;
	}
}

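/* The TILE_SPLIT tiling field stores log2(bytes) - 6, so the two helpers
 * above convert both ways between the byte count used in radeon_bo_metadata
 * and the hardware encoding (e.g. 256 bytes <-> 2).
 */
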
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
				   struct radeon_bo_metadata *md)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct amdgpu_bo_metadata metadata = {0};
	uint64_t tiling_flags = 0;

	if (bo->ws->info.chip_class >= GFX9) {
		tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
		tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
	} else {
		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
		else
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

		tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
		tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
		tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
		if (md->u.legacy.tile_split)
			tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
		tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
		tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

		if (md->u.legacy.scanout)
			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
		else
			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
	}

	metadata.tiling_info = tiling_flags;
	metadata.size_metadata = md->size_metadata;
	memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

	amdgpu_bo_set_metadata(bo->bo, &metadata);
}

static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo,
				   struct radeon_bo_metadata *md)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct amdgpu_bo_info info = {0};

	int r = amdgpu_bo_query_info(bo->bo, &info);
	if (r)
		return;

	uint64_t tiling_flags = info.metadata.tiling_info;

	if (bo->ws->info.chip_class >= GFX9) {
		md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
		md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
	} else {
		md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
		md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

		if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4)  /* 2D_TILED_THIN1 */
			md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
		else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
			md->u.legacy.microtile = RADEON_LAYOUT_TILED;

		md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
		md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
		md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
		md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
		md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
		md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
		md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
	}

	md->size_metadata = info.metadata.size_metadata;
	memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
	ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
	ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
	ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
	ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
	ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
	ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
	ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
	ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
	ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
	ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
	ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
}