/* mesa.git: src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c */
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>

#include "util/u_atomic.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);

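/* Perform a GPU VA map/unmap through amdgpu_bo_va_op_raw(). Pages are always
 * readable and executable, writeable unless RADEON_FLAG_READ_ONLY is set, and
 * uncached (MTYPE_UC) when RADEON_FLAG_VA_UNCACHED is requested on GFX9+.
 * The size is rounded up to the CPU page size before the ioctl.
 */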
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
                     amdgpu_bo_handle bo,
                     uint64_t offset,
                     uint64_t size,
                     uint64_t addr,
                     uint32_t bo_flags,
                     uint32_t ops)
{
        uint64_t flags = AMDGPU_VM_PAGE_READABLE |
                         AMDGPU_VM_PAGE_EXECUTABLE;

        if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
                flags |= AMDGPU_VM_MTYPE_UC;

        if (!(bo_flags & RADEON_FLAG_READ_ONLY))
                flags |= AMDGPU_VM_PAGE_WRITEABLE;

        size = ALIGN(size, getpagesize());

        return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
                                   flags, ops);
}

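/* Map one backing range of a virtual BO into the parent's VA space and take a
 * reference on the backing BO. Ranges without a backing BO (PRT holes) are
 * left unmapped for now.
 */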
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
        assert(range->size);

        if (!range->bo)
                return; /* TODO: PRT mapping */

        p_atomic_inc(&range->bo->ref_count);
        int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
                                     range->size, range->offset + bo->base.va,
                                     0, AMDGPU_VA_OP_MAP);
        if (r)
                abort();
}

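/* Counterpart of radv_amdgpu_winsys_virtual_map(): unmap the range from the
 * parent's VA space and drop the reference on the backing BO.
 */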
static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
        assert(range->size);

        if (!range->bo)
                return; /* TODO: PRT mapping */

        int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
                                     range->size, range->offset + bo->base.va,
                                     0, AMDGPU_VA_OP_UNMAP);
        if (r)
                abort();
        radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}

static int bo_comparator(const void *ap, const void *bp) {
        struct radv_amdgpu_winsys_bo *a = *(struct radv_amdgpu_winsys_bo *const *)ap;
        struct radv_amdgpu_winsys_bo *b = *(struct radv_amdgpu_winsys_bo *const *)bp;
        return (a > b) ? 1 : (a < b) ? -1 : 0;
}

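/* Rebuild the deduplicated list of backing BOs (bo->bos) referenced by the
 * current ranges of a virtual BO: gather the non-NULL backing BOs, sort them
 * by pointer value and skip duplicates.
 */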
static void
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
        if (bo->bo_capacity < bo->range_count) {
                uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
                bo->bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
                bo->bo_capacity = new_count;
        }

        uint32_t temp_bo_count = 0;
        for (uint32_t i = 0; i < bo->range_count; ++i)
                if (bo->ranges[i].bo)
                        bo->bos[temp_bo_count++] = bo->ranges[i].bo;

        qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

        /* Without this check, a virtual BO with no backing ranges would end up
         * reporting one stale entry. */
        if (!temp_bo_count) {
                bo->bo_count = 0;
                return;
        }

        uint32_t final_bo_count = 1;
        for (uint32_t i = 1; i < temp_bo_count; ++i)
                if (bo->bos[i] != bo->bos[i - 1])
                        bo->bos[final_bo_count++] = bo->bos[i];

        bo->bo_count = final_bo_count;
}

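/* Bind the region [offset, offset + size) of the virtual BO "parent" to the
 * physical BO "bo" at bo_offset (or unbind it when bo is NULL). Existing
 * ranges are merged, split or removed as needed, the affected mappings are
 * redone, and the backing BO list is rebuilt afterwards.
 */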
static void
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size,
                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
        struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
        struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
        int range_count_delta, new_idx;
        int first = 0, last;
        struct radv_amdgpu_map_range new_first, new_last;

        assert(parent->is_virtual);
        assert(!bo || !bo->is_virtual);

        if (!size)
                return;

        /* We have at most 2 new ranges (1 by the bind, and another one by
         * splitting a range that contains the newly bound range). */
        if (parent->range_capacity - parent->range_count < 2) {
                parent->range_capacity += 2;
                parent->ranges = realloc(parent->ranges,
                                         parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
        }

        /*
         * [first, last] is exactly the range of existing ranges that either
         * overlap the newly bound range or are adjacent to it, i.e. the
         * ranges that may change because of this bind.
         */
        while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
                ++first;

        last = first;
        while (last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
                ++last;

        /* Whether the first or last range is going to be removed entirely or
         * just resized/left alone. Note that when first == last, we split that
         * range into a part before and a part after the new range; the remove
         * flags then say whether the corresponding split part is skipped. */
        bool remove_first = parent->ranges[first].offset == offset;
        bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
        bool unmapped_first = false;

        assert(parent->ranges[first].offset <= offset);
        assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

        /* Try to merge the new range with the first range. */
        if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
                size += offset - parent->ranges[first].offset;
                offset = parent->ranges[first].offset;
                bo_offset = parent->ranges[first].bo_offset;
                remove_first = true;
        }

        /* Try to merge the new range with the last range. */
        if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
                size = parent->ranges[last].offset + parent->ranges[last].size - offset;
                remove_last = true;
        }

        range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
        new_idx = first + !remove_first;

        /* Any ranges between first and last are entirely covered by the new
         * range, so just unmap them. */
        for (int i = first + 1; i < last; ++i)
                radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

        /* If the first/last range is not left alone, we unmap it and optionally
         * map it again after modifications. Note that this implicitly does the
         * splitting when first == last. */
        new_first = parent->ranges[first];
        new_last = parent->ranges[last];

        if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
                radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
                unmapped_first = true;

                if (!remove_first) {
                        new_first.size = offset - new_first.offset;
                        radv_amdgpu_winsys_virtual_map(parent, &new_first);
                }
        }

        if (parent->ranges[last].offset < offset + size || remove_last) {
                if (first != last || !unmapped_first)
                        radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

                if (!remove_last) {
                        new_last.size -= offset + size - new_last.offset;
                        new_last.offset = offset + size;
                        radv_amdgpu_winsys_virtual_map(parent, &new_last);
                }
        }

        /* Move the range list after last to account for the changed number of ranges. */
        memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
                sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

        if (!remove_first)
                parent->ranges[first] = new_first;

        if (!remove_last)
                parent->ranges[new_idx + 1] = new_last;

        /* Actually set up the new range. */
        parent->ranges[new_idx].offset = offset;
        parent->ranges[new_idx].size = size;
        parent->ranges[new_idx].bo = bo;
        parent->ranges[new_idx].bo_offset = bo_offset;

        radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

        parent->range_count += range_count_delta;

        radv_amdgpu_winsys_rebuild_bo_list(parent);
}

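/* Drop a reference on a BO. On the last reference: for virtual BOs, unmap all
 * backing ranges and free the bookkeeping arrays; for regular BOs, remove the
 * BO from the debug list if needed, unmap its VA and free the kernel BO.
 * Memory accounting and the VA range are released in both cases.
 */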
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        struct radv_amdgpu_winsys *ws = bo->ws;

        if (p_atomic_dec_return(&bo->ref_count))
                return;
        if (bo->is_virtual) {
                for (uint32_t i = 0; i < bo->range_count; ++i) {
                        radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
                }
                free(bo->bos);
                free(bo->ranges);
        } else {
                if (bo->ws->debug_all_bos) {
                        pthread_mutex_lock(&bo->ws->global_bo_list_lock);
                        LIST_DEL(&bo->global_list_item);
                        bo->ws->num_buffers--;
                        pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
                }
                radv_amdgpu_bo_va_op(bo->ws, bo->bo, 0, bo->size, bo->base.va,
                                     0, AMDGPU_VA_OP_UNMAP);
                amdgpu_bo_free(bo->bo);
        }

        if (bo->initial_domain & RADEON_DOMAIN_VRAM)
                p_atomic_add(&ws->allocated_vram,
                             -align64(bo->size, ws->info.gart_page_size));
        if (bo->base.vram_cpu_access)
                p_atomic_add(&ws->allocated_vram_vis,
                             -align64(bo->size, ws->info.gart_page_size));
        if (bo->initial_domain & RADEON_DOMAIN_GTT)
                p_atomic_add(&ws->allocated_gtt,
                             -align64(bo->size, ws->info.gart_page_size));

        amdgpu_va_range_free(bo->va_handle);
        FREE(bo);
}

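/* When debug_all_bos is enabled, keep every BO on a global, lock-protected
 * list so the whole working set can be inspected for debugging.
 */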
static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
        struct radv_amdgpu_winsys *ws = bo->ws;

        if (bo->ws->debug_all_bos) {
                pthread_mutex_lock(&ws->global_bo_list_lock);
                list_addtail(&bo->global_list_item, &ws->global_bo_list);
                ws->num_buffers++;
                pthread_mutex_unlock(&ws->global_bo_list_lock);
        }
}

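/* Create a winsys BO: allocate a GPU VA range and then either set up a
 * virtual (sparse) BO backed by a single unbound range, or allocate a kernel
 * BO in the requested domains, translate the RADEON_FLAG_* bits into
 * AMDGPU_GEM_CREATE_* flags, map it at the VA and update memory accounting.
 */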
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                             uint64_t size,
                             unsigned alignment,
                             enum radeon_bo_domain initial_domain,
                             unsigned flags,
                             unsigned priority)
{
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        struct radv_amdgpu_winsys_bo *bo;
        struct amdgpu_bo_alloc_request request = {0};
        amdgpu_bo_handle buf_handle;
        uint64_t va = 0;
        amdgpu_va_handle va_handle;
        int r;
        bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
        if (!bo) {
                return NULL;
        }

        unsigned virt_alignment = alignment;
        if (size >= ws->info.pte_fragment_size)
                virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

        r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                                  size, virt_alignment, 0, &va, &va_handle,
                                  (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
                                  AMDGPU_VA_RANGE_HIGH);
        if (r)
                goto error_va_alloc;

        bo->base.va = va;
        bo->va_handle = va_handle;
        bo->size = size;
        bo->ws = ws;
        bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
        bo->ref_count = 1;

        if (flags & RADEON_FLAG_VIRTUAL) {
                bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
                bo->range_count = 1;
                bo->range_capacity = 1;

                bo->ranges[0].offset = 0;
                bo->ranges[0].size = size;
                bo->ranges[0].bo = NULL;
                bo->ranges[0].bo_offset = 0;

                radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
                return (struct radeon_winsys_bo *)bo;
        }

        request.alloc_size = size;
        request.phys_alignment = alignment;

        if (initial_domain & RADEON_DOMAIN_VRAM)
                request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
        if (initial_domain & RADEON_DOMAIN_GTT)
                request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
        if (initial_domain & RADEON_DOMAIN_GDS)
                request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
        if (initial_domain & RADEON_DOMAIN_OA)
                request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

        if (flags & RADEON_FLAG_CPU_ACCESS) {
                bo->base.vram_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
                request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
        }
        if (flags & RADEON_FLAG_NO_CPU_ACCESS)
                request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
        if (flags & RADEON_FLAG_GTT_WC)
                request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
        if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
                request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
        if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
            ws->info.has_local_buffers &&
            (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
                bo->base.is_local = true;
                request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
        }

        /* This won't do anything on pre-4.9 kernels. */
        if (ws->zero_all_vram_allocs && (initial_domain & RADEON_DOMAIN_VRAM))
                request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
        r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
        if (r) {
                fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
                fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
                fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
                fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
                goto error_bo_alloc;
        }

        r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags,
                                 AMDGPU_VA_OP_MAP);
        if (r)
                goto error_va_map;

        bo->bo = buf_handle;
        bo->initial_domain = initial_domain;
        bo->is_shared = false;
        bo->priority = priority;

        r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
        assert(!r);

        if (initial_domain & RADEON_DOMAIN_VRAM)
                p_atomic_add(&ws->allocated_vram,
                             align64(bo->size, ws->info.gart_page_size));
        if (bo->base.vram_cpu_access)
                p_atomic_add(&ws->allocated_vram_vis,
                             align64(bo->size, ws->info.gart_page_size));
        if (initial_domain & RADEON_DOMAIN_GTT)
                p_atomic_add(&ws->allocated_gtt,
                             align64(bo->size, ws->info.gart_page_size));

        radv_amdgpu_add_buffer_to_global_list(bo);
        return (struct radeon_winsys_bo *)bo;
error_va_map:
        amdgpu_bo_free(buf_handle);

error_bo_alloc:
        amdgpu_va_range_free(va_handle);

error_va_alloc:
        FREE(bo);
        return NULL;
}

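/* CPU-map the buffer contents; returns NULL if the kernel mapping fails. */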
static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        int ret;
        void *data;
        ret = amdgpu_bo_cpu_map(bo->bo, &data);
        if (ret)
                return NULL;
        return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        amdgpu_bo_cpu_unmap(bo->bo);
}

static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
                                     uint64_t size, unsigned alignment)
{
        uint64_t vm_alignment = alignment;

        /* Increase the VM alignment for faster address translation. */
        if (size >= ws->info.pte_fragment_size)
                vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

        /* Gfx9: Increase the VM alignment to the most significant bit set
         * in the size for faster address translation.
         */
        if (ws->info.chip_class >= GFX9) {
                unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
                uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

                vm_alignment = MAX2(vm_alignment, msb_alignment);
        }
        return vm_alignment;
}

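/* Wrap an existing CPU allocation as a GTT buffer (userptr import): create a
 * kernel BO from the user memory, allocate a VA range with the optimal VM
 * alignment and map the buffer there.
 */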
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
                               void *pointer,
                               uint64_t size,
                               unsigned priority)
{
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        amdgpu_bo_handle buf_handle;
        struct radv_amdgpu_winsys_bo *bo;
        uint64_t va;
        amdgpu_va_handle va_handle;
        uint64_t vm_alignment;

        bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
        if (!bo)
                return NULL;

        if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
                goto error;

        /* Using the optimal VM alignment also fixes GPU hangs for buffers that
         * are imported.
         */
        vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
                                                            ws->info.gart_page_size);

        if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                                  size, vm_alignment, 0, &va, &va_handle,
                                  AMDGPU_VA_RANGE_HIGH))
                goto error_va_alloc;

        if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
                goto error_va_map;

        /* Initialize it */
        bo->base.va = va;
        bo->va_handle = va_handle;
        bo->size = size;
        bo->ref_count = 1;
        bo->ws = ws;
        bo->bo = buf_handle;
        bo->initial_domain = RADEON_DOMAIN_GTT;
        bo->priority = priority;

        ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
        assert(!r);

        p_atomic_add(&ws->allocated_gtt,
                     align64(bo->size, ws->info.gart_page_size));

        radv_amdgpu_add_buffer_to_global_list(bo);
        return (struct radeon_winsys_bo *)bo;

error_va_map:
        amdgpu_va_range_free(va_handle);

error_va_alloc:
        amdgpu_bo_free(buf_handle);

error:
        FREE(bo);
        return NULL;
}

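/* Import a BO from a dma-buf file descriptor: query its size and preferred
 * heap, allocate a VA range, map it, and mark the resulting BO as shared.
 */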
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
                              int fd, unsigned priority,
                              uint64_t *alloc_size)
{
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        struct radv_amdgpu_winsys_bo *bo;
        uint64_t va;
        amdgpu_va_handle va_handle;
        enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
        struct amdgpu_bo_import_result result = {0};
        struct amdgpu_bo_info info = {0};
        enum radeon_bo_domain initial = 0;
        int r;
        bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
        if (!bo)
                return NULL;

        r = amdgpu_bo_import(ws->dev, type, fd, &result);
        if (r)
                goto error;

        r = amdgpu_bo_query_info(result.buf_handle, &info);
        if (r)
                goto error_query;

        if (alloc_size) {
                *alloc_size = info.alloc_size;
        }

        r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                                  result.alloc_size, 1 << 20, 0, &va, &va_handle,
                                  AMDGPU_VA_RANGE_HIGH);
        if (r)
                goto error_query;

        r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
                                 va, 0, AMDGPU_VA_OP_MAP);
        if (r)
                goto error_va_map;

        if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
                initial |= RADEON_DOMAIN_VRAM;
        if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
                initial |= RADEON_DOMAIN_GTT;

        bo->bo = result.buf_handle;
        bo->base.va = va;
        bo->va_handle = va_handle;
        bo->initial_domain = initial;
        bo->size = result.alloc_size;
        bo->is_shared = true;
        bo->ws = ws;
        bo->priority = priority;
        bo->ref_count = 1;

        r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
        assert(!r);

        if (bo->initial_domain & RADEON_DOMAIN_VRAM)
                p_atomic_add(&ws->allocated_vram,
                             align64(bo->size, ws->info.gart_page_size));
        if (bo->initial_domain & RADEON_DOMAIN_GTT)
                p_atomic_add(&ws->allocated_gtt,
                             align64(bo->size, ws->info.gart_page_size));

        radv_amdgpu_add_buffer_to_global_list(bo);
        return (struct radeon_winsys_bo *)bo;
error_va_map:
        amdgpu_va_range_free(va_handle);

error_query:
        amdgpu_bo_free(result.buf_handle);

error:
        FREE(bo);
        return NULL;
}

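/* Export a BO as a dma-buf file descriptor and mark it as shared. */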
static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
                          struct radeon_winsys_bo *_bo,
                          int *fd)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
        int r;
        unsigned handle;
        r = amdgpu_bo_export(bo->bo, type, &handle);
        if (r)
                return false;

        *fd = (int)handle;
        bo->is_shared = true;
        return true;
}

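/* Convert between the hardware TILE_SPLIT field encoding (0..6) and the tile
 * split size in bytes (64..4096); radv_eg_tile_split_rev() is the inverse.
 */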
static unsigned eg_tile_split(unsigned tile_split)
{
        switch (tile_split) {
        case 0: tile_split = 64; break;
        case 1: tile_split = 128; break;
        case 2: tile_split = 256; break;
        case 3: tile_split = 512; break;
        default:
        case 4: tile_split = 1024; break;
        case 5: tile_split = 2048; break;
        case 6: tile_split = 4096; break;
        }
        return tile_split;
}

static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
        switch (eg_tile_split) {
        case 64: return 0;
        case 128: return 1;
        case 256: return 2;
        case 512: return 3;
        default:
        case 1024: return 4;
        case 2048: return 5;
        case 4096: return 6;
        }
}

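/* Pack the radeon_bo_metadata tiling description into the kernel's
 * amdgpu_bo_metadata (a single swizzle mode on GFX9+, the legacy
 * ARRAY_MODE/bank/tile-split fields on older chips) and store it on the BO.
 */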
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        struct amdgpu_bo_metadata metadata = {0};
        uint32_t tiling_flags = 0;

        if (bo->ws->info.chip_class >= GFX9) {
                tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
        } else {
                if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
                        tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
                else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
                        tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
                else
                        tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

                tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
                tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
                tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
                if (md->u.legacy.tile_split)
                        tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
                tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
                tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks)-1);

                if (md->u.legacy.scanout)
                        tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
                else
                        tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
        }

        metadata.tiling_info = tiling_flags;
        metadata.size_metadata = md->size_metadata;
        memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

        amdgpu_bo_set_metadata(bo->bo, &metadata);
}

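/* Inverse of radv_amdgpu_winsys_bo_set_metadata(): query the kernel metadata
 * and unpack the tiling information back into radeon_bo_metadata.
 */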
static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        struct amdgpu_bo_info info = {0};

        int r = amdgpu_bo_query_info(bo->bo, &info);
        if (r)
                return;

        uint64_t tiling_flags = info.metadata.tiling_info;

        if (bo->ws->info.chip_class >= GFX9) {
                md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
        } else {
                md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
                md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

                if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
                        md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
                else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
                        md->u.legacy.microtile = RADEON_LAYOUT_TILED;

                md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
                md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
                md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
                md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
                md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
                md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
                md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
        }

        md->size_metadata = info.metadata.size_metadata;
        memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

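/* Plug the buffer-object entry points into the winsys function table. */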
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
        ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
        ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
        ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
        ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
        ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
        ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
        ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
        ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
        ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
        ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
}