amd: remove duplicated definitions from amdgpu_drm.h
mesa.git: src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include "drm-uapi/amdgpu_drm.h"
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>

#include "util/u_atomic.h"
#include "util/u_memory.h"
#include "util/u_math.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);

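/* Wrapper around amdgpu_bo_va_op_raw() that derives the GPUVM page flags
 * from the radeon_winsys BO flags and page-aligns the size. */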
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
                     amdgpu_bo_handle bo,
                     uint64_t offset,
                     uint64_t size,
                     uint64_t addr,
                     uint32_t bo_flags,
                     uint32_t ops)
{
        uint64_t flags = AMDGPU_VM_PAGE_READABLE |
                         AMDGPU_VM_PAGE_EXECUTABLE;

        if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
                flags |= AMDGPU_VM_MTYPE_UC;

        if (!(bo_flags & RADEON_FLAG_READ_ONLY))
                flags |= AMDGPU_VM_PAGE_WRITEABLE;

        size = align64(size, getpagesize());

        return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
                                   flags, ops);
}

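/* Map/unmap one range of a virtual (sparse) BO into the GPU VA space of its
 * parent. A range with no backing BO is simply skipped for now (PRT handling
 * is still a TODO, see below). Mapping takes a reference on the backing BO;
 * unmapping drops it. */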
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
        assert(range->size);

        if (!range->bo)
                return; /* TODO: PRT mapping */

        p_atomic_inc(&range->bo->ref_count);
        int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
                                     range->size, range->offset + bo->base.va,
                                     0, AMDGPU_VA_OP_MAP);
        if (r)
                abort();
}

static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
        assert(range->size);

        if (!range->bo)
                return; /* TODO: PRT mapping */

        int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
                                     range->size, range->offset + bo->base.va,
                                     0, AMDGPU_VA_OP_UNMAP);
        if (r)
                abort();
        radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}

static int bo_comparator(const void *ap, const void *bp) {
        struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
        struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
        return (a > b) ? 1 : (a < b) ? -1 : 0;
}

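/* Rebuild the sorted, deduplicated array of backing BOs referenced by the
 * ranges of a virtual BO. */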
static void
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
        if (bo->bo_capacity < bo->range_count) {
                uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
                bo->bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
                bo->bo_capacity = new_count;
        }

        uint32_t temp_bo_count = 0;
        for (uint32_t i = 0; i < bo->range_count; ++i)
                if (bo->ranges[i].bo)
                        bo->bos[temp_bo_count++] = bo->ranges[i].bo;

        qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

        /* If no range has a backing BO, the deduplicated list is empty. */
        uint32_t final_bo_count = temp_bo_count ? 1 : 0;
        for (uint32_t i = 1; i < temp_bo_count; ++i)
                if (bo->bos[i] != bo->bos[i - 1])
                        bo->bos[final_bo_count++] = bo->bos[i];

        bo->bo_count = final_bo_count;
}

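/*
 * Bind (or unbind, when _bo is NULL) the range [offset, offset + size) of a
 * virtual BO to the given backing BO. Overlapping ranges are unmapped and
 * split or merged as needed. For example, binding a BO into the middle of a
 * single unbound range [0, 64K) yields three ranges:
 *
 *   [0, 16K)   -> NULL
 *   [16K, 32K) -> bo (at bo_offset)
 *   [32K, 64K) -> NULL
 */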
static void
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size,
                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
        struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
        struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
        int range_count_delta, new_idx;
        int first = 0, last;
        struct radv_amdgpu_map_range new_first, new_last;

        assert(parent->is_virtual);
        assert(!bo || !bo->is_virtual);

        if (!size)
                return;

        /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that contains the newly bound range). */
        if (parent->range_capacity - parent->range_count < 2) {
                parent->range_capacity += 2;
                parent->ranges = realloc(parent->ranges,
                                         parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
        }

        /*
         * [first, last] is exactly the range of ranges that either overlap the
         * new range, or are adjacent to it. This corresponds to the bind ranges
         * that may change.
         */
        while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
                ++first;

        last = first;
        while (last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
                ++last;

        /* Whether the first or last range is going to be totally removed or just
         * resized/left alone. Note that in the case of first == last, we will split
         * that range into a part before and a part after the new range. The remove
         * flag then means the corresponding split part is not created. */
        bool remove_first = parent->ranges[first].offset == offset;
        bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
        bool unmapped_first = false;

        assert(parent->ranges[first].offset <= offset);
        assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

        /* Try to merge the new range with the first range. */
        if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
                size += offset - parent->ranges[first].offset;
                offset = parent->ranges[first].offset;
                bo_offset = parent->ranges[first].bo_offset;
                remove_first = true;
        }

        /* Try to merge the new range with the last range. */
        if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
                size = parent->ranges[last].offset + parent->ranges[last].size - offset;
                remove_last = true;
        }

        range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
        new_idx = first + !remove_first;

        /* Any ranges strictly between first and last are entirely covered by the new range, so just unmap them. */
        for (int i = first + 1; i < last; ++i)
                radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

        /* If the first/last range is not left alone, we unmap it and optionally map
         * it again after modifications. Note that this implicitly can do the splitting
         * if first == last. */
        new_first = parent->ranges[first];
        new_last = parent->ranges[last];

        if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
                radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
                unmapped_first = true;

                if (!remove_first) {
                        new_first.size = offset - new_first.offset;
                        radv_amdgpu_winsys_virtual_map(parent, &new_first);
                }
        }

        if (parent->ranges[last].offset < offset + size || remove_last) {
                if (first != last || !unmapped_first)
                        radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

                if (!remove_last) {
                        new_last.size -= offset + size - new_last.offset;
                        new_last.offset = offset + size;
                        radv_amdgpu_winsys_virtual_map(parent, &new_last);
                }
        }

        /* Move the range list after last to account for the changed number of ranges. */
        memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
                sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

        if (!remove_first)
                parent->ranges[first] = new_first;

        if (!remove_last)
                parent->ranges[new_idx + 1] = new_last;

        /* Actually set up the new range. */
        parent->ranges[new_idx].offset = offset;
        parent->ranges[new_idx].size = size;
        parent->ranges[new_idx].bo = bo;
        parent->ranges[new_idx].bo_offset = bo_offset;

        radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

        parent->range_count += range_count_delta;

        radv_amdgpu_winsys_rebuild_bo_list(parent);
}

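/* Drop a reference to the BO and free it once the refcount reaches zero.
 * For virtual BOs this unmaps all ranges (dropping the references they hold
 * on their backing BOs); for normal BOs it unmaps the VA range and frees the
 * kernel handle. The memory accounting is updated accordingly. */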
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        struct radv_amdgpu_winsys *ws = bo->ws;

        if (p_atomic_dec_return(&bo->ref_count))
                return;
        if (bo->is_virtual) {
                for (uint32_t i = 0; i < bo->range_count; ++i) {
                        radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
                }
                free(bo->bos);
                free(bo->ranges);
        } else {
                if (bo->ws->debug_all_bos) {
                        pthread_mutex_lock(&bo->ws->global_bo_list_lock);
                        list_del(&bo->global_list_item);
                        bo->ws->num_buffers--;
                        pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
                }
                radv_amdgpu_bo_va_op(bo->ws, bo->bo, 0, bo->size, bo->base.va,
                                     0, AMDGPU_VA_OP_UNMAP);
                amdgpu_bo_free(bo->bo);
        }

        if (bo->initial_domain & RADEON_DOMAIN_VRAM) {
                if (bo->base.vram_no_cpu_access) {
                        p_atomic_add(&ws->allocated_vram,
                                     -align64(bo->size, ws->info.gart_page_size));
                } else {
                        p_atomic_add(&ws->allocated_vram_vis,
                                     -align64(bo->size, ws->info.gart_page_size));
                }
        }

        if (bo->initial_domain & RADEON_DOMAIN_GTT)
                p_atomic_add(&ws->allocated_gtt,
                             -align64(bo->size, ws->info.gart_page_size));

        amdgpu_va_range_free(bo->va_handle);
        FREE(bo);
}

static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
        struct radv_amdgpu_winsys *ws = bo->ws;

        if (bo->ws->debug_all_bos) {
                pthread_mutex_lock(&ws->global_bo_list_lock);
                list_addtail(&bo->global_list_item, &ws->global_bo_list);
                ws->num_buffers++;
                pthread_mutex_unlock(&ws->global_bo_list_lock);
        }
}

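/* Allocate a new BO: reserve a VA range and, unless RADEON_FLAG_VIRTUAL is
 * set, allocate backing memory from the requested domains and map it at the
 * reserved address. Virtual BOs start out with a single unbound range. */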
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                             uint64_t size,
                             unsigned alignment,
                             enum radeon_bo_domain initial_domain,
                             unsigned flags,
                             unsigned priority)
{
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        struct radv_amdgpu_winsys_bo *bo;
        struct amdgpu_bo_alloc_request request = {0};
        amdgpu_bo_handle buf_handle;
        uint64_t va = 0;
        amdgpu_va_handle va_handle;
        int r;
        bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
        if (!bo) {
                return NULL;
        }

        unsigned virt_alignment = alignment;
        if (size >= ws->info.pte_fragment_size)
                virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

        r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                                  size, virt_alignment, 0, &va, &va_handle,
                                  (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
                                  AMDGPU_VA_RANGE_HIGH);
        if (r)
                goto error_va_alloc;

        bo->base.va = va;
        bo->va_handle = va_handle;
        bo->size = size;
        bo->ws = ws;
        bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
        bo->ref_count = 1;

        if (flags & RADEON_FLAG_VIRTUAL) {
                bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
                bo->range_count = 1;
                bo->range_capacity = 1;

                bo->ranges[0].offset = 0;
                bo->ranges[0].size = size;
                bo->ranges[0].bo = NULL;
                bo->ranges[0].bo_offset = 0;

                radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
                return (struct radeon_winsys_bo *)bo;
        }

        request.alloc_size = size;
        request.phys_alignment = alignment;

        if (initial_domain & RADEON_DOMAIN_VRAM)
                request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
        if (initial_domain & RADEON_DOMAIN_GTT)
                request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
        if (initial_domain & RADEON_DOMAIN_GDS)
                request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
        if (initial_domain & RADEON_DOMAIN_OA)
                request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

        if (flags & RADEON_FLAG_CPU_ACCESS)
                request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
        if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
                bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
                request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
        }
        if (flags & RADEON_FLAG_GTT_WC)
                request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
        if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
                request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
        if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
            ws->info.has_local_buffers &&
            (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
                bo->base.is_local = true;
                request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
        }

        /* This won't do anything on pre-4.9 kernels. */
        if (initial_domain & RADEON_DOMAIN_VRAM) {
                if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
                        request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
        }

        r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
        if (r) {
                fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
                fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
                fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
                fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
                goto error_bo_alloc;
        }

        r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags,
                                 AMDGPU_VA_OP_MAP);
        if (r)
                goto error_va_map;

        bo->bo = buf_handle;
        bo->initial_domain = initial_domain;
        bo->is_shared = false;
        bo->priority = priority;

        r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
        assert(!r);

        if (initial_domain & RADEON_DOMAIN_VRAM) {
                /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
                 * aren't mappable and they are counted as part of the VRAM
                 * counter.
                 *
                 * Otherwise, buffers with the CPU_ACCESS flag or with neither
                 * flag set (imported buffers) are counted as part of the
                 * visible VRAM counter because they can be mapped.
                 */
                if (bo->base.vram_no_cpu_access) {
                        p_atomic_add(&ws->allocated_vram,
                                     align64(bo->size, ws->info.gart_page_size));
                } else {
                        p_atomic_add(&ws->allocated_vram_vis,
                                     align64(bo->size, ws->info.gart_page_size));
                }
        }

        if (initial_domain & RADEON_DOMAIN_GTT)
                p_atomic_add(&ws->allocated_gtt,
                             align64(bo->size, ws->info.gart_page_size));

        radv_amdgpu_add_buffer_to_global_list(bo);
        return (struct radeon_winsys_bo *)bo;
error_va_map:
        amdgpu_bo_free(buf_handle);

error_bo_alloc:
        amdgpu_va_range_free(va_handle);

error_va_alloc:
        FREE(bo);
        return NULL;
}

static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        int ret;
        void *data;
        ret = amdgpu_bo_cpu_map(bo->bo, &data);
        if (ret)
                return NULL;
        return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        amdgpu_bo_cpu_unmap(bo->bo);
}

static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
                                     uint64_t size, unsigned alignment)
{
        uint64_t vm_alignment = alignment;

        /* Increase the VM alignment for faster address translation. */
        if (size >= ws->info.pte_fragment_size)
                vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

        /* Gfx9: Increase the VM alignment to the most significant bit set
         * in the size for faster address translation.
         */
        if (ws->info.chip_class >= GFX9) {
                unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
                uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

                vm_alignment = MAX2(vm_alignment, msb_alignment);
        }
        return vm_alignment;
}

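/* Import a user pointer (host memory) as a GTT BO and map it into the GPU
 * VA space. */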
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
                               void *pointer,
                               uint64_t size,
                               unsigned priority)
{
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        amdgpu_bo_handle buf_handle;
        struct radv_amdgpu_winsys_bo *bo;
        uint64_t va;
        amdgpu_va_handle va_handle;
        uint64_t vm_alignment;

        bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
        if (!bo)
                return NULL;

        if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
                goto error;

        /* Using the optimal VM alignment also fixes GPU hangs for buffers that
         * are imported.
         */
        vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
                                                            ws->info.gart_page_size);

        if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                                  size, vm_alignment, 0, &va, &va_handle,
                                  AMDGPU_VA_RANGE_HIGH))
                goto error_va_alloc;

        if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
                goto error_va_map;

        /* Initialize it */
        bo->base.va = va;
        bo->va_handle = va_handle;
        bo->size = size;
        bo->ref_count = 1;
        bo->ws = ws;
        bo->bo = buf_handle;
        bo->initial_domain = RADEON_DOMAIN_GTT;
        bo->priority = priority;

        ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
        assert(!r);

        p_atomic_add(&ws->allocated_gtt,
                     align64(bo->size, ws->info.gart_page_size));

        radv_amdgpu_add_buffer_to_global_list(bo);
        return (struct radeon_winsys_bo *)bo;

error_va_map:
        amdgpu_va_range_free(va_handle);

error_va_alloc:
        amdgpu_bo_free(buf_handle);

error:
        FREE(bo);
        return NULL;
}

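/* Import a BO from a dma-buf file descriptor, reserve a VA range for it and
 * map it. The original allocation size can optionally be returned through
 * alloc_size. */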
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
                              int fd, unsigned priority,
                              uint64_t *alloc_size)
{
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        struct radv_amdgpu_winsys_bo *bo;
        uint64_t va;
        amdgpu_va_handle va_handle;
        enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
        struct amdgpu_bo_import_result result = {0};
        struct amdgpu_bo_info info = {0};
        enum radeon_bo_domain initial = 0;
        int r;
        bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
        if (!bo)
                return NULL;

        r = amdgpu_bo_import(ws->dev, type, fd, &result);
        if (r)
                goto error;

        r = amdgpu_bo_query_info(result.buf_handle, &info);
        if (r)
                goto error_query;

        if (alloc_size) {
                *alloc_size = info.alloc_size;
        }

        r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                                  result.alloc_size, 1 << 20, 0, &va, &va_handle,
                                  AMDGPU_VA_RANGE_HIGH);
        if (r)
                goto error_query;

        r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
                                 va, 0, AMDGPU_VA_OP_MAP);
        if (r)
                goto error_va_map;

        if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
                initial |= RADEON_DOMAIN_VRAM;
        if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
                initial |= RADEON_DOMAIN_GTT;

        bo->bo = result.buf_handle;
        bo->base.va = va;
        bo->va_handle = va_handle;
        bo->initial_domain = initial;
        bo->size = result.alloc_size;
        bo->is_shared = true;
        bo->ws = ws;
        bo->priority = priority;
        bo->ref_count = 1;

        r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
        assert(!r);

        if (bo->initial_domain & RADEON_DOMAIN_VRAM)
                p_atomic_add(&ws->allocated_vram,
                             align64(bo->size, ws->info.gart_page_size));
        if (bo->initial_domain & RADEON_DOMAIN_GTT)
                p_atomic_add(&ws->allocated_gtt,
                             align64(bo->size, ws->info.gart_page_size));

        radv_amdgpu_add_buffer_to_global_list(bo);
        return (struct radeon_winsys_bo *)bo;
error_va_map:
        amdgpu_va_range_free(va_handle);

error_query:
        amdgpu_bo_free(result.buf_handle);

error:
        FREE(bo);
        return NULL;
}

static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
                          struct radeon_winsys_bo *_bo,
                          int *fd)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
        int r;
        unsigned handle;
        r = amdgpu_bo_export(bo->bo, type, &handle);
        if (r)
                return false;

        *fd = (int)handle;
        bo->is_shared = true;
        return true;
}

static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd,
                                 enum radeon_bo_domain *domains,
                                 enum radeon_bo_flag *flags)
{
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        struct amdgpu_bo_import_result result = {0};
        struct amdgpu_bo_info info = {0};
        int r;

        *domains = 0;
        *flags = 0;

        r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
        if (r)
                return false;

        r = amdgpu_bo_query_info(result.buf_handle, &info);
        amdgpu_bo_free(result.buf_handle);
        if (r)
                return false;

        if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
                *domains |= RADEON_DOMAIN_VRAM;
        if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
                *domains |= RADEON_DOMAIN_GTT;
        if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
                *domains |= RADEON_DOMAIN_GDS;
        if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
                *domains |= RADEON_DOMAIN_OA;

        if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
                *flags |= RADEON_FLAG_CPU_ACCESS;
        if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
                *flags |= RADEON_FLAG_NO_CPU_ACCESS;
        if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
                *flags |= RADEON_FLAG_IMPLICIT_SYNC;
        if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
                *flags |= RADEON_FLAG_GTT_WC;
        if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
                *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
        if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
                *flags |= RADEON_FLAG_ZERO_VRAM;
        return true;
}

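/* Convert the TILE_SPLIT field of the AMDGPU tiling flags to a tile split
 * size in bytes (eg_tile_split) and back (radv_eg_tile_split_rev). */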
static unsigned eg_tile_split(unsigned tile_split)
{
        switch (tile_split) {
        case 0:  tile_split = 64;   break;
        case 1:  tile_split = 128;  break;
        case 2:  tile_split = 256;  break;
        case 3:  tile_split = 512;  break;
        default:
        case 4:  tile_split = 1024; break;
        case 5:  tile_split = 2048; break;
        case 6:  tile_split = 4096; break;
        }
        return tile_split;
}

static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
        switch (eg_tile_split) {
        case 64:    return 0;
        case 128:   return 1;
        case 256:   return 2;
        case 512:   return 3;
        default:
        case 1024:  return 4;
        case 2048:  return 5;
        case 4096:  return 6;
        }
}

static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        struct amdgpu_bo_metadata metadata = {0};
        uint64_t tiling_flags = 0;

        if (bo->ws->info.chip_class >= GFX9) {
                tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
                tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
        } else {
                if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
                        tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
                else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
                        tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
                else
                        tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

                tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
                tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
                tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
                if (md->u.legacy.tile_split)
                        tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
                tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
                tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

                if (md->u.legacy.scanout)
                        tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
                else
                        tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
        }

        metadata.tiling_info = tiling_flags;
        metadata.size_metadata = md->size_metadata;
        memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

        amdgpu_bo_set_metadata(bo->bo, &metadata);
}

static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        struct amdgpu_bo_info info = {0};

        int r = amdgpu_bo_query_info(bo->bo, &info);
        if (r)
                return;

        uint64_t tiling_flags = info.metadata.tiling_info;

        if (bo->ws->info.chip_class >= GFX9) {
                md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
                md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
        } else {
                md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
                md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

                if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
                        md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
                else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
                        md->u.legacy.microtile = RADEON_LAYOUT_TILED;

                md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
                md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
                md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
                md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
                md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
                md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
                md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
        }

        md->size_metadata = info.metadata.size_metadata;
        memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
        ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
        ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
        ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
        ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
        ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
        ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
        ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
        ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
        ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
        ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
        ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
}
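
/*
 * Usage sketch (illustrative only, not part of this file): once the winsys
 * has been created elsewhere and this table installed, callers allocate and
 * map buffers purely through the radeon_winsys vtable. How the winsys pointer
 * is obtained and the priority value passed are assumptions about code
 * outside this file; the callbacks below match the functions defined above.
 *
 *   struct radeon_winsys *ws = ...; // e.g. from the amdgpu winsys constructor
 *
 *   struct radeon_winsys_bo *bo =
 *           ws->buffer_create(ws, 64 * 1024, 4096, RADEON_DOMAIN_GTT,
 *                             RADEON_FLAG_CPU_ACCESS, 0 /\* priority *\/);
 *   void *map = ws->buffer_map(bo);
 *   // ... fill the buffer on the CPU ...
 *   ws->buffer_unmap(bo);
 *   ws->buffer_destroy(bo);
 */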