/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <stdio.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include "drm-uapi/amdgpu_drm.h"
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>

#include "util/u_atomic.h"
#include "util/u_memory.h"
#include "util/u_math.h"
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);
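/* Wrapper around amdgpu_bo_va_op_raw(): computes the page-table flags for the
 * mapping and page-aligns the size. When a real buffer handle is given, the
 * flags are derived from the winsys buffer flags; when bo is NULL (PRT/sparse
 * operations), the caller-provided internal_flags are passed through as-is. */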
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
                     amdgpu_bo_handle bo,
                     uint64_t offset,
                     uint64_t size,
                     uint64_t addr,
                     uint32_t bo_flags,
                     uint64_t internal_flags,
                     uint32_t ops)
{
    uint64_t flags = internal_flags;
    if (bo) {
        flags = AMDGPU_VM_PAGE_READABLE |
                AMDGPU_VM_PAGE_EXECUTABLE;

        if ((bo_flags & RADEON_FLAG_VA_UNCACHED) &&
            ws->info.chip_class >= GFX9)
            flags |= AMDGPU_VM_MTYPE_UC;

        if (!(bo_flags & RADEON_FLAG_READ_ONLY))
            flags |= AMDGPU_VM_PAGE_WRITEABLE;
    }

    size = align64(size, getpagesize());

    return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
                               flags, ops);
}
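/* Maps one range of a virtual (sparse) buffer into the GPU VA space of its
 * parent. Ranges without a backing buffer become PRT mappings when the kernel
 * supports sparse VM mappings; ranges with a backing buffer take a reference
 * on it so it stays alive for the lifetime of the mapping. */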
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
    uint64_t internal_flags = 0;
    assert(range->size);

    if (!range->bo) {
        if (!bo->ws->info.has_sparse_vm_mappings)
            return;

        internal_flags |= AMDGPU_VM_PAGE_PRT;
    } else
        p_atomic_inc(&range->bo->ref_count);

    int r = radv_amdgpu_bo_va_op(bo->ws, range->bo ? range->bo->bo : NULL,
                                 range->bo_offset, range->size,
                                 range->offset + bo->base.va, 0,
                                 internal_flags, AMDGPU_VA_OP_MAP);
    if (r)
        abort();
}
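/* Tears down one range mapping of a virtual buffer and drops the reference
 * that radv_amdgpu_winsys_virtual_map() took on the backing buffer, which may
 * destroy it. PRT ranges still need AMDGPU_VM_PAGE_PRT on unmap. */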
static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
    uint64_t internal_flags = 0;
    assert(range->size);

    if (!range->bo) {
        if (!bo->ws->info.has_sparse_vm_mappings)
            return;

        /* Even though this is an unmap, if we don't set this flag,
           AMDGPU is going to complain about the missing buffer. */
        internal_flags |= AMDGPU_VM_PAGE_PRT;
    }

    int r = radv_amdgpu_bo_va_op(bo->ws, range->bo ? range->bo->bo : NULL,
                                 range->bo_offset, range->size,
                                 range->offset + bo->base.va, 0, internal_flags,
                                 AMDGPU_VA_OP_UNMAP);
    if (r)
        abort();

    if (range->bo)
        radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}
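/* qsort() comparator for the BO list: orders backing buffers by pointer value
 * only, so duplicates end up adjacent and can be skipped in a single pass. */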
static int bo_comparator(const void *ap, const void *bp) {
    struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
    struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
    return (a > b) ? 1 : (a < b) ? -1 : 0;
}
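/* Rebuilds the flat, deduplicated array of backing buffers (bo->bos) from the
 * current range list. This array is what command submission uses to declare
 * every buffer a sparse binding can touch. */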
static void
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
    if (bo->bo_capacity < bo->range_count) {
        uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
        bo->bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
        bo->bo_capacity = new_count;
    }

    uint32_t temp_bo_count = 0;
    for (uint32_t i = 0; i < bo->range_count; ++i)
        if (bo->ranges[i].bo)
            bo->bos[temp_bo_count++] = bo->ranges[i].bo;

    qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

    if (!temp_bo_count) {
        /* Every range is unbound: the dedup loop below assumes at least one
         * entry, so bail out with an empty list. */
        bo->bo_count = 0;
        return;
    }

    uint32_t final_bo_count = 1;
    for (uint32_t i = 1; i < temp_bo_count; ++i)
        if (bo->bos[i] != bo->bos[i - 1])
            bo->bos[final_bo_count++] = bo->bos[i];

    bo->bo_count = final_bo_count;
}
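/* Binds [offset, offset + size) of the virtual buffer `parent` to `bo` at
 * bo_offset (or unbinds it when bo is NULL). The parent keeps a sorted,
 * non-overlapping list of map ranges covering its whole VA; a bind replaces
 * the covered part of that list with one new range, trimming or splitting the
 * first/last overlapped ranges and merging with compatible neighbours. */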
static void
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size,
                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
    struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
    struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
    int range_count_delta, new_idx;
    int first = 0, last;
    struct radv_amdgpu_map_range new_first, new_last;

    assert(parent->is_virtual);
    assert(!bo || !bo->is_virtual);

    /* Zero-sized binds would break the range bookkeeping below. */
    if (!size)
        return;

    /* We have at most 2 new ranges (1 by the bind, and another one by
     * splitting a range that contains the newly bound range). */
    if (parent->range_capacity - parent->range_count < 2) {
        parent->range_capacity += 2;
        parent->ranges = realloc(parent->ranges,
                                 parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
    }

    /*
     * [first, last] is exactly the range of ranges that either overlap the
     * new parent, or are adjacent to it. This corresponds to the bind ranges
     * that may change.
     */
    while (first + 1 < parent->range_count &&
           parent->ranges[first].offset + parent->ranges[first].size < offset)
        ++first;

    last = first;
    while (last + 1 < parent->range_count &&
           parent->ranges[last + 1].offset <= offset + size)
        ++last;

    /* Whether the first or last range is going to be totally removed or just
     * resized/left alone. Note that in the case of first == last, we will split
     * this into a part before and after the new range. The remove flag is then
     * whether to not create the corresponding split part. */
    bool remove_first = parent->ranges[first].offset == offset;
    bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
    bool unmapped_first = false;

    assert(parent->ranges[first].offset <= offset);
    assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

    /* Try to merge the new range with the first range. */
    if (parent->ranges[first].bo == bo &&
        (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
        size += offset - parent->ranges[first].offset;
        offset = parent->ranges[first].offset;
        bo_offset = parent->ranges[first].bo_offset;
        remove_first = true;
    }

    /* Try to merge the new range with the last range. */
    if (parent->ranges[last].bo == bo &&
        (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
        size = parent->ranges[last].offset + parent->ranges[last].size - offset;
        remove_last = true;
    }

    range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
    new_idx = first + !remove_first;

    /* Any range between first and last is going to be entirely covered by
     * the new range so just unmap them. */
    for (int i = first + 1; i < last; ++i)
        radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

    /* If the first/last range are not left alone we unmap them and optionally
     * map them again after modifications. Note that this implicitly can do the
     * splitting if first == last. */
    new_first = parent->ranges[first];
    new_last = parent->ranges[last];

    if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
        radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
        unmapped_first = true;

        if (!remove_first) {
            new_first.size = offset - new_first.offset;
            radv_amdgpu_winsys_virtual_map(parent, &new_first);
        }
    }

    if (parent->ranges[last].offset < offset + size || remove_last) {
        if (first != last || !unmapped_first)
            radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

        if (!remove_last) {
            new_last.size -= offset + size - new_last.offset;
            new_last.offset = offset + size;
            radv_amdgpu_winsys_virtual_map(parent, &new_last);
        }
    }

    /* Moves the range list after last to account for the changed number of ranges. */
    memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
            sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

    if (!remove_first)
        parent->ranges[first] = new_first;

    if (!remove_last)
        parent->ranges[new_idx + 1] = new_last;

    /* Actually set up the new range. */
    parent->ranges[new_idx].offset = offset;
    parent->ranges[new_idx].size = size;
    parent->ranges[new_idx].bo = bo;
    parent->ranges[new_idx].bo_offset = bo_offset;

    radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

    parent->range_count += range_count_delta;

    radv_amdgpu_winsys_rebuild_bo_list(parent);
}
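/* Drops one reference to the buffer and frees it when the count hits zero:
 * virtual buffers unmap all of their ranges, real buffers are unmapped from
 * the VA space and released, and the VRAM/GTT accounting is rolled back. */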
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
    struct radv_amdgpu_winsys *ws = bo->ws;

    if (p_atomic_dec_return(&bo->ref_count))
        return;

    if (bo->is_virtual) {
        for (uint32_t i = 0; i < bo->range_count; ++i) {
            radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
        }
        free(bo->bos);
        free(bo->ranges);
    } else {
        if (bo->ws->debug_all_bos) {
            pthread_mutex_lock(&bo->ws->global_bo_list_lock);
            list_del(&bo->global_list_item);
            bo->ws->num_buffers--;
            pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
        }
        radv_amdgpu_bo_va_op(bo->ws, bo->bo, 0, bo->size, bo->base.va,
                             0, 0, AMDGPU_VA_OP_UNMAP);
        amdgpu_bo_free(bo->bo);
    }

    if (bo->initial_domain & RADEON_DOMAIN_VRAM) {
        if (bo->base.vram_no_cpu_access) {
            p_atomic_add(&ws->allocated_vram,
                         -align64(bo->size, ws->info.gart_page_size));
        } else {
            p_atomic_add(&ws->allocated_vram_vis,
                         -align64(bo->size, ws->info.gart_page_size));
        }
    }

    if (bo->initial_domain & RADEON_DOMAIN_GTT)
        p_atomic_add(&ws->allocated_gtt,
                     -align64(bo->size, ws->info.gart_page_size));

    amdgpu_va_range_free(bo->va_handle);
    FREE(bo);
}
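/* When the winsys runs with debug_all_bos set, every buffer is tracked on a
 * global list; this is the add-side counterpart of the removal done in
 * radv_amdgpu_winsys_bo_destroy(). */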
static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
    struct radv_amdgpu_winsys *ws = bo->ws;

    if (bo->ws->debug_all_bos) {
        pthread_mutex_lock(&ws->global_bo_list_lock);
        list_addtail(&bo->global_list_item, &ws->global_bo_list);
        ws->num_buffers++;
        pthread_mutex_unlock(&ws->global_bo_list_lock);
    }
}
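/* Allocates a new buffer: reserves a GPU VA range, then either sets up an
 * initially unbacked range list (RADEON_FLAG_VIRTUAL) or allocates backing
 * memory through amdgpu_bo_alloc() with heap/flags translated from the winsys
 * request, maps it, and charges it to the VRAM/GTT counters. */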
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                             uint64_t size,
                             unsigned alignment,
                             enum radeon_bo_domain initial_domain,
                             unsigned flags,
                             unsigned priority)
{
    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
    struct radv_amdgpu_winsys_bo *bo;
    struct amdgpu_bo_alloc_request request = {0};
    amdgpu_bo_handle buf_handle;
    uint64_t va = 0;
    amdgpu_va_handle va_handle;
    int r;

    bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
    if (!bo)
        return NULL;

    unsigned virt_alignment = alignment;
    if (size >= ws->info.pte_fragment_size)
        virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

    r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                              size, virt_alignment, 0, &va, &va_handle,
                              (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
                              AMDGPU_VA_RANGE_HIGH);
    if (r)
        goto error_va_alloc;

    bo->base.va = va;
    bo->va_handle = va_handle;
    bo->size = size;
    bo->ws = ws;
    bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
    bo->ref_count = 1;

    if (flags & RADEON_FLAG_VIRTUAL) {
        bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
        bo->range_count = 1;
        bo->range_capacity = 1;

        bo->ranges[0].offset = 0;
        bo->ranges[0].size = size;
        bo->ranges[0].bo = NULL;
        bo->ranges[0].bo_offset = 0;

        radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
        return (struct radeon_winsys_bo *)bo;
    }

    request.alloc_size = size;
    request.phys_alignment = alignment;

    if (initial_domain & RADEON_DOMAIN_VRAM)
        request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
    if (initial_domain & RADEON_DOMAIN_GTT)
        request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
    if (initial_domain & RADEON_DOMAIN_GDS)
        request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
    if (initial_domain & RADEON_DOMAIN_OA)
        request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

    if (flags & RADEON_FLAG_CPU_ACCESS)
        request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
    if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
        bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
        request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
    }
    if (flags & RADEON_FLAG_GTT_WC)
        request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
    if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
        request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
    if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
        ws->info.has_local_buffers &&
        (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
        bo->base.is_local = true;
        request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
    }

    /* this won't do anything on pre 4.9 kernels */
    if (initial_domain & RADEON_DOMAIN_VRAM) {
        if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
            request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
    }

    r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
    if (r) {
        fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
        fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
        fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
        fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
        goto error_bo_alloc;
    }

    r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0,
                             AMDGPU_VA_OP_MAP);
    if (r)
        goto error_va_map;

    bo->bo = buf_handle;
    bo->initial_domain = initial_domain;
    bo->is_shared = false;
    bo->priority = priority;

    r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
    assert(!r);

    if (initial_domain & RADEON_DOMAIN_VRAM) {
        /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
         * aren't mappable and they are counted as part of the VRAM
         * counter.
         *
         * Otherwise, buffers with the CPU_ACCESS flag or with neither
         * flag (imported buffers) are counted as part of the VRAM
         * visible counter because they can be mapped.
         */
        if (bo->base.vram_no_cpu_access) {
            p_atomic_add(&ws->allocated_vram,
                         align64(bo->size, ws->info.gart_page_size));
        } else {
            p_atomic_add(&ws->allocated_vram_vis,
                         align64(bo->size, ws->info.gart_page_size));
        }
    }

    if (initial_domain & RADEON_DOMAIN_GTT)
        p_atomic_add(&ws->allocated_gtt,
                     align64(bo->size, ws->info.gart_page_size));

    radv_amdgpu_add_buffer_to_global_list(bo);
    return (struct radeon_winsys_bo *)bo;

error_va_map:
    amdgpu_bo_free(buf_handle);

error_bo_alloc:
    amdgpu_va_range_free(va_handle);

error_va_alloc:
    FREE(bo);
    return NULL;
}
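/* CPU access: thin wrappers around amdgpu_bo_cpu_map()/amdgpu_bo_cpu_unmap();
 * the map returns NULL on failure. */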
static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
    int ret;
    void *data;
    ret = amdgpu_bo_cpu_map(bo->bo, &data);
    if (ret)
        return NULL;
    return data;
}
static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
    amdgpu_bo_cpu_unmap(bo->bo);
}
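/* Picks a VA alignment that lets the kernel use larger PTE fragments (and, on
 * GFX9+, a power-of-two block matching the buffer size) so address
 * translation is cheaper. */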
static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
                                     uint64_t size, unsigned alignment)
{
    uint64_t vm_alignment = alignment;

    /* Increase the VM alignment for faster address translation. */
    if (size >= ws->info.pte_fragment_size)
        vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

    /* Gfx9: Increase the VM alignment to the most significant bit set
     * in the size for faster address translation.
     */
    if (ws->info.chip_class >= GFX9) {
        unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
        uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

        vm_alignment = MAX2(vm_alignment, msb_alignment);
    }
    return vm_alignment;
}
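/* Imports host memory as a GTT buffer (userptr): wraps the pointer with
 * amdgpu_create_bo_from_user_mem(), reserves a VA range with the optimal
 * alignment, and maps it. The resulting buffer is accounted as GTT. */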
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
                               void *pointer,
                               uint64_t size,
                               unsigned priority)
{
    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
    amdgpu_bo_handle buf_handle;
    struct radv_amdgpu_winsys_bo *bo;
    uint64_t va;
    amdgpu_va_handle va_handle;
    uint64_t vm_alignment;

    bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
    if (!bo)
        return NULL;

    if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
        goto error;

    /* Using the optimal VM alignment also fixes GPU hangs for buffers that
     * are imported.
     */
    vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
                                                        ws->info.gart_page_size);

    if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                              size, vm_alignment, 0, &va, &va_handle,
                              AMDGPU_VA_RANGE_HIGH))
        goto error_va_alloc;

    if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
        goto error_va_map;

    /* Initialize it. */
    bo->base.va = va;
    bo->va_handle = va_handle;
    bo->size = size;
    bo->ref_count = 1;
    bo->ws = ws;
    bo->bo = buf_handle;
    bo->initial_domain = RADEON_DOMAIN_GTT;
    bo->priority = priority;

    ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
    assert(!r);

    p_atomic_add(&ws->allocated_gtt,
                 align64(bo->size, ws->info.gart_page_size));

    radv_amdgpu_add_buffer_to_global_list(bo);
    return (struct radeon_winsys_bo *)bo;

error_va_map:
    amdgpu_va_range_free(va_handle);

error_va_alloc:
    amdgpu_bo_free(buf_handle);

error:
    FREE(bo);
    return NULL;
}
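/* Imports a buffer from a dma-buf file descriptor: resolves the handle,
 * queries its size and preferred heap, gives it a VA mapping, and charges it
 * to the matching memory counter. The buffer is marked as shared. */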
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
                              int fd, unsigned priority,
                              uint64_t *alloc_size)
{
    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
    struct radv_amdgpu_winsys_bo *bo;
    uint64_t va;
    amdgpu_va_handle va_handle;
    enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
    struct amdgpu_bo_import_result result = {0};
    struct amdgpu_bo_info info = {0};
    enum radeon_bo_domain initial = 0;
    int r;

    bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
    if (!bo)
        return NULL;

    r = amdgpu_bo_import(ws->dev, type, fd, &result);
    if (r)
        goto error;

    r = amdgpu_bo_query_info(result.buf_handle, &info);
    if (r)
        goto error_query;

    if (alloc_size) {
        *alloc_size = info.alloc_size;
    }

    r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                              result.alloc_size, 1 << 20, 0, &va, &va_handle,
                              AMDGPU_VA_RANGE_HIGH);
    if (r)
        goto error_query;

    r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
                             va, 0, 0, AMDGPU_VA_OP_MAP);
    if (r)
        goto error_va_map;

    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
        initial |= RADEON_DOMAIN_VRAM;
    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
        initial |= RADEON_DOMAIN_GTT;

    bo->bo = result.buf_handle;
    bo->base.va = va;
    bo->va_handle = va_handle;
    bo->initial_domain = initial;
    bo->size = result.alloc_size;
    bo->is_shared = true;
    bo->ws = ws;
    bo->priority = priority;
    bo->ref_count = 1;

    r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
    assert(!r);

    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
        p_atomic_add(&ws->allocated_vram,
                     align64(bo->size, ws->info.gart_page_size));
    if (bo->initial_domain & RADEON_DOMAIN_GTT)
        p_atomic_add(&ws->allocated_gtt,
                     align64(bo->size, ws->info.gart_page_size));

    radv_amdgpu_add_buffer_to_global_list(bo);
    return (struct radeon_winsys_bo *)bo;

error_va_map:
    amdgpu_va_range_free(va_handle);

error_query:
    amdgpu_bo_free(result.buf_handle);

error:
    FREE(bo);
    return NULL;
}
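/* Exports the buffer as a dma-buf file descriptor for sharing with other
 * processes or APIs; the buffer is marked as shared on success. */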
static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
                          struct radeon_winsys_bo *_bo,
                          int *fd)
{
    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
    enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
    int r;
    unsigned handle;
    r = amdgpu_bo_export(bo->bo, type, &handle);
    if (r)
        return false;

    *fd = (int)handle;
    bo->is_shared = true;
    return true;
}
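/* Translates the kernel's view of an imported dma-buf (preferred heap and
 * allocation flags) back into winsys domains and flags, without keeping the
 * import around. */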
static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd,
                                 enum radeon_bo_domain *domains,
                                 enum radeon_bo_flag *flags)
{
    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
    struct amdgpu_bo_import_result result = {0};
    struct amdgpu_bo_info info = {0};
    int r;

    *domains = 0;
    *flags = 0;

    r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
    if (r)
        return false;

    r = amdgpu_bo_query_info(result.buf_handle, &info);
    amdgpu_bo_free(result.buf_handle);
    if (r)
        return false;

    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
        *domains |= RADEON_DOMAIN_VRAM;
    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
        *domains |= RADEON_DOMAIN_GTT;
    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
        *domains |= RADEON_DOMAIN_GDS;
    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
        *domains |= RADEON_DOMAIN_OA;

    if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
        *flags |= RADEON_FLAG_CPU_ACCESS;
    if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
        *flags |= RADEON_FLAG_NO_CPU_ACCESS;
    if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
        *flags |= RADEON_FLAG_IMPLICIT_SYNC;
    if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
        *flags |= RADEON_FLAG_GTT_WC;
    if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
        *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
    if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
        *flags |= RADEON_FLAG_ZERO_VRAM;
    return true;
}
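/* The kernel stores the eg/si tile split as a small code; these two helpers
 * convert between the encoded value (0..6) and the split in bytes (64..4096),
 * in both directions. */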
static unsigned eg_tile_split(unsigned tile_split)
{
    switch (tile_split) {
    case 0: tile_split = 64; break;
    case 1: tile_split = 128; break;
    case 2: tile_split = 256; break;
    case 3: tile_split = 512; break;
    default:
    case 4: tile_split = 1024; break;
    case 5: tile_split = 2048; break;
    case 6: tile_split = 4096; break;
    }
    return tile_split;
}
static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
    switch (eg_tile_split) {
    case 64: return 0;
    case 128: return 1;
    case 256: return 2;
    case 512: return 3;
    default:
    case 1024: return 4;
    case 2048: return 5;
    case 4096: return 6;
    }
}
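/* Packs the winsys tiling metadata into the kernel's AMDGPU_TILING_* bitfield
 * layout (GFX9+ uses swizzle mode/scanout, older chips the legacy array-mode
 * fields) and stores it on the buffer with amdgpu_bo_set_metadata(). */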
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
    struct amdgpu_bo_metadata metadata = {0};
    uint64_t tiling_flags = 0;

    if (bo->ws->info.chip_class >= GFX9) {
        tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
        tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
    } else {
        if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
            tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
        else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
            tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
        else
            tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

        tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
        tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
        tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
        if (md->u.legacy.tile_split)
            tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
        tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
        tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

        if (md->u.legacy.scanout)
            tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
        else
            tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
    }

    metadata.tiling_info = tiling_flags;
    metadata.size_metadata = md->size_metadata;
    memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

    amdgpu_bo_set_metadata(bo->bo, &metadata);
}
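/* Inverse of the setter above: queries the buffer's metadata from the kernel
 * and unpacks the tiling bitfield back into struct radeon_bo_metadata. */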
static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
    struct amdgpu_bo_info info = {0};

    int r = amdgpu_bo_query_info(bo->bo, &info);
    if (r)
        return;

    uint64_t tiling_flags = info.metadata.tiling_info;

    if (bo->ws->info.chip_class >= GFX9) {
        md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
        md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
    } else {
        md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
        md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

        if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
            md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
        else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
            md->u.legacy.microtile = RADEON_LAYOUT_TILED;

        md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
        md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
        md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
        md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
        md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
        md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
        md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
    }

    md->size_metadata = info.metadata.size_metadata;
    memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}
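/* Plugs the buffer-object implementation into the winsys vtable. */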
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
    ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
    ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
    ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
    ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
    ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
    ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
    ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
    ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
    ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
    ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
    ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
}