/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <stdio.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>
#include "drm-uapi/amdgpu_drm.h"

#include "util/u_atomic.h"
#include "util/u_memory.h"
#include "util/u_math.h"
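
/*
 * Buffer-object (BO) backend for the RADV amdgpu winsys: physical and
 * virtual (sparse) allocation, GPU VA mapping, CPU map/unmap, dma-buf
 * import/export, tiling metadata, and VRAM/GTT accounting.
 */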
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
                     amdgpu_bo_handle bo,
                     uint64_t offset,
                     uint64_t size,
                     uint64_t addr,
                     uint32_t bo_flags,
                     uint32_t ops)
{
   uint64_t flags = AMDGPU_VM_PAGE_READABLE |
                    AMDGPU_VM_PAGE_EXECUTABLE;

   if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
      flags |= AMDGPU_VM_MTYPE_UC;

   if (!(bo_flags & RADEON_FLAG_READ_ONLY))
      flags |= AMDGPU_VM_PAGE_WRITEABLE;

   size = align64(size, getpagesize());

   return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
                              flags, ops);
}
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
   assert(range->size);

   if (!range->bo)
      return; /* TODO: PRT mapping */

   p_atomic_inc(&range->bo->ref_count);
   int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
                                range->size, range->offset + bo->base.va,
                                0, AMDGPU_VA_OP_MAP);
   if (r)
      abort();
}
static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
   assert(range->size);

   if (!range->bo)
      return; /* TODO: PRT mapping */

   int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
                                range->size, range->offset + bo->base.va,
                                0, AMDGPU_VA_OP_UNMAP);
   if (r)
      abort();
   radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}
static int bo_comparator(const void *ap, const void *bp) {
   struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
   struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
   return (a > b) ? 1 : (a < b) ? -1 : 0;
}
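
/* Sorting by raw pointer value is sufficient here: the sorted list is only
 * used to deduplicate backing BOs, so any consistent total order works,
 * e.g. {B, A, B} -> qsort -> {A, B, B} -> dedup -> {A, B}. */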
static void
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
   /* Grow the flat BO array so it can hold one entry per range. */
   if (bo->bo_capacity < bo->range_count) {
      uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
      bo->bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
      bo->bo_capacity = new_count;
   }

   /* Gather all backing BOs that are currently bound to a range. */
   uint32_t temp_bo_count = 0;
   for (uint32_t i = 0; i < bo->range_count; ++i)
      if (bo->ranges[i].bo)
         bo->bos[temp_bo_count++] = bo->ranges[i].bo;

   qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

   /* Compact duplicates, which are adjacent after sorting. */
   uint32_t final_bo_count = 1;
   for (uint32_t i = 1; i < temp_bo_count; ++i)
      if (bo->bos[i] != bo->bos[i - 1])
         bo->bos[final_bo_count++] = bo->bos[i];

   bo->bo_count = final_bo_count;
}
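
/*
 * Illustrative example (not in the original source) of the range surgery
 * done below: binding a BO into the middle of a fully unbound virtual BO
 * splits the single NULL range in three.
 *
 *    before: [0, 64K) -> NULL
 *    bind(offset = 16K, size = 16K, bo = B):
 *    after:  [0, 16K) -> NULL, [16K, 32K) -> B, [32K, 64K) -> NULL
 *
 * Here first == last == 0, remove_first == remove_last == false, so
 * range_count_delta == 1 - 1 + 1 + 1 == 2 and new_idx == 1.
 */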
static void
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size,
                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
   struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
   struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
   int range_count_delta, new_idx;
   int first = 0, last;
   struct radv_amdgpu_map_range new_first, new_last;

   assert(parent->is_virtual);
   assert(!bo || !bo->is_virtual);

   if (!size)
      return;

   /* We have at most 2 new ranges (1 by the bind, and another one by
    * splitting a range that contains the newly bound range). */
   if (parent->range_capacity - parent->range_count < 2) {
      parent->range_capacity += 2;
      parent->ranges = realloc(parent->ranges,
                               parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
   }

   /*
    * [first, last] is exactly the range of ranges that either overlap the
    * new parent, or are adjacent to it. This corresponds to the bind ranges
    * that may change.
    */
   while (first + 1 < parent->range_count &&
          parent->ranges[first].offset + parent->ranges[first].size < offset)
      ++first;

   last = first;
   while (last + 1 < parent->range_count &&
          parent->ranges[last].offset <= offset + size)
      ++last;

   /* Whether the first or last range are going to be totally removed or just
    * resized/left alone. Note that in the case of first == last, we will split
    * this into a part before and after the new range. The remove flag is then
    * whether to not create the corresponding split part. */
   bool remove_first = parent->ranges[first].offset == offset;
   bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
   bool unmapped_first = false;

   assert(parent->ranges[first].offset <= offset);
   assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

   /* Try to merge the new range with the first range. */
   if (parent->ranges[first].bo == bo &&
       (!bo || offset - bo_offset ==
               parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
      size += offset - parent->ranges[first].offset;
      offset = parent->ranges[first].offset;
      bo_offset = parent->ranges[first].bo_offset;
      remove_first = true;
   }

   /* Try to merge the new range with the last range. */
   if (parent->ranges[last].bo == bo &&
       (!bo || offset - bo_offset ==
               parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
      size = parent->ranges[last].offset + parent->ranges[last].size - offset;
      remove_last = true;
   }

   range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
   new_idx = first + !remove_first;

   /* Any range between first and last is going to be entirely covered by
    * the new range so just unmap them. */
   for (int i = first + 1; i < last; ++i)
      radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

   /* If the first/last range are not left alone we unmap them and optionally
    * map them again after modifications. Note that this implicitly can do
    * the splitting if first == last. */
   new_first = parent->ranges[first];
   new_last = parent->ranges[last];

   if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
      radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
      unmapped_first = true;

      if (!remove_first) {
         new_first.size = offset - new_first.offset;
         radv_amdgpu_winsys_virtual_map(parent, &new_first);
      }
   }

   if (parent->ranges[last].offset < offset + size || remove_last) {
      if (first != last || !unmapped_first)
         radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

      if (!remove_last) {
         new_last.size -= offset + size - new_last.offset;
         new_last.offset = offset + size;
         radv_amdgpu_winsys_virtual_map(parent, &new_last);
      }
   }

   /* Moves the range list after last to account for the changed number of
    * ranges. */
   memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
           sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

   if (!remove_first)
      parent->ranges[first] = new_first;

   if (!remove_last)
      parent->ranges[new_idx + 1] = new_last;

   /* Actually set up the new range. */
   parent->ranges[new_idx].offset = offset;
   parent->ranges[new_idx].size = size;
   parent->ranges[new_idx].bo = bo;
   parent->ranges[new_idx].bo_offset = bo_offset;

   radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

   parent->range_count += range_count_delta;

   radv_amdgpu_winsys_rebuild_bo_list(parent);
}
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct radv_amdgpu_winsys *ws = bo->ws;

   /* Drop one reference; bail out if other references keep the BO alive. */
   if (p_atomic_dec_return(&bo->ref_count))
      return;
   if (bo->is_virtual) {
      for (uint32_t i = 0; i < bo->range_count; ++i) {
         radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
      }
      free(bo->bos);
      free(bo->ranges);
   } else {
      if (bo->ws->debug_all_bos) {
         pthread_mutex_lock(&bo->ws->global_bo_list_lock);
         list_del(&bo->global_list_item);
         bo->ws->num_buffers--;
         pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
      }
      radv_amdgpu_bo_va_op(bo->ws, bo->bo, 0, bo->size, bo->base.va,
                           0, AMDGPU_VA_OP_UNMAP);
      amdgpu_bo_free(bo->bo);
   }

   if (bo->initial_domain & RADEON_DOMAIN_VRAM) {
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram,
                      -align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis,
                      -align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (bo->initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt,
                   -align64(bo->size, ws->info.gart_page_size));

   amdgpu_va_range_free(bo->va_handle);
   FREE(bo);
}
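
/* Note: the negative p_atomic_add() calls above subtract exactly the
 * page-aligned sizes that the create/import paths added, keeping the
 * allocated_vram/allocated_vram_vis/allocated_gtt counters balanced. */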
static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
   struct radv_amdgpu_winsys *ws = bo->ws;

   if (bo->ws->debug_all_bos) {
      pthread_mutex_lock(&ws->global_bo_list_lock);
      list_addtail(&bo->global_list_item, &ws->global_bo_list);
      ws->num_buffers++;
      pthread_mutex_unlock(&ws->global_bo_list_lock);
   }
}
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                             uint64_t size,
                             unsigned alignment,
                             enum radeon_bo_domain initial_domain,
                             unsigned flags,
                             unsigned priority)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   struct amdgpu_bo_alloc_request request = {0};
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   amdgpu_va_handle va_handle;
   int r;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   unsigned virt_alignment = alignment;
   if (size >= ws->info.pte_fragment_size)
      virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, virt_alignment, 0, &va, &va_handle,
                             (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
                             AMDGPU_VA_RANGE_HIGH);
   if (r)
      goto error_va_alloc;

   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->ws = ws;
   bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
   bo->ref_count = 1;

   if (flags & RADEON_FLAG_VIRTUAL) {
      bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
      bo->range_count = 1;
      bo->range_capacity = 1;

      bo->ranges[0].offset = 0;
      bo->ranges[0].size = size;
      bo->ranges[0].bo = NULL;
      bo->ranges[0].bo_offset = 0;

      radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
      return (struct radeon_winsys_bo *)bo;
   }

   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   if (initial_domain & RADEON_DOMAIN_GDS)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
   if (initial_domain & RADEON_DOMAIN_OA)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
      bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   }
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
   if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
      request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
   if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
       ws->info.has_local_buffers &&
       (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
      bo->base.is_local = true;
      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
   }

   /* this won't do anything on pre 4.9 kernels */
   if (initial_domain & RADEON_DOMAIN_VRAM) {
      if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
         request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
   }

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
      fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
      goto error_bo_alloc;
   }

   r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags,
                            AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   bo->bo = buf_handle;
   bo->initial_domain = initial_domain;
   bo->is_shared = false;
   bo->priority = priority;

   r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
       * aren't mappable and they are counted as part of the VRAM
       * counter.
       *
       * Otherwise, buffers with the CPU_ACCESS flag or without any
       * of both (imported buffers) are counted as part of the VRAM
       * visible counter because they can be mapped.
       */
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram,
                      align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis,
                      align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt,
                   align64(bo->size, ws->info.gart_page_size));

   radv_amdgpu_add_buffer_to_global_list(bo);
   return (struct radeon_winsys_bo *)bo;

/* The error labels unwind in reverse order of acquisition. */
error_va_map:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   FREE(bo);
   return NULL;
}
static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   int ret;
   void *data;

   ret = amdgpu_bo_cpu_map(bo->bo, &data);
   if (ret)
      return NULL;
   return data;
}
static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   amdgpu_bo_cpu_unmap(bo->bo);
}
static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
                                     uint64_t size, unsigned alignment)
{
   uint64_t vm_alignment = alignment;

   /* Increase the VM alignment for faster address translation. */
   if (size >= ws->info.pte_fragment_size)
      vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

   /* Gfx9: Increase the VM alignment to the most significant bit set
    * in the size for faster address translation.
    */
   if (ws->info.chip_class >= GFX9) {
      unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
      uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

      vm_alignment = MAX2(vm_alignment, msb_alignment);
   }
   return vm_alignment;
}
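
/*
 * Worked example (illustrative): for size == 0x30000 (192 KiB) on GFX9+,
 * util_last_bit64(0x30000) == 18, so msb_alignment == 1ull << 17 == 0x20000,
 * and the returned alignment is the MAX2 of that, the PTE fragment size
 * (when the size is large enough) and the caller's alignment.
 */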
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
                               void *pointer,
                               uint64_t size,
                               unsigned priority)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   amdgpu_bo_handle buf_handle;
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   uint64_t vm_alignment;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
      goto error;

   /* Using the optimal VM alignment also fixes GPU hangs for buffers that
    * are imported.
    */
   vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
                                                       ws->info.gart_page_size);

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, vm_alignment, 0, &va, &va_handle,
                             AMDGPU_VA_RANGE_HIGH))
      goto error_va_alloc;

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
      goto error_va_map;

   /* Initialize it. */
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->ws = ws;
   bo->bo = buf_handle;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->priority = priority;
   bo->ref_count = 1;

   ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   p_atomic_add(&ws->allocated_gtt,
                align64(bo->size, ws->info.gart_page_size));

   radv_amdgpu_add_buffer_to_global_list(bo);
   return (struct radeon_winsys_bo *)bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return NULL;
}
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
                              int fd, unsigned priority,
                              uint64_t *alloc_size)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   struct amdgpu_bo_import_result result = {0};
   struct amdgpu_bo_info info = {0};
   enum radeon_bo_domain initial = 0;
   int r;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   r = amdgpu_bo_import(ws->dev, type, fd, &result);
   if (r)
      goto error;

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r)
      goto error_query;

   if (alloc_size)
      *alloc_size = info.alloc_size;

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             result.alloc_size, 1 << 20, 0, &va, &va_handle,
                             AMDGPU_VA_RANGE_HIGH);
   if (r)
      goto error_query;

   r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
                            va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;

   bo->bo = result.buf_handle;
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->initial_domain = initial;
   bo->size = result.alloc_size;
   bo->is_shared = true;
   bo->ws = ws;
   bo->priority = priority;
   bo->ref_count = 1;

   r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      p_atomic_add(&ws->allocated_vram,
                   align64(bo->size, ws->info.gart_page_size));
   if (bo->initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt,
                   align64(bo->size, ws->info.gart_page_size));

   radv_amdgpu_add_buffer_to_global_list(bo);
   return (struct radeon_winsys_bo *)bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return NULL;
}
static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
                          struct radeon_winsys_bo *_bo,
                          int *fd)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   int r;
   unsigned handle;

   r = amdgpu_bo_export(bo->bo, type, &handle);
   if (r)
      return false;

   *fd = (int)handle;
   bo->is_shared = true;
   return true;
}
static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd,
                                 enum radeon_bo_domain *domains,
                                 enum radeon_bo_flag *flags)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct amdgpu_bo_import_result result = {0};
   struct amdgpu_bo_info info = {0};
   int r;

   *domains = 0;
   *flags = 0;

   r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
   if (r)
      return false;

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   amdgpu_bo_free(result.buf_handle);
   if (r)
      return false;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      *domains |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      *domains |= RADEON_DOMAIN_GTT;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
      *domains |= RADEON_DOMAIN_GDS;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
      *domains |= RADEON_DOMAIN_OA;

   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
      *flags |= RADEON_FLAG_CPU_ACCESS;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
      *flags |= RADEON_FLAG_NO_CPU_ACCESS;
   if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
      *flags |= RADEON_FLAG_IMPLICIT_SYNC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
      *flags |= RADEON_FLAG_GTT_WC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
      *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
      *flags |= RADEON_FLAG_ZERO_VRAM;
   return true;
}
static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:     tile_split = 64;    break;
   case 1:     tile_split = 128;   break;
   case 2:     tile_split = 256;   break;
   case 3:     tile_split = 512;   break;
   default:
   case 4:     tile_split = 1024;  break;
   case 5:     tile_split = 2048;  break;
   case 6:     tile_split = 4096;  break;
   }
   return tile_split;
}
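
/* radv_eg_tile_split_rev() below is the inverse of eg_tile_split(): it
 * converts a tile-split size in bytes back to the hardware encoding,
 * e.g. 256 -> 2, with 1024 (encoding 4) as the fallback. */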
static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:    return 0;
   case 128:   return 1;
   case 256:   return 2;
   case 512:   return 3;
   default:
   case 1024:  return 4;
   case 2048:  return 5;
   case 4096:  return 6;
   }
}
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_metadata metadata = {0};
   uint64_t tiling_flags = 0;

   if (bo->ws->info.chip_class >= GFX9) {
      tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
      tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
   } else {
      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
      else
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

      tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
      tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
      tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
      if (md->u.legacy.tile_split)
         tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
      tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
      tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

      if (md->u.legacy.scanout)
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
      else
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
   }

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}
static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_info info = {0};

   int r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   uint64_t tiling_flags = info.metadata.tiling_info;

   if (bo->ws->info.chip_class >= GFX9) {
      md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
      md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
   } else {
      md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
      md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

      if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
         md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
      else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
         md->u.legacy.microtile = RADEON_LAYOUT_TILED;

      md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
      md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
      md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
      md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
      md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
      md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
      md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
   }

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
   ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
   ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
   ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
   ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
   ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
   ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
   ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
   ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
   ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
   ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
   ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
}
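
/*
 * Usage sketch (illustrative, not part of the original file): once the
 * function pointers are installed, callers go through the generic
 * radeon_winsys vtable, e.g.:
 *
 *    struct radeon_winsys_bo *bo =
 *       ws->base.buffer_create(&ws->base, 4096, 4096, RADEON_DOMAIN_GTT,
 *                              RADEON_FLAG_CPU_ACCESS, 0);
 *    void *ptr = ws->base.buffer_map(bo);
 *    ...write upload data through ptr...
 *    ws->base.buffer_unmap(bo);
 *    ws->base.buffer_destroy(bo);
 */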