/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <stdio.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include "drm-uapi/amdgpu_drm.h"
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>

#include "util/u_atomic.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#define AMDGPU_TILING_SCANOUT_SHIFT 63
#define AMDGPU_TILING_SCANOUT_MASK 1
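
/* Bit 63 of the tiling_info word carries the SCANOUT flag; it is encoded and
 * decoded with the AMDGPU_TILING_SET/GET macros in the metadata helpers
 * below. */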

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);
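
/* Thin wrapper around amdgpu_bo_va_op_raw(): translates RADEON_FLAG_* bits
 * into AMDGPU_VM_* page-table flags and page-aligns the size, since the
 * kernel expects VA operations in whole pages. */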
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
                     amdgpu_bo_handle bo,
                     uint64_t offset,
                     uint64_t size,
                     uint64_t addr,
                     uint32_t bo_flags,
                     uint32_t ops)
{
   uint64_t flags = AMDGPU_VM_PAGE_READABLE |
                    AMDGPU_VM_PAGE_EXECUTABLE;

   if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
      flags |= AMDGPU_VM_MTYPE_UC;

   if (!(bo_flags & RADEON_FLAG_READ_ONLY))
      flags |= AMDGPU_VM_PAGE_WRITEABLE;

   size = align64(size, getpagesize());

   return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
                              flags, ops);
}
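
/* Map one range of a virtual (sparse) BO. Mapping takes a reference on the
 * backing BO so it stays alive for as long as it is bound into the virtual
 * BO's address space; the reference is dropped again on unmap. */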
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
   assert(range->size);

   if (!range->bo)
      return; /* TODO: PRT mapping */

   p_atomic_inc(&range->bo->ref_count);
   int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
                                range->size, range->offset + bo->base.va,
                                0, AMDGPU_VA_OP_MAP);
   if (r)
      abort();
}

static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
   assert(range->size);

   if (!range->bo)
      return; /* TODO: PRT mapping */

   int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
                                range->size, range->offset + bo->base.va,
                                0, AMDGPU_VA_OP_UNMAP);
   if (r)
      abort();

   /* Drop the reference that the corresponding map took on the backing BO. */
   radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}
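
/* Order backing BOs by pointer so duplicates end up adjacent and can be
 * stripped with a single pass after qsort(). */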
static int bo_comparator(const void *ap, const void *bp)
{
   struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
   struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
   return (a > b) ? 1 : (a < b) ? -1 : 0;
}
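
/* Rebuild the deduplicated list of backing BOs referenced by the virtual
 * BO's ranges; this flat list is what command submission uses to declare
 * the residency requirements of the sparse BO. */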
static void
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
   if (bo->bo_capacity < bo->range_count) {
      uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
      bo->bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
      bo->bo_capacity = new_count;
   }

   uint32_t temp_bo_count = 0;
   for (uint32_t i = 0; i < bo->range_count; ++i)
      if (bo->ranges[i].bo)
         bo->bos[temp_bo_count++] = bo->ranges[i].bo;

   qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

   /* Nothing is bound: report an empty list instead of a stale entry. */
   if (temp_bo_count == 0) {
      bo->bo_count = 0;
      return;
   }

   uint32_t final_bo_count = 1;
   for (uint32_t i = 1; i < temp_bo_count; ++i)
      if (bo->bos[i] != bo->bos[i - 1])
         bo->bos[final_bo_count++] = bo->bos[i];

   bo->bo_count = final_bo_count;
}
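
/* Bind (or unbind, when _bo is NULL) a backing BO into [offset, offset + size)
 * of a virtual BO. The parent keeps a sorted list of non-overlapping ranges
 * covering its whole size; a bind replaces whatever the interval previously
 * pointed at, which may split the first/last overlapped range, merge with
 * compatible neighbours, and drop fully covered ranges in between. */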
static void
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size,
                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
   struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
   struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
   int range_count_delta, new_idx;
   int first = 0, last;
   struct radv_amdgpu_map_range new_first, new_last;

   assert(parent->is_virtual);
   assert(!bo || !bo->is_virtual);

   /* We have at most 2 new ranges (1 by the bind, and another one by
    * splitting a range that contains the newly bound range). */
   if (parent->range_capacity - parent->range_count < 2) {
      parent->range_capacity += 2;
      parent->ranges = realloc(parent->ranges,
                               parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
   }

   /*
    * [first, last] is exactly the range of ranges that either overlap the
    * new parent, or are adjacent to it. This corresponds to the bind ranges
    * that may change.
    */
   while (first + 1 < parent->range_count &&
          parent->ranges[first].offset + parent->ranges[first].size < offset)
      ++first;

   last = first;
   while (last + 1 < parent->range_count &&
          parent->ranges[last].offset <= offset + size)
      ++last;

   /* Whether the first or last range are going to be totally removed or just
    * resized/left alone. Note that in the case of first == last, we will split
    * this into a part before and after the new range. The remove flag is then
    * whether to not create the corresponding split part. */
   bool remove_first = parent->ranges[first].offset == offset;
   bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
   bool unmapped_first = false;

   assert(parent->ranges[first].offset <= offset);
   assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

   /* Try to merge the new range with the first range. */
   if (parent->ranges[first].bo == bo &&
       (!bo || offset - bo_offset ==
               parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
      size += offset - parent->ranges[first].offset;
      offset = parent->ranges[first].offset;
      bo_offset = parent->ranges[first].bo_offset;
   }

   /* Try to merge the new range with the last range. */
   if (parent->ranges[last].bo == bo &&
       (!bo || offset - bo_offset ==
               parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
      size = parent->ranges[last].offset + parent->ranges[last].size - offset;
   }

   range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
   new_idx = first + !remove_first;

   /* Any range between first and last is going to be entirely covered by the
    * new range, so just unmap them. */
   for (int i = first + 1; i < last; ++i)
      radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

   /* If the first/last range are not left alone we unmap them and optionally
    * map them again after modifications. Note that this implicitly can do the
    * splitting if first == last. */
   new_first = parent->ranges[first];
   new_last = parent->ranges[last];

   if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
      radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
      unmapped_first = true;

      if (!remove_first) {
         new_first.size = offset - new_first.offset;
         radv_amdgpu_winsys_virtual_map(parent, &new_first);
      }
   }

   if (parent->ranges[last].offset < offset + size || remove_last) {
      if (first != last || !unmapped_first)
         radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

      if (!remove_last) {
         new_last.size -= offset + size - new_last.offset;
         new_last.offset = offset + size;
         radv_amdgpu_winsys_virtual_map(parent, &new_last);
      }
   }

   /* Move the range list after last to account for the changed number of
    * ranges. */
   memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
           sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

   if (!remove_first)
      parent->ranges[first] = new_first;

   if (!remove_last)
      parent->ranges[new_idx + 1] = new_last;

   /* Actually set up the new range. */
   parent->ranges[new_idx].offset = offset;
   parent->ranges[new_idx].size = size;
   parent->ranges[new_idx].bo = bo;
   parent->ranges[new_idx].bo_offset = bo_offset;

   radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

   parent->range_count += range_count_delta;

   radv_amdgpu_winsys_rebuild_bo_list(parent);
}
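
/* BOs are reference counted: virtual mappings take references on their
 * backing BOs, so destruction only really frees once the last reference
 * (including the creator's) is gone. */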
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct radv_amdgpu_winsys *ws = bo->ws;

   if (p_atomic_dec_return(&bo->ref_count))
      return;

   if (bo->is_virtual) {
      for (uint32_t i = 0; i < bo->range_count; ++i) {
         radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
      }
      free(bo->bos);
      free(bo->ranges);
   } else {
      if (bo->ws->debug_all_bos) {
         pthread_mutex_lock(&bo->ws->global_bo_list_lock);
         list_del(&bo->global_list_item);
         bo->ws->num_buffers--;
         pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
      }
      radv_amdgpu_bo_va_op(bo->ws, bo->bo, 0, bo->size, bo->base.va,
                           0, AMDGPU_VA_OP_UNMAP);
      amdgpu_bo_free(bo->bo);
   }

   if (bo->initial_domain & RADEON_DOMAIN_VRAM) {
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram,
                      -align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis,
                      -align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (bo->initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt,
                   -align64(bo->size, ws->info.gart_page_size));

   amdgpu_va_range_free(bo->va_handle);
   FREE(bo);
}
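
/* With debug_all_bos set, every real BO is kept on a global list so it can
 * be inspected or force-added to submissions when debugging. */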
static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
   struct radv_amdgpu_winsys *ws = bo->ws;

   if (bo->ws->debug_all_bos) {
      pthread_mutex_lock(&ws->global_bo_list_lock);
      list_addtail(&bo->global_list_item, &ws->global_bo_list);
      ws->num_buffers++;
      pthread_mutex_unlock(&ws->global_bo_list_lock);
   }
}
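
/* Allocate a new BO: reserve a GPU VA range first, then either set up an
 * empty range list (for virtual/sparse BOs) or allocate and map real memory.
 * Aligning the VA of large buffers to the PTE fragment size lets the kernel
 * use larger page-table fragments, which speeds up address translation. */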
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                             uint64_t size,
                             unsigned alignment,
                             enum radeon_bo_domain initial_domain,
                             unsigned flags,
                             unsigned priority)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   struct amdgpu_bo_alloc_request request = {0};
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   amdgpu_va_handle va_handle;
   int r;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   unsigned virt_alignment = alignment;
   if (size >= ws->info.pte_fragment_size)
      virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, virt_alignment, 0, &va, &va_handle,
                             (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
                             AMDGPU_VA_RANGE_HIGH);
   if (r)
      goto error_va_alloc;

   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->ws = ws;
   bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
   bo->ref_count = 1;

   if (flags & RADEON_FLAG_VIRTUAL) {
      bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
      bo->range_count = 1;
      bo->range_capacity = 1;

      bo->ranges[0].offset = 0;
      bo->ranges[0].size = size;
      bo->ranges[0].bo = NULL;
      bo->ranges[0].bo_offset = 0;

      radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
      return (struct radeon_winsys_bo *)bo;
   }
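
   /* Translate the generic RADEON_DOMAIN_* and RADEON_FLAG_* requests into
    * the amdgpu allocation request. */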
   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   if (initial_domain & RADEON_DOMAIN_GDS)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
   if (initial_domain & RADEON_DOMAIN_OA)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
      bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   }
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
   if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
      request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
   if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
       ws->info.has_local_buffers &&
       (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
      bo->base.is_local = true;
      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
   }

   /* this won't do anything on pre 4.9 kernels */
   if (initial_domain & RADEON_DOMAIN_VRAM) {
      if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
         request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
   }

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
      fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
      goto error_bo_alloc;
   }

   r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags,
                            AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   bo->bo = buf_handle;
   bo->initial_domain = initial_domain;
   bo->is_shared = false;
   bo->priority = priority;

   r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
       * aren't mappable and they are counted as part of the VRAM
       * counter.
       *
       * Otherwise, buffers with the CPU_ACCESS flag or with neither
       * flag (imported buffers) are counted as part of the VRAM
       * visible counter because they can be mapped.
       */
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram,
                      align64(bo->size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis,
                      align64(bo->size, ws->info.gart_page_size));
      }
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt,
                   align64(bo->size, ws->info.gart_page_size));

   radv_amdgpu_add_buffer_to_global_list(bo);
   return (struct radeon_winsys_bo *)bo;

error_va_map:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   FREE(bo);
   return NULL;
}

static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   int ret;
   void *data;

   ret = amdgpu_bo_cpu_map(bo->bo, &data);
   if (ret)
      return NULL;
   return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

   amdgpu_bo_cpu_unmap(bo->bo);
}
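
/* Example: for a 9 MiB buffer on GFX9, util_last_bit64(size) is 24, so the
 * MSB alignment below is 1ull << 23 = 8 MiB and the VA gets 8 MiB aligned. */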
static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
                                     uint64_t size, unsigned alignment)
{
   uint64_t vm_alignment = alignment;

   /* Increase the VM alignment for faster address translation. */
   if (size >= ws->info.pte_fragment_size)
      vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

   /* Gfx9: Increase the VM alignment to the most significant bit set
    * in the size for faster address translation.
    */
   if (ws->info.chip_class >= GFX9) {
      unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
      uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

      vm_alignment = MAX2(vm_alignment, msb_alignment);
   }

   return vm_alignment;
}
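
/* Import an existing CPU allocation as a GPU-visible BO (userptr); amdgpu
 * expects the pointer and size to be page-aligned. */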
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
                               void *pointer,
                               uint64_t size,
                               unsigned priority)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   amdgpu_bo_handle buf_handle;
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   uint64_t vm_alignment;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
      goto error;

   /* Using the optimal VM alignment also fixes GPU hangs for buffers that
    * are imported.
    */
   vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
                                                       ws->info.gart_page_size);

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, vm_alignment, 0, &va, &va_handle,
                             AMDGPU_VA_RANGE_HIGH))
      goto error_va_alloc;

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
      goto error_va_map;

   /* Initialize it */
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->size = size;
   bo->ref_count = 1;
   bo->ws = ws;
   bo->bo = buf_handle;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->priority = priority;

   ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   p_atomic_add(&ws->allocated_gtt,
                align64(bo->size, ws->info.gart_page_size));

   radv_amdgpu_add_buffer_to_global_list(bo);
   return (struct radeon_winsys_bo *)bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return NULL;
}
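
/* Import a BO from a dma-buf file descriptor: query its size and preferred
 * heap from the kernel, reserve a VA range, and map it for GPU access. */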
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
                              int fd, unsigned priority,
                              uint64_t *alloc_size)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   struct amdgpu_bo_import_result result = {0};
   struct amdgpu_bo_info info = {0};
   enum radeon_bo_domain initial = 0;
   int r;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   r = amdgpu_bo_import(ws->dev, type, fd, &result);
   if (r)
      goto error;

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r)
      goto error_query;

   if (alloc_size)
      *alloc_size = info.alloc_size;

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             result.alloc_size, 1 << 20, 0, &va, &va_handle,
                             AMDGPU_VA_RANGE_HIGH);
   if (r)
      goto error_query;

   r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
                            va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;

   bo->bo = result.buf_handle;
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->initial_domain = initial;
   bo->size = result.alloc_size;
   bo->is_shared = true;
   bo->ws = ws;
   bo->priority = priority;
   bo->ref_count = 1;

   r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      p_atomic_add(&ws->allocated_vram,
                   align64(bo->size, ws->info.gart_page_size));
   if (bo->initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt,
                   align64(bo->size, ws->info.gart_page_size));

   radv_amdgpu_add_buffer_to_global_list(bo);
   return (struct radeon_winsys_bo *)bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return NULL;
}

static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
                          struct radeon_winsys_bo *_bo,
                          int *fd)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   int r;
   uint32_t handle;

   r = amdgpu_bo_export(bo->bo, type, &handle);
   if (r)
      return false;

   *fd = (int)handle;
   bo->is_shared = true;
   return true;
}
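
/* Peek at a dma-buf's allocation info without keeping the import: translate
 * the kernel's preferred heap and creation flags back into the generic
 * RADEON_DOMAIN_* and RADEON_FLAG_* bits. */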
static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd,
                                 enum radeon_bo_domain *domains,
                                 enum radeon_bo_flag *flags)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct amdgpu_bo_import_result result = {0};
   struct amdgpu_bo_info info = {0};
   int r;

   *domains = 0;
   *flags = 0;

   r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
   if (r)
      return false;

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   amdgpu_bo_free(result.buf_handle);
   if (r)
      return false;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      *domains |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      *domains |= RADEON_DOMAIN_GTT;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
      *domains |= RADEON_DOMAIN_GDS;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
      *domains |= RADEON_DOMAIN_OA;

   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
      *flags |= RADEON_FLAG_CPU_ACCESS;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
      *flags |= RADEON_FLAG_NO_CPU_ACCESS;
   if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
      *flags |= RADEON_FLAG_IMPLICIT_SYNC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
      *flags |= RADEON_FLAG_GTT_WC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
      *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
      *flags |= RADEON_FLAG_ZERO_VRAM;

   return true;
}
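
/* Decode the EG-style TILE_SPLIT field: the hardware stores log2(bytes) - 6,
 * so field value 0 means 64 bytes and 6 means 4096 bytes.
 * radv_eg_tile_split_rev below is the inverse mapping. */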
static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0: tile_split = 64;   break;
   case 1: tile_split = 128;  break;
   case 2: tile_split = 256;  break;
   case 3: tile_split = 512;  break;
   default:
   case 4: tile_split = 1024; break;
   case 5: tile_split = 2048; break;
   case 6: tile_split = 4096; break;
   }
   return tile_split;
}

static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:   return 0;
   case 128:  return 1;
   case 256:  return 2;
   case 512:  return 3;
   default:
   case 1024: return 4;
   case 2048: return 5;
   case 4096: return 6;
   }
}
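
/* Pack the generic radeon_bo_metadata into the kernel's tiling_info word and
 * attach it to the BO, so other processes importing it can recover the
 * surface layout. */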
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_metadata metadata = {0};
   uint64_t tiling_flags = 0;

   if (bo->ws->info.chip_class >= GFX9) {
      tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
      tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
   } else {
      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
      else
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

      tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
      tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
      tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
      if (md->u.legacy.tile_split)
         tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
      tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
      tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

      if (md->u.legacy.scanout)
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
      else
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
   }

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}
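
/* Inverse of set_metadata: query the kernel's tiling_info word and unpack it
 * into the generic radeon_bo_metadata. */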
static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_info info = {0};

   int r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   uint64_t tiling_flags = info.metadata.tiling_info;

   if (bo->ws->info.chip_class >= GFX9) {
      md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
      md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
   } else {
      md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
      md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

      if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
         md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
      else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
         md->u.legacy.microtile = RADEON_LAYOUT_TILED;

      md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
      md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
      md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
      md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
      md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
      md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
      md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
   }

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
   ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
   ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
   ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
   ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
   ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
   ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
   ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
   ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
   ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
   ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
   ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
}