/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>
#include <unistd.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <inttypes.h>

#include "util/u_atomic.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);
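
/* Thin wrapper around amdgpu_bo_va_op_raw(): aligns the size to the CPU page
 * size and always requests readable/writeable/executable VM mappings, so
 * callers only need to pass the operation-specific flags. */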
static int
radv_amdgpu_bo_va_op(amdgpu_device_handle dev,
                     amdgpu_bo_handle bo,
                     uint64_t offset,
                     uint64_t size,
                     uint64_t addr,
                     uint64_t flags,
                     uint32_t ops)
{
        size = ALIGN(size, getpagesize());
        flags |= (AMDGPU_VM_PAGE_READABLE |
                  AMDGPU_VM_PAGE_WRITEABLE |
                  AMDGPU_VM_PAGE_EXECUTABLE);
        return amdgpu_bo_va_op_raw(dev, bo, offset, size, addr,
                                   flags, ops);
}
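
/* Map one range of a virtual (sparse) BO onto its backing BO, taking a
 * reference on the backing BO so it stays alive while it is mapped. */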
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
        assert(range->size);

        if (!range->bo)
                return; /* TODO: PRT mapping */

        p_atomic_inc(&range->bo->ref_count);
        int r = radv_amdgpu_bo_va_op(bo->ws->dev, range->bo->bo, range->bo_offset, range->size,
                                     range->offset + bo->base.va, 0, AMDGPU_VA_OP_MAP);
        if (r)
                abort();
}
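
/* Unmap one range of a virtual BO and drop the reference that the
 * corresponding map operation took on the backing BO. */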
static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
        assert(range->size);

        if (!range->bo)
                return; /* TODO: PRT mapping */

        int r = radv_amdgpu_bo_va_op(bo->ws->dev, range->bo->bo, range->bo_offset, range->size,
                                     range->offset + bo->base.va, 0, AMDGPU_VA_OP_UNMAP);
        if (r)
                abort();
        radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}
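
/* qsort comparator that orders backing BOs by pointer identity; only used to
 * deduplicate the flat BO list below. */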
static int bo_comparator(const void *ap, const void *bp) {
        struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
        struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
        return (a > b) ? 1 : (a < b) ? -1 : 0;
}
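
/* Rebuild the flat list of backing BOs referenced by a virtual BO: collect
 * the BO of every mapped range, sort by pointer and drop duplicates so
 * bo->bos holds each backing BO exactly once. */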
static void
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
        if (bo->bo_capacity < bo->range_count) {
                uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
                bo->bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
                bo->bo_capacity = new_count;
        }

        uint32_t temp_bo_count = 0;
        for (uint32_t i = 0; i < bo->range_count; ++i)
                if (bo->ranges[i].bo)
                        bo->bos[temp_bo_count++] = bo->ranges[i].bo;

        qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

        /* Compact the sorted list; when no range has a backing BO the list is
         * simply empty. */
        uint32_t final_bo_count = 0;
        if (temp_bo_count) {
                final_bo_count = 1;
                for (uint32_t i = 1; i < temp_bo_count; ++i)
                        if (bo->bos[i] != bo->bos[i - 1])
                                bo->bos[final_bo_count++] = bo->bos[i];
        }

        bo->bo_count = final_bo_count;
}
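
/* Bind the range [offset, offset + size) of the virtual BO 'parent' to 'bo'
 * at 'bo_offset' (or unbind it when 'bo' is NULL). The parent keeps an
 * ordered, non-overlapping list of ranges, so a bind may merge with adjacent
 * ranges that point at the same BO, shrink or remove the ranges it overlaps,
 * and split a single range in two when it lands in the middle of it, e.g.
 * [0, 64K) -> [0, 16K) + [16K, 32K)(new) + [32K, 64K). Every range that
 * changes is unmapped and remapped through the helpers above. */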
static void
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size,
                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
        struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
        struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
        int range_count_delta, new_idx;
        int first = 0, last;
        struct radv_amdgpu_map_range new_first, new_last;

        assert(parent->is_virtual);
        assert(!bo || !bo->is_virtual);

        if (!size)
                return;

        /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that contains the newly bound range). */
        if (parent->range_capacity - parent->range_count < 2) {
                parent->range_capacity += 2;
                parent->ranges = realloc(parent->ranges,
                                         parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
        }

        /*
         * [first, last] is exactly the range of ranges that either overlap the
         * newly bound range, or are adjacent to it. This corresponds to the bind
         * ranges that may change.
         */
        while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
                ++first;

        last = first;
        while (last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
                ++last;

        /* Whether the first or last range is going to be totally removed or just
         * resized/left alone. Note that in the case of first == last, we will split
         * this into a part before and after the new range. The remove flag is then
         * whether to not create the corresponding split part. */
        bool remove_first = parent->ranges[first].offset == offset;
        bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
        bool unmapped_first = false;

        assert(parent->ranges[first].offset <= offset);
        assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

        /* Try to merge the new range with the first range. */
        if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
                size += offset - parent->ranges[first].offset;
                offset = parent->ranges[first].offset;
                bo_offset = parent->ranges[first].bo_offset;
                remove_first = true;
        }

        /* Try to merge the new range with the last range. */
        if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
                size = parent->ranges[last].offset + parent->ranges[last].size - offset;
                remove_last = true;
        }

        range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
        new_idx = first + !remove_first;

        /* Any range between first and last is going to be entirely covered by the new range so just unmap them. */
        for (int i = first + 1; i < last; ++i)
                radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

        /* If the first/last range are not left alone we unmap them and optionally map
         * them again after modifications. Note that this implicitly can do the splitting
         * if first == last. */
        new_first = parent->ranges[first];
        new_last = parent->ranges[last];

        if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
                radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
                unmapped_first = true;

                if (!remove_first) {
                        new_first.size = offset - new_first.offset;
                        radv_amdgpu_winsys_virtual_map(parent, &new_first);
                }
        }

        if (parent->ranges[last].offset < offset + size || remove_last) {
                if (first != last || !unmapped_first)
                        radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

                if (!remove_last) {
                        new_last.size -= offset + size - new_last.offset;
                        new_last.offset = offset + size;
                        radv_amdgpu_winsys_virtual_map(parent, &new_last);
                }
        }

        /* Move the range list after last to account for the changed number of ranges. */
        memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
                sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

        if (!remove_first)
                parent->ranges[first] = new_first;

        if (!remove_last)
                parent->ranges[new_idx + 1] = new_last;

        /* Actually set up the new range. */
        parent->ranges[new_idx].offset = offset;
        parent->ranges[new_idx].size = size;
        parent->ranges[new_idx].bo = bo;
        parent->ranges[new_idx].bo_offset = bo_offset;

        radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

        parent->range_count += range_count_delta;

        radv_amdgpu_winsys_rebuild_bo_list(parent);
}
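
/* Drop one reference; on the last one unmap every range (virtual BOs) or the
 * single VA mapping (normal BOs), release the GPU VA range and free the
 * amdgpu handle. */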
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

        if (p_atomic_dec_return(&bo->ref_count))
                return;
        if (bo->is_virtual) {
                for (uint32_t i = 0; i < bo->range_count; ++i) {
                        radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
                }
                free(bo->bos);
                free(bo->ranges);
        } else {
                if (bo->ws->debug_all_bos) {
                        pthread_mutex_lock(&bo->ws->global_bo_list_lock);
                        LIST_DEL(&bo->global_list_item);
                        bo->ws->num_buffers--;
                        pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
                }
                radv_amdgpu_bo_va_op(bo->ws->dev, bo->bo, 0, bo->size, bo->base.va, 0, AMDGPU_VA_OP_UNMAP);
                amdgpu_bo_free(bo->bo);
        }
        amdgpu_va_range_free(bo->va_handle);
        FREE(bo);
}
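
/* When the winsys runs with debug_all_bos, every allocation is tracked on a
 * global list (protected by global_bo_list_lock) so it can be inspected and
 * added to submissions wholesale. */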
static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
        struct radv_amdgpu_winsys *ws = bo->ws;

        if (bo->ws->debug_all_bos) {
                pthread_mutex_lock(&ws->global_bo_list_lock);
                LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list);
                ws->num_buffers++;
                pthread_mutex_unlock(&ws->global_bo_list_lock);
        }
}
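
/* Allocate a buffer: reserve a GPU VA range first, then either set up a
 * single unbound range (RADEON_FLAG_VIRTUAL) or allocate backing memory with
 * the requested domains/flags and map it at the reserved address. */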
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                             uint64_t size,
                             unsigned alignment,
                             enum radeon_bo_domain initial_domain,
                             unsigned flags)
{
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        struct radv_amdgpu_winsys_bo *bo;
        struct amdgpu_bo_alloc_request request = {0};
        amdgpu_bo_handle buf_handle;
        uint64_t va = 0;
        amdgpu_va_handle va_handle;
        int r;

        bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
        if (!bo)
                return NULL;

        r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                                  size, alignment, 0, &va, &va_handle, 0);
        if (r)
                goto error_va_alloc;

        bo->base.va = va;
        bo->va_handle = va_handle;
        bo->size = size;
        bo->ws = ws;
        bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
        bo->ref_count = 1;

        if (flags & RADEON_FLAG_VIRTUAL) {
                bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
                bo->range_count = 1;
                bo->range_capacity = 1;

                bo->ranges[0].offset = 0;
                bo->ranges[0].size = size;
                bo->ranges[0].bo = NULL;
                bo->ranges[0].bo_offset = 0;

                radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
                return (struct radeon_winsys_bo *)bo;
        }

        request.alloc_size = size;
        request.phys_alignment = alignment;

        if (initial_domain & RADEON_DOMAIN_VRAM)
                request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
        if (initial_domain & RADEON_DOMAIN_GTT)
                request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;

        if (flags & RADEON_FLAG_CPU_ACCESS)
                request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
        if (flags & RADEON_FLAG_NO_CPU_ACCESS)
                request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
        if (flags & RADEON_FLAG_GTT_WC)
                request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
        if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
                request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
        if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && ws->info.drm_minor >= 20 && ws->use_local_bos) {
                bo->base.is_local = true;
                request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
        }

        /* This won't do anything on pre-4.9 kernels. */
        if (ws->zero_all_vram_allocs && (initial_domain & RADEON_DOMAIN_VRAM))
                request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;

        r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
        if (r) {
                fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
                fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
                fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
                fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
                goto error_bo_alloc;
        }

        uint32_t va_flags = 0;
        if ((flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
                va_flags |= AMDGPU_VM_MTYPE_UC;

        r = radv_amdgpu_bo_va_op(ws->dev, buf_handle, 0, size, va, va_flags, AMDGPU_VA_OP_MAP);
        if (r)
                goto error_va_map;

        bo->bo = buf_handle;
        bo->initial_domain = initial_domain;
        bo->is_shared = false;
        radv_amdgpu_add_buffer_to_global_list(bo);
        return (struct radeon_winsys_bo *)bo;

error_va_map:
        amdgpu_bo_free(buf_handle);

error_bo_alloc:
        amdgpu_va_range_free(va_handle);

error_va_alloc:
        FREE(bo);
        return NULL;
}
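
/* CPU mapping simply forwards to libdrm_amdgpu; map returns NULL on failure. */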
static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        int ret;
        void *data;
        ret = amdgpu_bo_cpu_map(bo->bo, &data);
        if (ret)
                return NULL;
        return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        amdgpu_bo_cpu_unmap(bo->bo);
}
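
/* Import a dma-buf fd: get the amdgpu handle, query its size and preferred
 * heap, reserve a VA range for the whole allocation and map it. The stride
 * and offset output parameters are not filled in here. */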
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
                              int fd, unsigned *stride,
                              unsigned *offset)
{
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        struct radv_amdgpu_winsys_bo *bo;
        uint64_t va;
        amdgpu_va_handle va_handle;
        enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
        struct amdgpu_bo_import_result result = {0};
        struct amdgpu_bo_info info = {0};
        enum radeon_bo_domain initial = 0;
        int r;

        bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
        if (!bo)
                return NULL;

        r = amdgpu_bo_import(ws->dev, type, fd, &result);
        if (r)
                goto error;

        r = amdgpu_bo_query_info(result.buf_handle, &info);
        if (r)
                goto error_query;

        r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                                  result.alloc_size, 1 << 20, 0, &va, &va_handle, 0);
        if (r)
                goto error_query;

        r = radv_amdgpu_bo_va_op(ws->dev, result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
        if (r)
                goto error_va_map;

        if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
                initial |= RADEON_DOMAIN_VRAM;
        if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
                initial |= RADEON_DOMAIN_GTT;

        bo->bo = result.buf_handle;
        bo->base.va = va;
        bo->va_handle = va_handle;
        bo->initial_domain = initial;
        bo->size = result.alloc_size;
        bo->is_shared = true;
        bo->ws = ws;
        bo->ref_count = 1;

        radv_amdgpu_add_buffer_to_global_list(bo);
        return (struct radeon_winsys_bo *)bo;

error_va_map:
        amdgpu_va_range_free(va_handle);

error_query:
        amdgpu_bo_free(result.buf_handle);

error:
        FREE(bo);
        return NULL;
}
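
/* Export the BO as a dma-buf fd and remember that it is now shared. */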
static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
                          struct radeon_winsys_bo *_bo,
                          int *fd)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
        int r;
        uint32_t handle;
        r = amdgpu_bo_export(bo->bo, type, &handle);
        if (r)
                return false;

        *fd = (int)handle;
        bo->is_shared = true;
        return true;
}
static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
        switch (eg_tile_split) {
        case 64:   return 0;
        case 128:  return 1;
        case 256:  return 2;
        case 512:  return 3;
        default:
        case 1024: return 4;
        case 2048: return 5;
        case 4096: return 6;
        }
}
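
/* Translate the driver-independent radeon_bo_metadata into amdgpu tiling
 * flags and attach them to the BO: GFX9+ stores the swizzle mode, older
 * ASICs the legacy array mode, bank and pipe parameters. */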
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        struct amdgpu_bo_metadata metadata = {0};
        uint32_t tiling_flags = 0;

        if (bo->ws->info.chip_class >= GFX9) {
                tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
        } else {
                if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
                        tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
                else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
                        tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
                else
                        tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

                tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
                tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
                tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
                if (md->u.legacy.tile_split)
                        tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
                tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
                tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

                if (md->u.legacy.scanout)
                        tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
                else
                        tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
        }

        metadata.tiling_info = tiling_flags;
        metadata.size_metadata = md->size_metadata;
        memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

        amdgpu_bo_set_metadata(bo->bo, &metadata);
}
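
/* Plug the buffer entry points into the winsys function table. */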
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
        ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
        ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
        ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
        ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
        ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
        ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
        ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
        ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
}