radv: move local bos usage to a perftest flag.
[mesa.git] / src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <inttypes.h>

#include "util/u_atomic.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);

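/* Thin wrapper around amdgpu_bo_va_op_raw() that page-aligns the size and
 * always requests read/write/execute page permissions. */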
static int
radv_amdgpu_bo_va_op(amdgpu_device_handle dev,
                     amdgpu_bo_handle bo,
                     uint64_t offset,
                     uint64_t size,
                     uint64_t addr,
                     uint64_t flags,
                     uint32_t ops)
{
    size = ALIGN(size, getpagesize());
    flags |= (AMDGPU_VM_PAGE_READABLE |
              AMDGPU_VM_PAGE_WRITEABLE |
              AMDGPU_VM_PAGE_EXECUTABLE);
    return amdgpu_bo_va_op_raw(dev, bo, offset, size, addr,
                               flags, ops);
}

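/* Map a backing BO range into the VA space of a virtual (sparse) BO and take
 * a reference on the backing BO. */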
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
    assert(range->size);

    if (!range->bo)
        return; /* TODO: PRT mapping */

    p_atomic_inc(&range->bo->ref_count);
    int r = radv_amdgpu_bo_va_op(bo->ws->dev, range->bo->bo, range->bo_offset, range->size,
                                 range->offset + bo->base.va, 0, AMDGPU_VA_OP_MAP);
    if (r)
        abort();
}

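/* Unmap a backing BO range from the VA space of a virtual BO and drop the
 * reference that was taken when it was mapped. */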
static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
    assert(range->size);

    if (!range->bo)
        return; /* TODO: PRT mapping */

    int r = radv_amdgpu_bo_va_op(bo->ws->dev, range->bo->bo, range->bo_offset, range->size,
                                 range->offset + bo->base.va, 0, AMDGPU_VA_OP_UNMAP);
    if (r)
        abort();
    radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}

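/* Order BOs by pointer value so the backing-BO list can be sorted and
 * deduplicated. */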
static int bo_comparator(const void *ap, const void *bp) {
    struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
    struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
    return (a > b) ? 1 : (a < b) ? -1 : 0;
}

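/* Rebuild the deduplicated list of backing BOs for a virtual BO from its
 * current ranges: collect the non-NULL backing BOs, sort them by pointer,
 * then drop duplicates. */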
static void
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
    if (bo->bo_capacity < bo->range_count) {
        uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
        bo->bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
        bo->bo_capacity = new_count;
    }

    uint32_t temp_bo_count = 0;
    for (uint32_t i = 0; i < bo->range_count; ++i)
        if (bo->ranges[i].bo)
            bo->bos[temp_bo_count++] = bo->ranges[i].bo;

    qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

    uint32_t final_bo_count = 1;
    for (uint32_t i = 1; i < temp_bo_count; ++i)
        if (bo->bos[i] != bo->bos[i - 1])
            bo->bos[final_bo_count++] = bo->bos[i];

    bo->bo_count = final_bo_count;
}

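/* Bind (or unbind, when _bo is NULL) a BO range into a virtual BO. Existing
 * ranges that overlap the new range are unmapped, split or merged as needed,
 * and the backing-BO list is rebuilt afterwards. */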
static void
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size,
                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
    struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
    struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
    int range_count_delta, new_idx;
    int first = 0, last;
    struct radv_amdgpu_map_range new_first, new_last;

    assert(parent->is_virtual);
    assert(!bo || !bo->is_virtual);

    if (!size)
        return;

    /* We have at most 2 new ranges (1 by the bind, and another one by splitting
     * a range that contains the newly bound range). */
    if (parent->range_capacity - parent->range_count < 2) {
        parent->range_capacity += 2;
        parent->ranges = realloc(parent->ranges,
                                 parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
    }

    /*
     * [first, last] is exactly the range of ranges that either overlap the
     * newly bound range, or are adjacent to it. This corresponds to the bind
     * ranges that may change.
     */
    while (first + 1 < parent->range_count &&
           parent->ranges[first].offset + parent->ranges[first].size < offset)
        ++first;

    last = first;
    while (last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
        ++last;

    /* Whether the first or last range is going to be totally removed or just
     * resized/left alone. Note that in the case of first == last, we will split
     * this into a part before and after the new range. The remove flag is then
     * whether to not create the corresponding split part. */
    bool remove_first = parent->ranges[first].offset == offset;
    bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
    bool unmapped_first = false;

    assert(parent->ranges[first].offset <= offset);
    assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

    /* Try to merge the new range with the first range. */
    if (parent->ranges[first].bo == bo &&
        (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
        size += offset - parent->ranges[first].offset;
        offset = parent->ranges[first].offset;
        bo_offset = parent->ranges[first].bo_offset;
        remove_first = true;
    }

    /* Try to merge the new range with the last range. */
    if (parent->ranges[last].bo == bo &&
        (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
        size = parent->ranges[last].offset + parent->ranges[last].size - offset;
        remove_last = true;
    }

    range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
    new_idx = first + !remove_first;

    /* Any range between first and last is going to be entirely covered by the
     * new range, so just unmap them. */
    for (int i = first + 1; i < last; ++i)
        radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

    /* If the first/last ranges are not left alone, we unmap them and optionally
     * map them again after modifications. Note that this implicitly can do the
     * splitting if first == last. */
    new_first = parent->ranges[first];
    new_last = parent->ranges[last];

    if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
        radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
        unmapped_first = true;

        if (!remove_first) {
            new_first.size = offset - new_first.offset;
            radv_amdgpu_winsys_virtual_map(parent, &new_first);
        }
    }

    if (parent->ranges[last].offset < offset + size || remove_last) {
        if (first != last || !unmapped_first)
            radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

        if (!remove_last) {
            new_last.size -= offset + size - new_last.offset;
            new_last.offset = offset + size;
            radv_amdgpu_winsys_virtual_map(parent, &new_last);
        }
    }

    /* Move the part of the range list after "last" to account for the changed
     * number of ranges. */
    memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
            sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

    if (!remove_first)
        parent->ranges[first] = new_first;

    if (!remove_last)
        parent->ranges[new_idx + 1] = new_last;

    /* Actually set up the new range. */
    parent->ranges[new_idx].offset = offset;
    parent->ranges[new_idx].size = size;
    parent->ranges[new_idx].bo = bo;
    parent->ranges[new_idx].bo_offset = bo_offset;

    radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

    parent->range_count += range_count_delta;

    radv_amdgpu_winsys_rebuild_bo_list(parent);
}

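/* Drop a reference on a BO and free it once the last reference is gone. For
 * virtual BOs this unmaps all ranges; for regular BOs it unmaps the VA range
 * and frees the kernel buffer. */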
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

    if (p_atomic_dec_return(&bo->ref_count))
        return;
    if (bo->is_virtual) {
        for (uint32_t i = 0; i < bo->range_count; ++i) {
            radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
        }
        free(bo->bos);
        free(bo->ranges);
    } else {
        if (bo->ws->debug_all_bos) {
            pthread_mutex_lock(&bo->ws->global_bo_list_lock);
            LIST_DEL(&bo->global_list_item);
            bo->ws->num_buffers--;
            pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
        }
        radv_amdgpu_bo_va_op(bo->ws->dev, bo->bo, 0, bo->size, bo->base.va, 0, AMDGPU_VA_OP_UNMAP);
        amdgpu_bo_free(bo->bo);
    }
    amdgpu_va_range_free(bo->va_handle);
    FREE(bo);
}

static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
    struct radv_amdgpu_winsys *ws = bo->ws;

    if (bo->ws->debug_all_bos) {
        pthread_mutex_lock(&ws->global_bo_list_lock);
        LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list);
        ws->num_buffers++;
        pthread_mutex_unlock(&ws->global_bo_list_lock);
    }
}

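/* Allocate a new BO: reserve a VA range, then either set up a virtual BO
 * (RADEON_FLAG_VIRTUAL) or allocate a kernel buffer with the requested
 * domains/flags and map it at the reserved VA. */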
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                             uint64_t size,
                             unsigned alignment,
                             enum radeon_bo_domain initial_domain,
                             unsigned flags)
{
    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
    struct radv_amdgpu_winsys_bo *bo;
    struct amdgpu_bo_alloc_request request = {0};
    amdgpu_bo_handle buf_handle;
    uint64_t va = 0;
    amdgpu_va_handle va_handle;
    int r;
    bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
    if (!bo) {
        return NULL;
    }

    r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                              size, alignment, 0, &va, &va_handle, 0);
    if (r)
        goto error_va_alloc;

    bo->base.va = va;
    bo->va_handle = va_handle;
    bo->size = size;
    bo->ws = ws;
    bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
    bo->ref_count = 1;

    if (flags & RADEON_FLAG_VIRTUAL) {
        bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
        bo->range_count = 1;
        bo->range_capacity = 1;

        bo->ranges[0].offset = 0;
        bo->ranges[0].size = size;
        bo->ranges[0].bo = NULL;
        bo->ranges[0].bo_offset = 0;

        radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
        return (struct radeon_winsys_bo *)bo;
    }

    request.alloc_size = size;
    request.phys_alignment = alignment;

    if (initial_domain & RADEON_DOMAIN_VRAM)
        request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
    if (initial_domain & RADEON_DOMAIN_GTT)
        request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;

    if (flags & RADEON_FLAG_CPU_ACCESS)
        request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
    if (flags & RADEON_FLAG_NO_CPU_ACCESS)
        request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
    if (flags & RADEON_FLAG_GTT_WC)
        request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
    if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
        request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
    if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && ws->info.drm_minor >= 20 && ws->use_local_bos) {
        bo->base.is_local = true;
        request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
    }

    /* This won't do anything on pre-4.9 kernels. */
    if (ws->zero_all_vram_allocs && (initial_domain & RADEON_DOMAIN_VRAM))
        request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
    r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
    if (r) {
        fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
        fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
        fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
        fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
        goto error_bo_alloc;
    }

    uint32_t va_flags = 0;
    if ((flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
        va_flags |= AMDGPU_VM_MTYPE_UC;
    r = radv_amdgpu_bo_va_op(ws->dev, buf_handle, 0, size, va, va_flags, AMDGPU_VA_OP_MAP);
    if (r)
        goto error_va_map;

    bo->bo = buf_handle;
    bo->initial_domain = initial_domain;
    bo->is_shared = false;
    radv_amdgpu_add_buffer_to_global_list(bo);
    return (struct radeon_winsys_bo *)bo;
error_va_map:
    amdgpu_bo_free(buf_handle);

error_bo_alloc:
    amdgpu_va_range_free(va_handle);

error_va_alloc:
    FREE(bo);
    return NULL;
}

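/* CPU map/unmap wrappers around libdrm_amdgpu. Mapping returns NULL on
 * failure. */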
static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
    int ret;
    void *data;
    ret = amdgpu_bo_cpu_map(bo->bo, &data);
    if (ret)
        return NULL;
    return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
    amdgpu_bo_cpu_unmap(bo->bo);
}

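/* Import a BO from a dma-buf file descriptor, reserve a VA range for it and
 * map it. The resulting BO is marked as shared. */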
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
                              int fd, unsigned *stride,
                              unsigned *offset)
{
    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
    struct radv_amdgpu_winsys_bo *bo;
    uint64_t va;
    amdgpu_va_handle va_handle;
    enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
    struct amdgpu_bo_import_result result = {0};
    struct amdgpu_bo_info info = {0};
    enum radeon_bo_domain initial = 0;
    int r;
    bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
    if (!bo)
        return NULL;

    r = amdgpu_bo_import(ws->dev, type, fd, &result);
    if (r)
        goto error;

    r = amdgpu_bo_query_info(result.buf_handle, &info);
    if (r)
        goto error_query;

    r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                              result.alloc_size, 1 << 20, 0, &va, &va_handle, 0);
    if (r)
        goto error_query;

    r = radv_amdgpu_bo_va_op(ws->dev, result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
    if (r)
        goto error_va_map;

    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
        initial |= RADEON_DOMAIN_VRAM;
    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
        initial |= RADEON_DOMAIN_GTT;

    bo->bo = result.buf_handle;
    bo->base.va = va;
    bo->va_handle = va_handle;
    bo->initial_domain = initial;
    bo->size = result.alloc_size;
    bo->is_shared = true;
    bo->ws = ws;
    radv_amdgpu_add_buffer_to_global_list(bo);
    return (struct radeon_winsys_bo *)bo;
error_va_map:
    amdgpu_va_range_free(va_handle);

error_query:
    amdgpu_bo_free(result.buf_handle);

error:
    FREE(bo);
    return NULL;
}

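/* Export a BO as a dma-buf file descriptor and mark it as shared. */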
static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
                          struct radeon_winsys_bo *_bo,
                          int *fd)
{
    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
    enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
    int r;
    unsigned handle;
    r = amdgpu_bo_export(bo->bo, type, &handle);
    if (r)
        return false;

    *fd = (int)handle;
    bo->is_shared = true;
    return true;
}

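/* Convert a tile split size in bytes to the hardware TILE_SPLIT encoding
 * (0 = 64B .. 6 = 4KiB); unknown values use the 1KiB encoding. */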
static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
    switch (eg_tile_split) {
    case 64: return 0;
    case 128: return 1;
    case 256: return 2;
    case 512: return 3;
    default:
    case 1024: return 4;
    case 2048: return 5;
    case 4096: return 6;
    }
}

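/* Translate the generic radeon_bo_metadata tiling description into the
 * AMDGPU_TILING_* bits understood by the kernel and attach it to the BO. */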
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
    struct amdgpu_bo_metadata metadata = {0};
    uint32_t tiling_flags = 0;

    if (bo->ws->info.chip_class >= GFX9) {
        tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
    } else {
        if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
            tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
        else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
            tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
        else
            tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

        tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
        tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
        tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
        if (md->u.legacy.tile_split)
            tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
        tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
        tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

        if (md->u.legacy.scanout)
            tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
        else
            tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
    }

    metadata.tiling_info = tiling_flags;
    metadata.size_metadata = md->size_metadata;
    memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

    amdgpu_bo_set_metadata(bo->bo, &metadata);
}

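/* Hook the BO callbacks into the winsys vtable. */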
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
    ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
    ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
    ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
    ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
    ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
    ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
    ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
    ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
}