[mesa.git] src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @ commit ec2fa3a56e4f5eaee88a6ff88f1851c6df773495
1 /*
2 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
3 * Copyright © 2015 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
18 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * The above copyright notice and this permission notice (including the
24 * next paragraph) shall be included in all copies or substantial portions
25 * of the Software.
26 */
27
28 #include "amdgpu_cs.h"
29
30 #include "util/hash_table.h"
31 #include "util/os_time.h"
32 #include "util/u_hash_table.h"
33 #include "state_tracker/drm_driver.h"
34 #include "drm-uapi/amdgpu_drm.h"
35 #include <xf86drm.h>
36 #include <stdio.h>
37 #include <inttypes.h>
38
39 #ifndef AMDGPU_VA_RANGE_HIGH
40 #define AMDGPU_VA_RANGE_HIGH 0x2
41 #endif
42
43 /* Set to 1 for verbose output showing committed sparse buffer ranges. */
44 #define DEBUG_SPARSE_COMMITS 0
45
46 struct amdgpu_sparse_backing_chunk {
47 uint32_t begin, end;
48 };
49
50 static void amdgpu_bo_unmap(struct pb_buffer *buf);
51
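/* Wait until the buffer is idle, or just poll when timeout == 0. Ioctls still
* in flight for this buffer are waited on first. Shared (imported/exported)
* buffers fall back to the kernel's amdgpu_bo_wait_for_idle, because user
* fences are local to this process; otherwise the fence list attached to the
* BO is checked and idle fences are pruned. Returns true if the buffer is idle.
*/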
52 static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
53 enum radeon_bo_usage usage)
54 {
55 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
56 struct amdgpu_winsys *ws = bo->ws;
57 int64_t abs_timeout;
58
59 if (timeout == 0) {
60 if (p_atomic_read(&bo->num_active_ioctls))
61 return false;
62
63 } else {
64 abs_timeout = os_time_get_absolute_timeout(timeout);
65
66 /* Wait if any ioctl is being submitted with this buffer. */
67 if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
68 return false;
69 }
70
71 if (bo->is_shared) {
72 /* We can't use user fences for shared buffers, because user fences
73 * are local to this process only. If we want to wait for all buffer
74 * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
75 */
76 bool buffer_busy = true;
77 int r;
78
79 r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
80 if (r)
81 fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
82 r);
83 return !buffer_busy;
84 }
85
86 if (timeout == 0) {
87 unsigned idle_fences;
88 bool buffer_idle;
89
90 simple_mtx_lock(&ws->bo_fence_lock);
91
92 for (idle_fences = 0; idle_fences < bo->num_fences; ++idle_fences) {
93 if (!amdgpu_fence_wait(bo->fences[idle_fences], 0, false))
94 break;
95 }
96
97 /* Release the idle fences to avoid checking them again later. */
98 for (unsigned i = 0; i < idle_fences; ++i)
99 amdgpu_fence_reference(&bo->fences[i], NULL);
100
101 memmove(&bo->fences[0], &bo->fences[idle_fences],
102 (bo->num_fences - idle_fences) * sizeof(*bo->fences));
103 bo->num_fences -= idle_fences;
104
105 buffer_idle = !bo->num_fences;
106 simple_mtx_unlock(&ws->bo_fence_lock);
107
108 return buffer_idle;
109 } else {
110 bool buffer_idle = true;
111
112 simple_mtx_lock(&ws->bo_fence_lock);
113 while (bo->num_fences && buffer_idle) {
114 struct pipe_fence_handle *fence = NULL;
115 bool fence_idle = false;
116
117 amdgpu_fence_reference(&fence, bo->fences[0]);
118
119 /* Wait for the fence. */
120 simple_mtx_unlock(&ws->bo_fence_lock);
121 if (amdgpu_fence_wait(fence, abs_timeout, true))
122 fence_idle = true;
123 else
124 buffer_idle = false;
125 simple_mtx_lock(&ws->bo_fence_lock);
126
127 /* Release an idle fence to avoid checking it again later, keeping in
128 * mind that the fence array may have been modified by other threads.
129 */
130 if (fence_idle && bo->num_fences && bo->fences[0] == fence) {
131 amdgpu_fence_reference(&bo->fences[0], NULL);
132 memmove(&bo->fences[0], &bo->fences[1],
133 (bo->num_fences - 1) * sizeof(*bo->fences));
134 bo->num_fences--;
135 }
136
137 amdgpu_fence_reference(&fence, NULL);
138 }
139 simple_mtx_unlock(&ws->bo_fence_lock);
140
141 return buffer_idle;
142 }
143 }
144
145 static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
146 struct pb_buffer *buf)
147 {
148 return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
149 }
150
151 static enum radeon_bo_flag amdgpu_bo_get_flags(
152 struct pb_buffer *buf)
153 {
154 return ((struct amdgpu_winsys_bo*)buf)->flags;
155 }
156
157 static void amdgpu_bo_remove_fences(struct amdgpu_winsys_bo *bo)
158 {
159 for (unsigned i = 0; i < bo->num_fences; ++i)
160 amdgpu_fence_reference(&bo->fences[i], NULL);
161
162 FREE(bo->fences);
163 bo->num_fences = 0;
164 bo->max_fences = 0;
165 }
166
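/* Destroy a real (non-slab) BO: drop any cached CPU mapping, remove it from
* the debug BO list and the per-screen KMS handle tables, unmap and free its
* VA range, free the kernel handle, and update the VRAM/GTT accounting.
*/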
167 void amdgpu_bo_destroy(struct pb_buffer *_buf)
168 {
169 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
170 struct amdgpu_screen_winsys *sws_iter;
171 struct amdgpu_winsys *ws = bo->ws;
172
173 assert(bo->bo && "must not be called for slab entries");
174
175 if (!bo->is_user_ptr && bo->cpu_ptr) {
176 bo->cpu_ptr = NULL;
177 amdgpu_bo_unmap(&bo->base);
178 }
179 assert(bo->is_user_ptr || bo->u.real.map_count == 0);
180
181 if (ws->debug_all_bos) {
182 simple_mtx_lock(&ws->global_bo_list_lock);
183 list_del(&bo->u.real.global_list_item);
184 ws->num_buffers--;
185 simple_mtx_unlock(&ws->global_bo_list_lock);
186 }
187
188 /* Close all KMS handles retrieved for other DRM file descriptions */
189 simple_mtx_lock(&ws->sws_list_lock);
190 for (sws_iter = ws->sws_list; sws_iter; sws_iter = sws_iter->next) {
191 struct hash_entry *entry;
192
193 if (!sws_iter->kms_handles)
194 continue;
195
196 entry = _mesa_hash_table_search(sws_iter->kms_handles, bo);
197 if (entry) {
198 struct drm_gem_close args = { .handle = (uintptr_t)entry->data };
199
200 drmIoctl(sws_iter->fd, DRM_IOCTL_GEM_CLOSE, &args);
201 _mesa_hash_table_remove(sws_iter->kms_handles, entry);
202 }
203 }
204 simple_mtx_unlock(&ws->sws_list_lock);
205
206 simple_mtx_lock(&ws->bo_export_table_lock);
207 _mesa_hash_table_remove_key(ws->bo_export_table, bo->bo);
208 simple_mtx_unlock(&ws->bo_export_table_lock);
209
210 if (bo->initial_domain & RADEON_DOMAIN_VRAM_GTT) {
211 amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
212 amdgpu_va_range_free(bo->u.real.va_handle);
213 }
214 amdgpu_bo_free(bo->bo);
215
216 amdgpu_bo_remove_fences(bo);
217
218 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
219 ws->allocated_vram -= align64(bo->base.size, ws->info.gart_page_size);
220 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
221 ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size);
222
223 simple_mtx_destroy(&bo->lock);
224 FREE(bo);
225 }
226
227 static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
228 {
229 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
230
231 assert(bo->bo); /* slab buffers have a separate vtbl */
232
233 if (bo->u.real.use_reusable_pool)
234 pb_cache_add_buffer(&bo->u.real.cache_entry);
235 else
236 amdgpu_bo_destroy(_buf);
237 }
238
239 static void amdgpu_clean_up_buffer_managers(struct amdgpu_winsys *ws)
240 {
241 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++)
242 pb_slabs_reclaim(&ws->bo_slabs[i]);
243
244 pb_cache_release_all_buffers(&ws->bo_cache);
245 }
246
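/* CPU-map a real BO. If the first attempt fails, reclaim cached and slab
* buffers and retry once. The first successful mapping updates the
* mapped_vram/mapped_gtt/num_mapped_buffers statistics.
*/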
247 static bool amdgpu_bo_do_map(struct amdgpu_winsys_bo *bo, void **cpu)
248 {
249 assert(!bo->sparse && bo->bo && !bo->is_user_ptr);
250 int r = amdgpu_bo_cpu_map(bo->bo, cpu);
251 if (r) {
252 /* Clean up buffer managers and try again. */
253 amdgpu_clean_up_buffer_managers(bo->ws);
254 r = amdgpu_bo_cpu_map(bo->bo, cpu);
255 if (r)
256 return false;
257 }
258
259 if (p_atomic_inc_return(&bo->u.real.map_count) == 1) {
260 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
261 bo->ws->mapped_vram += bo->base.size;
262 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
263 bo->ws->mapped_gtt += bo->base.size;
264 bo->ws->num_mapped_buffers++;
265 }
266
267 return true;
268 }
269
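/* Map a buffer for CPU access. Unless PIPE_TRANSFER_UNSYNCHRONIZED is set,
* any command stream still referencing the buffer is flushed and the buffer
* is waited on (only write references matter when mapping for read). Slab
* entries are mapped through their parent "real" buffer at the right offset.
*
* Illustrative sketch only (passing a NULL cmdbuf skips the CS reference
* checks but still waits for idle):
*
*    void *ptr = amdgpu_bo_map(buf, NULL, PIPE_TRANSFER_WRITE);
*    if (ptr)
*       memset(ptr, 0, buf->size);
*    amdgpu_bo_unmap(buf);
*/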
270 void *amdgpu_bo_map(struct pb_buffer *buf,
271 struct radeon_cmdbuf *rcs,
272 enum pipe_transfer_usage usage)
273 {
274 struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
275 struct amdgpu_winsys_bo *real;
276 struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
277
278 assert(!bo->sparse);
279
280 /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
281 if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
282 /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
283 if (usage & PIPE_TRANSFER_DONTBLOCK) {
284 if (!(usage & PIPE_TRANSFER_WRITE)) {
285 /* Mapping for read.
286 *
287 * Since we are mapping for read, we don't need to wait
288 * if the GPU is using the buffer for read too
289 * (neither one is changing it).
290 *
291 * Only check whether the buffer is being used for write. */
292 if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
293 RADEON_USAGE_WRITE)) {
294 cs->flush_cs(cs->flush_data,
295 RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
296 return NULL;
297 }
298
299 if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
300 RADEON_USAGE_WRITE)) {
301 return NULL;
302 }
303 } else {
304 if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
305 cs->flush_cs(cs->flush_data,
306 RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
307 return NULL;
308 }
309
310 if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
311 RADEON_USAGE_READWRITE)) {
312 return NULL;
313 }
314 }
315 } else {
316 uint64_t time = os_time_get_nano();
317
318 if (!(usage & PIPE_TRANSFER_WRITE)) {
319 /* Mapping for read.
320 *
321 * Since we are mapping for read, we don't need to wait
322 * if the GPU is using the buffer for read too
323 * (neither one is changing it).
324 *
325 * Only check whether the buffer is being used for write. */
326 if (cs) {
327 if (amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
328 RADEON_USAGE_WRITE)) {
329 cs->flush_cs(cs->flush_data,
330 RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
331 } else {
332 /* Try to avoid busy-waiting in amdgpu_bo_wait. */
333 if (p_atomic_read(&bo->num_active_ioctls))
334 amdgpu_cs_sync_flush(rcs);
335 }
336 }
337
338 amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
339 RADEON_USAGE_WRITE);
340 } else {
341 /* Mapping for write. */
342 if (cs) {
343 if (amdgpu_bo_is_referenced_by_cs(cs, bo)) {
344 cs->flush_cs(cs->flush_data,
345 RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
346 } else {
347 /* Try to avoid busy-waiting in amdgpu_bo_wait. */
348 if (p_atomic_read(&bo->num_active_ioctls))
349 amdgpu_cs_sync_flush(rcs);
350 }
351 }
352
353 amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
354 RADEON_USAGE_READWRITE);
355 }
356
357 bo->ws->buffer_wait_time += os_time_get_nano() - time;
358 }
359 }
360
361 /* Buffer synchronization has been checked, now actually map the buffer. */
362 void *cpu = NULL;
363 uint64_t offset = 0;
364
365 if (bo->bo) {
366 real = bo;
367 } else {
368 real = bo->u.slab.real;
369 offset = bo->va - real->va;
370 }
371
372 if (usage & RADEON_TRANSFER_TEMPORARY) {
373 if (real->is_user_ptr) {
374 cpu = real->cpu_ptr;
375 } else {
376 if (!amdgpu_bo_do_map(real, &cpu))
377 return NULL;
378 }
379 } else {
380 cpu = p_atomic_read(&real->cpu_ptr);
381 if (!cpu) {
382 simple_mtx_lock(&real->lock);
383 /* Must re-check due to the possibility of a race. Re-check need not
384 * be atomic thanks to the lock. */
385 cpu = real->cpu_ptr;
386 if (!cpu) {
387 if (!amdgpu_bo_do_map(real, &cpu)) {
388 simple_mtx_unlock(&real->lock);
389 return NULL;
390 }
391 p_atomic_set(&real->cpu_ptr, cpu);
392 }
393 simple_mtx_unlock(&real->lock);
394 }
395 }
396
397 return (uint8_t*)cpu + offset;
398 }
399
400 static void amdgpu_bo_unmap(struct pb_buffer *buf)
401 {
402 struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
403 struct amdgpu_winsys_bo *real;
404
405 assert(!bo->sparse);
406
407 if (bo->is_user_ptr)
408 return;
409
410 real = bo->bo ? bo : bo->u.slab.real;
411 assert(real->u.real.map_count != 0 && "too many unmaps");
412 if (p_atomic_dec_zero(&real->u.real.map_count)) {
413 assert(!real->cpu_ptr &&
414 "too many unmaps or forgot RADEON_TRANSFER_TEMPORARY flag");
415
416 if (real->initial_domain & RADEON_DOMAIN_VRAM)
417 real->ws->mapped_vram -= real->base.size;
418 else if (real->initial_domain & RADEON_DOMAIN_GTT)
419 real->ws->mapped_gtt -= real->base.size;
420 real->ws->num_mapped_buffers--;
421 }
422
423 amdgpu_bo_cpu_unmap(real->bo);
424 }
425
426 static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
427 amdgpu_bo_destroy_or_cache
428 /* other functions are never called */
429 };
430
431 static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
432 {
433 struct amdgpu_winsys *ws = bo->ws;
434
435 assert(bo->bo);
436
437 if (ws->debug_all_bos) {
438 simple_mtx_lock(&ws->global_bo_list_lock);
439 list_addtail(&bo->u.real.global_list_item, &ws->global_bo_list);
440 ws->num_buffers++;
441 simple_mtx_unlock(&ws->global_bo_list_lock);
442 }
443 }
444
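/* Choose a GPU VM alignment for a new mapping: at least the requested
* alignment, rounded up to the PTE fragment size and, on GFX9+, to the
* highest power of two not exceeding the size, for faster address translation.
*/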
445 static uint64_t amdgpu_get_optimal_vm_alignment(struct amdgpu_winsys *ws,
446 uint64_t size, unsigned alignment)
447 {
448 uint64_t vm_alignment = alignment;
449
450 /* Increase the VM alignment for faster address translation. */
451 if (size >= ws->info.pte_fragment_size)
452 vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);
453
454 /* Gfx9: Increase the VM alignment to the most significant bit set
455 * in the size for faster address translation.
456 */
457 if (ws->info.chip_class >= GFX9) {
458 unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
459 uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;
460
461 vm_alignment = MAX2(vm_alignment, msb_alignment);
462 }
463 return vm_alignment;
464 }
465
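/* Allocate a new real BO from the kernel: translate the radeon domain/flags
* into GEM heap and creation flags, allocate a GPU VA range (with an extra
* guard gap when VM checking is enabled), map the BO into it, and set up the
* winsys bookkeeping (unique id, memory accounting, KMS handle, debug list).
*/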
466 static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
467 uint64_t size,
468 unsigned alignment,
469 enum radeon_bo_domain initial_domain,
470 unsigned flags,
471 int heap)
472 {
473 struct amdgpu_bo_alloc_request request = {0};
474 amdgpu_bo_handle buf_handle;
475 uint64_t va = 0;
476 struct amdgpu_winsys_bo *bo;
477 amdgpu_va_handle va_handle;
478 int r;
479
480 /* Exactly one of VRAM, GTT, GDS, or OA must be specified. */
481 assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT |
482 RADEON_DOMAIN_GDS |
483 RADEON_DOMAIN_OA)) == 1);
484
485 bo = CALLOC_STRUCT(amdgpu_winsys_bo);
486 if (!bo) {
487 return NULL;
488 }
489
490 if (heap >= 0) {
491 pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
492 heap);
493 }
494 request.alloc_size = size;
495 request.phys_alignment = alignment;
496
497 if (initial_domain & RADEON_DOMAIN_VRAM) {
498 request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
499
500 /* Since VRAM and GTT have almost the same performance on APUs, we could
501 * just set GTT. However, to decrease GTT (RAM) usage, which is shared
502 * with the OS, allow VRAM placements too. The point is not that VRAM is
503 * better here, but that it would otherwise sit unused and wasted.
504 */
505 if (!ws->info.has_dedicated_vram)
506 request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
507 }
508
509 if (initial_domain & RADEON_DOMAIN_GTT)
510 request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
511 if (initial_domain & RADEON_DOMAIN_GDS)
512 request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
513 if (initial_domain & RADEON_DOMAIN_OA)
514 request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;
515
516 if (flags & RADEON_FLAG_NO_CPU_ACCESS)
517 request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
518 if (flags & RADEON_FLAG_GTT_WC)
519 request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
520 if (ws->zero_all_vram_allocs &&
521 (request.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM))
522 request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
523
524 r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
525 if (r) {
526 fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
527 fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
528 fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
529 fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
530 goto error_bo_alloc;
531 }
532
533 if (initial_domain & RADEON_DOMAIN_VRAM_GTT) {
534 unsigned va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
535
536 r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
537 size + va_gap_size,
538 amdgpu_get_optimal_vm_alignment(ws, size, alignment),
539 0, &va, &va_handle,
540 (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
541 AMDGPU_VA_RANGE_HIGH);
542 if (r)
543 goto error_va_alloc;
544
545 unsigned vm_flags = AMDGPU_VM_PAGE_READABLE |
546 AMDGPU_VM_PAGE_EXECUTABLE;
547
548 if (!(flags & RADEON_FLAG_READ_ONLY))
549 vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;
550
551 r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,
552 AMDGPU_VA_OP_MAP);
553 if (r)
554 goto error_va_map;
555 }
556
557 simple_mtx_init(&bo->lock, mtx_plain);
558 pipe_reference_init(&bo->base.reference, 1);
559 bo->base.alignment = alignment;
560 bo->base.usage = 0;
561 bo->base.size = size;
562 bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
563 bo->ws = ws;
564 bo->bo = buf_handle;
565 bo->va = va;
566 bo->u.real.va_handle = va_handle;
567 bo->initial_domain = initial_domain;
568 bo->flags = flags;
569 bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
570
571 if (initial_domain & RADEON_DOMAIN_VRAM)
572 ws->allocated_vram += align64(size, ws->info.gart_page_size);
573 else if (initial_domain & RADEON_DOMAIN_GTT)
574 ws->allocated_gtt += align64(size, ws->info.gart_page_size);
575
576 amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle);
577
578 amdgpu_add_buffer_to_global_list(bo);
579
580 return bo;
581
582 error_va_map:
583 amdgpu_va_range_free(va_handle);
584
585 error_va_alloc:
586 amdgpu_bo_free(buf_handle);
587
588 error_bo_alloc:
589 FREE(bo);
590 return NULL;
591 }
592
593 bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
594 {
595 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
596
597 if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
598 return false;
599 }
600
601 return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
602 }
603
604 bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
605 {
606 struct amdgpu_winsys_bo *bo = NULL; /* NULL init so container_of can use bo as its typed sample pointer */
607 bo = container_of(entry, bo, u.slab.entry);
608
609 return amdgpu_bo_can_reclaim(&bo->base);
610 }
611
612 static struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size)
613 {
614 /* Find the correct slab allocator for the given size. */
615 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
616 struct pb_slabs *slabs = &ws->bo_slabs[i];
617
618 if (size <= 1 << (slabs->min_order + slabs->num_orders - 1))
619 return slabs;
620 }
621
622 assert(0);
623 return NULL;
624 }
625
626 static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf)
627 {
628 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
629
630 assert(!bo->bo);
631
632 pb_slab_free(get_slabs(bo->ws, bo->base.size), &bo->u.slab.entry);
633 }
634
635 static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = {
636 amdgpu_bo_slab_destroy
637 /* other functions are never called */
638 };
639
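/* pb_slabs callback: create a slab for the given heap and entry size. The
* slab's backing buffer is allocated with amdgpu_bo_create (and may itself be
* suballocated from a bigger slab); the entries share its VA range and are
* handed out through the slab free list.
*/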
640 struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
641 unsigned entry_size,
642 unsigned group_index)
643 {
644 struct amdgpu_winsys *ws = priv;
645 struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
646 enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
647 enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
648 uint32_t base_id;
649 unsigned slab_size = 0;
650
651 if (!slab)
652 return NULL;
653
654 /* Determine the slab buffer size. */
655 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
656 struct pb_slabs *slabs = &ws->bo_slabs[i];
657 unsigned max_entry_size = 1 << (slabs->min_order + slabs->num_orders - 1);
658
659 if (entry_size <= max_entry_size) {
660 /* The slab size is twice the size of the largest possible entry. */
661 slab_size = max_entry_size * 2;
662
663 /* The largest slab should have the same size as the PTE fragment
664 * size to get faster address translation.
665 */
666 if (i == NUM_SLAB_ALLOCATORS - 1 &&
667 slab_size < ws->info.pte_fragment_size)
668 slab_size = ws->info.pte_fragment_size;
669 break;
670 }
671 }
672 assert(slab_size != 0);
673
674 slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(ws,
675 slab_size, slab_size,
676 domains, flags));
677 if (!slab->buffer)
678 goto fail;
679
680 slab->base.num_entries = slab->buffer->base.size / entry_size;
681 slab->base.num_free = slab->base.num_entries;
682 slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
683 if (!slab->entries)
684 goto fail_buffer;
685
686 list_inithead(&slab->base.free);
687
688 base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab->base.num_entries);
689
690 for (unsigned i = 0; i < slab->base.num_entries; ++i) {
691 struct amdgpu_winsys_bo *bo = &slab->entries[i];
692
693 simple_mtx_init(&bo->lock, mtx_plain);
694 bo->base.alignment = entry_size;
695 bo->base.usage = slab->buffer->base.usage;
696 bo->base.size = entry_size;
697 bo->base.vtbl = &amdgpu_winsys_bo_slab_vtbl;
698 bo->ws = ws;
699 bo->va = slab->buffer->va + i * entry_size;
700 bo->initial_domain = domains;
701 bo->unique_id = base_id + i;
702 bo->u.slab.entry.slab = &slab->base;
703 bo->u.slab.entry.group_index = group_index;
704
705 if (slab->buffer->bo) {
706 /* The slab is not suballocated. */
707 bo->u.slab.real = slab->buffer;
708 } else {
709 /* The slab is allocated out of a bigger slab. */
710 bo->u.slab.real = slab->buffer->u.slab.real;
711 assert(bo->u.slab.real->bo);
712 }
713
714 list_addtail(&bo->u.slab.entry.head, &slab->base.free);
715 }
716
717 return &slab->base;
718
719 fail_buffer:
720 amdgpu_winsys_bo_reference(&slab->buffer, NULL);
721 fail:
722 FREE(slab);
723 return NULL;
724 }
725
726 void amdgpu_bo_slab_free(void *priv, struct pb_slab *pslab)
727 {
728 struct amdgpu_slab *slab = amdgpu_slab(pslab);
729
730 for (unsigned i = 0; i < slab->base.num_entries; ++i) {
731 amdgpu_bo_remove_fences(&slab->entries[i]);
732 simple_mtx_destroy(&slab->entries[i].lock);
733 }
734
735 FREE(slab->entries);
736 amdgpu_winsys_bo_reference(&slab->buffer, NULL);
737 FREE(slab);
738 }
739
740 #if DEBUG_SPARSE_COMMITS
741 static void
742 sparse_dump(struct amdgpu_winsys_bo *bo, const char *func)
743 {
744 fprintf(stderr, "%s: %p (size=%"PRIu64", num_va_pages=%u) @ %s\n"
745 "Commitments:\n",
746 __func__, bo, bo->base.size, bo->u.sparse.num_va_pages, func);
747
748 struct amdgpu_sparse_backing *span_backing = NULL;
749 uint32_t span_first_backing_page = 0;
750 uint32_t span_first_va_page = 0;
751 uint32_t va_page = 0;
752
753 for (;;) {
754 struct amdgpu_sparse_backing *backing = 0;
755 uint32_t backing_page = 0;
756
757 if (va_page < bo->u.sparse.num_va_pages) {
758 backing = bo->u.sparse.commitments[va_page].backing;
759 backing_page = bo->u.sparse.commitments[va_page].page;
760 }
761
762 if (span_backing &&
763 (backing != span_backing ||
764 backing_page != span_first_backing_page + (va_page - span_first_va_page))) {
765 fprintf(stderr, " %u..%u: backing=%p:%u..%u\n",
766 span_first_va_page, va_page - 1, span_backing,
767 span_first_backing_page,
768 span_first_backing_page + (va_page - span_first_va_page) - 1);
769
770 span_backing = NULL;
771 }
772
773 if (va_page >= bo->u.sparse.num_va_pages)
774 break;
775
776 if (backing && !span_backing) {
777 span_backing = backing;
778 span_first_backing_page = backing_page;
779 span_first_va_page = va_page;
780 }
781
782 va_page++;
783 }
784
785 fprintf(stderr, "Backing:\n");
786
787 list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {
788 fprintf(stderr, " %p (size=%"PRIu64")\n", backing, backing->bo->base.size);
789 for (unsigned i = 0; i < backing->num_chunks; ++i)
790 fprintf(stderr, " %u..%u\n", backing->chunks[i].begin, backing->chunks[i].end);
791 }
792 }
793 #endif
794
795 /*
796 * Attempt to allocate the given number of backing pages. Fewer pages may be
797 * allocated (depending on the fragmentation of existing backing buffers),
798 * which will be reflected by a change to *pnum_pages.
799 */
800 static struct amdgpu_sparse_backing *
801 sparse_backing_alloc(struct amdgpu_winsys_bo *bo, uint32_t *pstart_page, uint32_t *pnum_pages)
802 {
803 struct amdgpu_sparse_backing *best_backing;
804 unsigned best_idx;
805 uint32_t best_num_pages;
806
807 best_backing = NULL;
808 best_idx = 0;
809 best_num_pages = 0;
810
811 /* This is a very simple and inefficient best-fit algorithm. */
812 list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {
813 for (unsigned idx = 0; idx < backing->num_chunks; ++idx) {
814 uint32_t cur_num_pages = backing->chunks[idx].end - backing->chunks[idx].begin;
815 if ((best_num_pages < *pnum_pages && cur_num_pages > best_num_pages) ||
816 (best_num_pages > *pnum_pages && cur_num_pages < best_num_pages)) {
817 best_backing = backing;
818 best_idx = idx;
819 best_num_pages = cur_num_pages;
820 }
821 }
822 }
823
824 /* Allocate a new backing buffer if necessary. */
825 if (!best_backing) {
826 struct pb_buffer *buf;
827 uint64_t size;
828 uint32_t pages;
829
830 best_backing = CALLOC_STRUCT(amdgpu_sparse_backing);
831 if (!best_backing)
832 return NULL;
833
834 best_backing->max_chunks = 4;
835 best_backing->chunks = CALLOC(best_backing->max_chunks,
836 sizeof(*best_backing->chunks));
837 if (!best_backing->chunks) {
838 FREE(best_backing);
839 return NULL;
840 }
841
842 assert(bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.size, RADEON_SPARSE_PAGE_SIZE));
843
844 size = MIN3(bo->base.size / 16,
845 8 * 1024 * 1024,
846 bo->base.size - (uint64_t)bo->u.sparse.num_backing_pages * RADEON_SPARSE_PAGE_SIZE);
847 size = MAX2(size, RADEON_SPARSE_PAGE_SIZE);
848
849 buf = amdgpu_bo_create(bo->ws, size, RADEON_SPARSE_PAGE_SIZE,
850 bo->initial_domain,
851 bo->u.sparse.flags | RADEON_FLAG_NO_SUBALLOC);
852 if (!buf) {
853 FREE(best_backing->chunks);
854 FREE(best_backing);
855 return NULL;
856 }
857
858 /* We might have gotten a bigger buffer than requested via caching. */
859 pages = buf->size / RADEON_SPARSE_PAGE_SIZE;
860
861 best_backing->bo = amdgpu_winsys_bo(buf);
862 best_backing->num_chunks = 1;
863 best_backing->chunks[0].begin = 0;
864 best_backing->chunks[0].end = pages;
865
866 list_add(&best_backing->list, &bo->u.sparse.backing);
867 bo->u.sparse.num_backing_pages += pages;
868
869 best_idx = 0;
870 best_num_pages = pages;
871 }
872
873 *pnum_pages = MIN2(*pnum_pages, best_num_pages);
874 *pstart_page = best_backing->chunks[best_idx].begin;
875 best_backing->chunks[best_idx].begin += *pnum_pages;
876
877 if (best_backing->chunks[best_idx].begin >= best_backing->chunks[best_idx].end) {
878 memmove(&best_backing->chunks[best_idx], &best_backing->chunks[best_idx + 1],
879 sizeof(*best_backing->chunks) * (best_backing->num_chunks - best_idx - 1));
880 best_backing->num_chunks--;
881 }
882
883 return best_backing;
884 }
885
886 static void
887 sparse_free_backing_buffer(struct amdgpu_winsys_bo *bo,
888 struct amdgpu_sparse_backing *backing)
889 {
890 struct amdgpu_winsys *ws = backing->bo->ws;
891
892 bo->u.sparse.num_backing_pages -= backing->bo->base.size / RADEON_SPARSE_PAGE_SIZE;
893
894 simple_mtx_lock(&ws->bo_fence_lock);
895 amdgpu_add_fences(backing->bo, bo->num_fences, bo->fences);
896 simple_mtx_unlock(&ws->bo_fence_lock);
897
898 list_del(&backing->list);
899 amdgpu_winsys_bo_reference(&backing->bo, NULL);
900 FREE(backing->chunks);
901 FREE(backing);
902 }
903
904 /*
905 * Return a range of pages from the given backing buffer back into the
906 * free structure.
907 */
908 static bool
909 sparse_backing_free(struct amdgpu_winsys_bo *bo,
910 struct amdgpu_sparse_backing *backing,
911 uint32_t start_page, uint32_t num_pages)
912 {
913 uint32_t end_page = start_page + num_pages;
914 unsigned low = 0;
915 unsigned high = backing->num_chunks;
916
917 /* Find the first chunk with begin >= start_page. */
918 while (low < high) {
919 unsigned mid = low + (high - low) / 2;
920
921 if (backing->chunks[mid].begin >= start_page)
922 high = mid;
923 else
924 low = mid + 1;
925 }
926
927 assert(low >= backing->num_chunks || end_page <= backing->chunks[low].begin);
928 assert(low == 0 || backing->chunks[low - 1].end <= start_page);
929
930 if (low > 0 && backing->chunks[low - 1].end == start_page) {
931 backing->chunks[low - 1].end = end_page;
932
933 if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
934 backing->chunks[low - 1].end = backing->chunks[low].end;
935 memmove(&backing->chunks[low], &backing->chunks[low + 1],
936 sizeof(*backing->chunks) * (backing->num_chunks - low - 1));
937 backing->num_chunks--;
938 }
939 } else if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
940 backing->chunks[low].begin = start_page;
941 } else {
942 if (backing->num_chunks >= backing->max_chunks) {
943 unsigned new_max_chunks = 2 * backing->max_chunks;
944 struct amdgpu_sparse_backing_chunk *new_chunks =
945 REALLOC(backing->chunks,
946 sizeof(*backing->chunks) * backing->max_chunks,
947 sizeof(*backing->chunks) * new_max_chunks);
948 if (!new_chunks)
949 return false;
950
951 backing->max_chunks = new_max_chunks;
952 backing->chunks = new_chunks;
953 }
954
955 memmove(&backing->chunks[low + 1], &backing->chunks[low],
956 sizeof(*backing->chunks) * (backing->num_chunks - low));
957 backing->chunks[low].begin = start_page;
958 backing->chunks[low].end = end_page;
959 backing->num_chunks++;
960 }
961
962 if (backing->num_chunks == 1 && backing->chunks[0].begin == 0 &&
963 backing->chunks[0].end == backing->bo->base.size / RADEON_SPARSE_PAGE_SIZE)
964 sparse_free_backing_buffer(bo, backing);
965
966 return true;
967 }
968
969 static void amdgpu_bo_sparse_destroy(struct pb_buffer *_buf)
970 {
971 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
972 int r;
973
974 assert(!bo->bo && bo->sparse);
975
976 r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
977 (uint64_t)bo->u.sparse.num_va_pages * RADEON_SPARSE_PAGE_SIZE,
978 bo->va, 0, AMDGPU_VA_OP_CLEAR);
979 if (r) {
980 fprintf(stderr, "amdgpu: clearing PRT VA region on destroy failed (%d)\n", r);
981 }
982
983 while (!list_is_empty(&bo->u.sparse.backing)) {
984 struct amdgpu_sparse_backing *dummy = NULL;
985 sparse_free_backing_buffer(bo,
986 container_of(bo->u.sparse.backing.next,
987 dummy, list));
988 }
989
990 amdgpu_va_range_free(bo->u.sparse.va_handle);
991 FREE(bo->u.sparse.commitments);
992 simple_mtx_destroy(&bo->lock);
993 FREE(bo);
994 }
995
996 static const struct pb_vtbl amdgpu_winsys_bo_sparse_vtbl = {
997 amdgpu_bo_sparse_destroy
998 /* other functions are never called */
999 };
1000
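/* Create a sparse (PRT) buffer. No memory is committed up front: the whole
* VA range is mapped with AMDGPU_VM_PAGE_PRT, and pages are committed or
* uncommitted later through amdgpu_bo_sparse_commit.
*
* Illustrative sketch only; the chosen flags satisfy the asserts in
* amdgpu_bo_create (sparse implies NO_CPU_ACCESS, VRAM implies GTT_WC):
*
*    struct pb_buffer *buf =
*       amdgpu_bo_create(ws, 1ull << 30, RADEON_SPARSE_PAGE_SIZE,
*                        RADEON_DOMAIN_VRAM,
*                        RADEON_FLAG_SPARSE | RADEON_FLAG_NO_CPU_ACCESS |
*                        RADEON_FLAG_GTT_WC);
*    amdgpu_bo_sparse_commit(buf, 0, 16 * RADEON_SPARSE_PAGE_SIZE, true);
*/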
1001 static struct pb_buffer *
1002 amdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size,
1003 enum radeon_bo_domain domain,
1004 enum radeon_bo_flag flags)
1005 {
1006 struct amdgpu_winsys_bo *bo;
1007 uint64_t map_size;
1008 uint64_t va_gap_size;
1009 int r;
1010
1011 /* We use 32-bit page numbers; refuse to attempt allocating sparse buffers
1012 * that exceed this limit. This is not really a restriction: we don't have
1013 * that much virtual address space anyway.
1014 */
1015 if (size > (uint64_t)INT32_MAX * RADEON_SPARSE_PAGE_SIZE)
1016 return NULL;
1017
1018 bo = CALLOC_STRUCT(amdgpu_winsys_bo);
1019 if (!bo)
1020 return NULL;
1021
1022 simple_mtx_init(&bo->lock, mtx_plain);
1023 pipe_reference_init(&bo->base.reference, 1);
1024 bo->base.alignment = RADEON_SPARSE_PAGE_SIZE;
1025 bo->base.size = size;
1026 bo->base.vtbl = &amdgpu_winsys_bo_sparse_vtbl;
1027 bo->ws = ws;
1028 bo->initial_domain = domain;
1029 bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
1030 bo->sparse = true;
1031 bo->u.sparse.flags = flags & ~RADEON_FLAG_SPARSE;
1032
1033 bo->u.sparse.num_va_pages = DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);
1034 bo->u.sparse.commitments = CALLOC(bo->u.sparse.num_va_pages,
1035 sizeof(*bo->u.sparse.commitments));
1036 if (!bo->u.sparse.commitments)
1037 goto error_alloc_commitments;
1038
1039 list_inithead(&bo->u.sparse.backing);
1040
1041 /* For simplicity, we always map a multiple of the page size. */
1042 map_size = align64(size, RADEON_SPARSE_PAGE_SIZE);
1043 va_gap_size = ws->check_vm ? 4 * RADEON_SPARSE_PAGE_SIZE : 0;
1044 r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
1045 map_size + va_gap_size, RADEON_SPARSE_PAGE_SIZE,
1046 0, &bo->va, &bo->u.sparse.va_handle,
1047 AMDGPU_VA_RANGE_HIGH);
1048 if (r)
1049 goto error_va_alloc;
1050
1051 r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0, size, bo->va,
1052 AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
1053 if (r)
1054 goto error_va_map;
1055
1056 return &bo->base;
1057
1058 error_va_map:
1059 amdgpu_va_range_free(bo->u.sparse.va_handle);
1060 error_va_alloc:
1061 FREE(bo->u.sparse.commitments);
1062 error_alloc_commitments:
1063 simple_mtx_destroy(&bo->lock);
1064 FREE(bo);
1065 return NULL;
1066 }
1067
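/* radeon_winsys::buffer_commit for sparse buffers: commit or uncommit the
* page-aligned range [offset, offset + size). Committing fills uncommitted
* spans with pages taken from backing buffers and maps them over the PRT
* range; uncommitting remaps the range as PRT again and returns the pages to
* the backing free lists.
*/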
1068 static bool
1069 amdgpu_bo_sparse_commit(struct pb_buffer *buf, uint64_t offset, uint64_t size,
1070 bool commit)
1071 {
1072 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buf);
1073 struct amdgpu_sparse_commitment *comm;
1074 uint32_t va_page, end_va_page;
1075 bool ok = true;
1076 int r;
1077
1078 assert(bo->sparse);
1079 assert(offset % RADEON_SPARSE_PAGE_SIZE == 0);
1080 assert(offset <= bo->base.size);
1081 assert(size <= bo->base.size - offset);
1082 assert(size % RADEON_SPARSE_PAGE_SIZE == 0 || offset + size == bo->base.size);
1083
1084 comm = bo->u.sparse.commitments;
1085 va_page = offset / RADEON_SPARSE_PAGE_SIZE;
1086 end_va_page = va_page + DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE);
1087
1088 simple_mtx_lock(&bo->lock);
1089
1090 #if DEBUG_SPARSE_COMMITS
1091 sparse_dump(bo, __func__);
1092 #endif
1093
1094 if (commit) {
1095 while (va_page < end_va_page) {
1096 uint32_t span_va_page;
1097
1098 /* Skip pages that are already committed. */
1099 if (comm[va_page].backing) {
1100 va_page++;
1101 continue;
1102 }
1103
1104 /* Determine length of uncommitted span. */
1105 span_va_page = va_page;
1106 while (va_page < end_va_page && !comm[va_page].backing)
1107 va_page++;
1108
1109 /* Fill the uncommitted span with chunks of backing memory. */
1110 while (span_va_page < va_page) {
1111 struct amdgpu_sparse_backing *backing;
1112 uint32_t backing_start, backing_size;
1113
1114 backing_size = va_page - span_va_page;
1115 backing = sparse_backing_alloc(bo, &backing_start, &backing_size);
1116 if (!backing) {
1117 ok = false;
1118 goto out;
1119 }
1120
1121 r = amdgpu_bo_va_op_raw(bo->ws->dev, backing->bo->bo,
1122 (uint64_t)backing_start * RADEON_SPARSE_PAGE_SIZE,
1123 (uint64_t)backing_size * RADEON_SPARSE_PAGE_SIZE,
1124 bo->va + (uint64_t)span_va_page * RADEON_SPARSE_PAGE_SIZE,
1125 AMDGPU_VM_PAGE_READABLE |
1126 AMDGPU_VM_PAGE_WRITEABLE |
1127 AMDGPU_VM_PAGE_EXECUTABLE,
1128 AMDGPU_VA_OP_REPLACE);
1129 if (r) {
1130 ok = sparse_backing_free(bo, backing, backing_start, backing_size);
1131 assert(ok && "sufficient memory should already be allocated");
1132
1133 ok = false;
1134 goto out;
1135 }
1136
1137 while (backing_size) {
1138 comm[span_va_page].backing = backing;
1139 comm[span_va_page].page = backing_start;
1140 span_va_page++;
1141 backing_start++;
1142 backing_size--;
1143 }
1144 }
1145 }
1146 } else {
1147 r = amdgpu_bo_va_op_raw(bo->ws->dev, NULL, 0,
1148 (uint64_t)(end_va_page - va_page) * RADEON_SPARSE_PAGE_SIZE,
1149 bo->va + (uint64_t)va_page * RADEON_SPARSE_PAGE_SIZE,
1150 AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_REPLACE);
1151 if (r) {
1152 ok = false;
1153 goto out;
1154 }
1155
1156 while (va_page < end_va_page) {
1157 struct amdgpu_sparse_backing *backing;
1158 uint32_t backing_start;
1159 uint32_t span_pages;
1160
1161 /* Skip pages that are already uncommitted. */
1162 if (!comm[va_page].backing) {
1163 va_page++;
1164 continue;
1165 }
1166
1167 /* Group contiguous spans of pages. */
1168 backing = comm[va_page].backing;
1169 backing_start = comm[va_page].page;
1170 comm[va_page].backing = NULL;
1171
1172 span_pages = 1;
1173 va_page++;
1174
1175 while (va_page < end_va_page &&
1176 comm[va_page].backing == backing &&
1177 comm[va_page].page == backing_start + span_pages) {
1178 comm[va_page].backing = NULL;
1179 va_page++;
1180 span_pages++;
1181 }
1182
1183 if (!sparse_backing_free(bo, backing, backing_start, span_pages)) {
1184 /* Couldn't allocate tracking data structures, so we have to leak */
1185 fprintf(stderr, "amdgpu: leaking PRT backing memory\n");
1186 ok = false;
1187 }
1188 }
1189 }
1190 out:
1191
1192 simple_mtx_unlock(&bo->lock);
1193
1194 return ok;
1195 }
1196
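/* Convert the TILE_SPLIT tiling-flag field to bytes and back (legacy,
* pre-GFX9 metadata).
*/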
1197 static unsigned eg_tile_split(unsigned tile_split)
1198 {
1199 switch (tile_split) {
1200 case 0: tile_split = 64; break;
1201 case 1: tile_split = 128; break;
1202 case 2: tile_split = 256; break;
1203 case 3: tile_split = 512; break;
1204 default:
1205 case 4: tile_split = 1024; break;
1206 case 5: tile_split = 2048; break;
1207 case 6: tile_split = 4096; break;
1208 }
1209 return tile_split;
1210 }
1211
1212 static unsigned eg_tile_split_rev(unsigned eg_tile_split)
1213 {
1214 switch (eg_tile_split) {
1215 case 64: return 0;
1216 case 128: return 1;
1217 case 256: return 2;
1218 case 512: return 3;
1219 default:
1220 case 1024: return 4;
1221 case 2048: return 5;
1222 case 4096: return 6;
1223 }
1224 }
1225
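/* Tiling-flag fields used below; presumably defined here so the build does
* not require a newer drm-uapi amdgpu_drm.h that already provides them.
*/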
1226 #define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44
1227 #define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1
1228 #define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
1229 #define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3
1230 #define AMDGPU_TILING_SCANOUT_SHIFT 63
1231 #define AMDGPU_TILING_SCANOUT_MASK 0x1
1232
1233 static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
1234 struct radeon_bo_metadata *md)
1235 {
1236 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
1237 struct amdgpu_bo_info info = {0};
1238 uint64_t tiling_flags;
1239 int r;
1240
1241 assert(bo->bo && "must not be called for slab entries");
1242
1243 r = amdgpu_bo_query_info(bo->bo, &info);
1244 if (r)
1245 return;
1246
1247 tiling_flags = info.metadata.tiling_info;
1248
1249 if (bo->ws->info.chip_class >= GFX9) {
1250 md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
1251
1252 md->u.gfx9.dcc_offset_256B = AMDGPU_TILING_GET(tiling_flags, DCC_OFFSET_256B);
1253 md->u.gfx9.dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX);
1254 md->u.gfx9.dcc_independent_64B = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B);
1255 md->u.gfx9.dcc_independent_128B = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B);
1256 md->u.gfx9.dcc_max_compressed_block_size = AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE);
1257 md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
1258 } else {
1259 md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
1260 md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
1261
1262 if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
1263 md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
1264 else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
1265 md->u.legacy.microtile = RADEON_LAYOUT_TILED;
1266
1267 md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
1268 md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
1269 md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
1270 md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
1271 md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
1272 md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
1273 md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
1274 }
1275
1276 md->size_metadata = info.metadata.size_metadata;
1277 memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
1278 }
1279
1280 static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
1281 struct radeon_bo_metadata *md)
1282 {
1283 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
1284 struct amdgpu_bo_metadata metadata = {0};
1285 uint64_t tiling_flags = 0;
1286
1287 assert(bo->bo && "must not be called for slab entries");
1288
1289 if (bo->ws->info.chip_class >= GFX9) {
1290 tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
1291
1292 tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256B);
1293 tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
1294 tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64B);
1295 tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128B);
1296 tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
1297 tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
1298 } else {
1299 if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
1300 tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
1301 else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
1302 tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
1303 else
1304 tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
1305
1306 tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
1307 tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
1308 tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
1309 if (md->u.legacy.tile_split)
1310 tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(md->u.legacy.tile_split));
1311 tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
1312 tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks)-1);
1313
1314 if (md->u.legacy.scanout)
1315 tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
1316 else
1317 tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
1318 }
1319
1320 metadata.tiling_info = tiling_flags;
1321 metadata.size_metadata = md->size_metadata;
1322 memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));
1323
1324 amdgpu_bo_set_metadata(bo->bo, &metadata);
1325 }
1326
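/* Main buffer allocation entry point. Small buffers are suballocated from
* slabs, sparse buffers go through amdgpu_bo_sparse_create, and the rest is
* served from the reusable cache when possible before falling back to a
* fresh kernel allocation (retried once after reclaiming buffers).
*/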
1327 struct pb_buffer *
1328 amdgpu_bo_create(struct amdgpu_winsys *ws,
1329 uint64_t size,
1330 unsigned alignment,
1331 enum radeon_bo_domain domain,
1332 enum radeon_bo_flag flags)
1333 {
1334 struct amdgpu_winsys_bo *bo;
1335 int heap = -1;
1336
1337 if (domain & (RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA))
1338 flags |= RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_SUBALLOC;
1339
1340 /* VRAM implies WC. This is not optional. */
1341 assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
1342
1343 /* NO_CPU_ACCESS is not valid with GTT. */
1344 assert(!(domain & RADEON_DOMAIN_GTT) || !(flags & RADEON_FLAG_NO_CPU_ACCESS));
1345
1346 /* Sparse buffers must have NO_CPU_ACCESS set. */
1347 assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS);
1348
1349 struct pb_slabs *last_slab = &ws->bo_slabs[NUM_SLAB_ALLOCATORS - 1];
1350 unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1);
1351
1352 /* Sub-allocate small buffers from slabs. */
1353 if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) &&
1354 size <= max_slab_entry_size &&
1355 /* The alignment must be at most the size of the smallest slab entry or
1356 * the next power of two of the buffer size. */
1357 alignment <= MAX2(1 << ws->bo_slabs[0].min_order, util_next_power_of_two(size))) {
1358 struct pb_slab_entry *entry;
1359 int heap = radeon_get_heap_index(domain, flags);
1360
1361 if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS)
1362 goto no_slab;
1363
1364 struct pb_slabs *slabs = get_slabs(ws, size);
1365 entry = pb_slab_alloc(slabs, size, heap);
1366 if (!entry) {
1367 /* Clean up buffer managers and try again. */
1368 amdgpu_clean_up_buffer_managers(ws);
1369
1370 entry = pb_slab_alloc(slabs, size, heap);
1371 }
1372 if (!entry)
1373 return NULL;
1374
1375 bo = NULL;
1376 bo = container_of(entry, bo, u.slab.entry);
1377
1378 pipe_reference_init(&bo->base.reference, 1);
1379
1380 return &bo->base;
1381 }
1382 no_slab:
1383
1384 if (flags & RADEON_FLAG_SPARSE) {
1385 assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0);
1386
1387 return amdgpu_bo_sparse_create(ws, size, domain, flags);
1388 }
1389
1390 /* This flag is irrelevant for the cache. */
1391 flags &= ~RADEON_FLAG_NO_SUBALLOC;
1392
1393 /* Align the size to the page size. This is the minimum alignment for normal
1394 * BOs; aligning here also helps the cached bufmgr, since small BOs such as
1395 * constant/uniform buffers then get better and more reuse.
1396 */
1397 if (domain & RADEON_DOMAIN_VRAM_GTT) {
1398 size = align64(size, ws->info.gart_page_size);
1399 alignment = align(alignment, ws->info.gart_page_size);
1400 }
1401
1402 bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
1403
1404 if (use_reusable_pool) {
1405 heap = radeon_get_heap_index(domain, flags);
1406 assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
1407
1408 /* Get a buffer from the cache. */
1409 bo = (struct amdgpu_winsys_bo*)
1410 pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, 0, heap);
1411 if (bo)
1412 return &bo->base;
1413 }
1414
1415 /* Create a new one. */
1416 bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap);
1417 if (!bo) {
1418 /* Clean up buffer managers and try again. */
1419 amdgpu_clean_up_buffer_managers(ws);
1420
1421 bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap);
1422 if (!bo)
1423 return NULL;
1424 }
1425
1426 bo->u.real.use_reusable_pool = use_reusable_pool;
1427 return &bo->base;
1428 }
1429
1430 static struct pb_buffer *
1431 amdgpu_buffer_create(struct radeon_winsys *ws,
1432 uint64_t size,
1433 unsigned alignment,
1434 enum radeon_bo_domain domain,
1435 enum radeon_bo_flag flags)
1436 {
1437 return amdgpu_bo_create(amdgpu_winsys(ws), size, alignment, domain,
1438 flags);
1439 }
1440
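/* Import a buffer from a GEM flink name or dma-buf fd. If the underlying BO
* was already imported or exported, the existing amdgpu_winsys_bo is looked
* up in bo_export_table and returned with its reference count bumped, so a
* BO never gets two VA mappings.
*/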
1441 static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
1442 struct winsys_handle *whandle,
1443 unsigned vm_alignment)
1444 {
1445 struct amdgpu_winsys *ws = amdgpu_winsys(rws);
1446 struct amdgpu_winsys_bo *bo = NULL;
1447 enum amdgpu_bo_handle_type type;
1448 struct amdgpu_bo_import_result result = {0};
1449 uint64_t va;
1450 amdgpu_va_handle va_handle = NULL;
1451 struct amdgpu_bo_info info = {0};
1452 enum radeon_bo_domain initial = 0;
1453 enum radeon_bo_flag flags = 0;
1454 int r;
1455
1456 switch (whandle->type) {
1457 case WINSYS_HANDLE_TYPE_SHARED:
1458 type = amdgpu_bo_handle_type_gem_flink_name;
1459 break;
1460 case WINSYS_HANDLE_TYPE_FD:
1461 type = amdgpu_bo_handle_type_dma_buf_fd;
1462 break;
1463 default:
1464 return NULL;
1465 }
1466
1467 r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
1468 if (r)
1469 return NULL;
1470
1471 simple_mtx_lock(&ws->bo_export_table_lock);
1472 bo = util_hash_table_get(ws->bo_export_table, result.buf_handle);
1473
1474 /* If the amdgpu_winsys_bo instance already exists, bump the reference
1475 * counter and return it.
1476 */
1477 if (bo) {
1478 p_atomic_inc(&bo->base.reference.count);
1479 simple_mtx_unlock(&ws->bo_export_table_lock);
1480
1481 /* Release the buffer handle, because we don't need it anymore.
1482 * This function is returning an existing buffer, which has its own
1483 * handle.
1484 */
1485 amdgpu_bo_free(result.buf_handle);
1486 return &bo->base;
1487 }
1488
1489 /* Get initial domains. */
1490 r = amdgpu_bo_query_info(result.buf_handle, &info);
1491 if (r)
1492 goto error;
1493
1494 r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
1495 result.alloc_size,
1496 amdgpu_get_optimal_vm_alignment(ws, result.alloc_size,
1497 vm_alignment),
1498 0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH);
1499 if (r)
1500 goto error;
1501
1502 bo = CALLOC_STRUCT(amdgpu_winsys_bo);
1503 if (!bo)
1504 goto error;
1505
1506 r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
1507 if (r)
1508 goto error;
1509
1510 if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
1511 initial |= RADEON_DOMAIN_VRAM;
1512 if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
1513 initial |= RADEON_DOMAIN_GTT;
1514 if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
1515 flags |= RADEON_FLAG_NO_CPU_ACCESS;
1516 if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
1517 flags |= RADEON_FLAG_GTT_WC;
1518
1519 /* Initialize the structure. */
1520 simple_mtx_init(&bo->lock, mtx_plain);
1521 pipe_reference_init(&bo->base.reference, 1);
1522 bo->base.alignment = info.phys_alignment;
1523 bo->bo = result.buf_handle;
1524 bo->base.size = result.alloc_size;
1525 bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
1526 bo->ws = ws;
1527 bo->va = va;
1528 bo->u.real.va_handle = va_handle;
1529 bo->initial_domain = initial;
1530 bo->flags = flags;
1531 bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
1532 bo->is_shared = true;
1533
1534 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
1535 ws->allocated_vram += align64(bo->base.size, ws->info.gart_page_size);
1536 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
1537 ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);
1538
1539 amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle);
1540
1541 amdgpu_add_buffer_to_global_list(bo);
1542
1543 _mesa_hash_table_insert(ws->bo_export_table, bo->bo, bo);
1544 simple_mtx_unlock(&ws->bo_export_table_lock);
1545
1546 return &bo->base;
1547
1548 error:
1549 simple_mtx_unlock(&ws->bo_export_table_lock);
1550 if (bo)
1551 FREE(bo);
1552 if (va_handle)
1553 amdgpu_va_range_free(va_handle);
1554 amdgpu_bo_free(result.buf_handle);
1555 return NULL;
1556 }
1557
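/* Export a buffer as a flink name, KMS handle, or dma-buf fd. Slab entries
* and sparse buffers cannot be exported. A KMS handle for a foreign DRM file
* description is obtained by exporting a dma-buf and importing it into that
* fd, and is cached in sws->kms_handles for reuse and cleanup.
*/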
1558 static bool amdgpu_bo_get_handle(struct radeon_winsys *rws,
1559 struct pb_buffer *buffer,
1560 struct winsys_handle *whandle)
1561 {
1562 struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws);
1563 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
1564 struct amdgpu_winsys *ws = bo->ws;
1565 enum amdgpu_bo_handle_type type;
1566 struct hash_entry *entry;
1567 int r;
1568
1569 /* Don't allow exports of slab entries and sparse buffers. */
1570 if (!bo->bo)
1571 return false;
1572
1573 bo->u.real.use_reusable_pool = false;
1574
1575 switch (whandle->type) {
1576 case WINSYS_HANDLE_TYPE_SHARED:
1577 type = amdgpu_bo_handle_type_gem_flink_name;
1578 break;
1579 case WINSYS_HANDLE_TYPE_KMS:
1580 if (sws->fd == ws->fd) {
1581 whandle->handle = bo->u.real.kms_handle;
1582
1583 if (bo->is_shared)
1584 return true;
1585
1586 goto hash_table_set;
1587 }
1588
1589 simple_mtx_lock(&ws->sws_list_lock);
1590 entry = _mesa_hash_table_search(sws->kms_handles, bo);
1591 simple_mtx_unlock(&ws->sws_list_lock);
1592 if (entry) {
1593 whandle->handle = (uintptr_t)entry->data;
1594 return true;
1595 }
1596 /* Fall through */
1597 case WINSYS_HANDLE_TYPE_FD:
1598 type = amdgpu_bo_handle_type_dma_buf_fd;
1599 break;
1600 default:
1601 return false;
1602 }
1603
1604 r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
1605 if (r)
1606 return false;
1607
1608 if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
1609 int dma_fd = whandle->handle;
1610
1611 r = drmPrimeFDToHandle(sws->fd, dma_fd, &whandle->handle);
1612 close(dma_fd);
1613
1614 if (r)
1615 return false;
1616
1617 simple_mtx_lock(&ws->sws_list_lock);
1618 _mesa_hash_table_insert_pre_hashed(sws->kms_handles,
1619 bo->u.real.kms_handle, bo,
1620 (void*)(uintptr_t)whandle->handle);
1621 simple_mtx_unlock(&ws->sws_list_lock);
1622 }
1623
1624 hash_table_set:
1625 simple_mtx_lock(&ws->bo_export_table_lock);
1626 _mesa_hash_table_insert(ws->bo_export_table, bo->bo, bo);
1627 simple_mtx_unlock(&ws->bo_export_table_lock);
1628
1629 bo->is_shared = true;
1630 return true;
1631 }
1632
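/* Wrap an existing user pointer (userptr) as a GTT buffer. The memory must
* remain valid for the lifetime of the BO; the size is rounded up to the
* GART page size for the kernel request.
*/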
1633 static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
1634 void *pointer, uint64_t size)
1635 {
1636 struct amdgpu_winsys *ws = amdgpu_winsys(rws);
1637 amdgpu_bo_handle buf_handle;
1638 struct amdgpu_winsys_bo *bo;
1639 uint64_t va;
1640 amdgpu_va_handle va_handle;
1641 /* Avoid failure when the size is not page aligned */
1642 uint64_t aligned_size = align64(size, ws->info.gart_page_size);
1643
1644 bo = CALLOC_STRUCT(amdgpu_winsys_bo);
1645 if (!bo)
1646 return NULL;
1647
1648 if (amdgpu_create_bo_from_user_mem(ws->dev, pointer,
1649 aligned_size, &buf_handle))
1650 goto error;
1651
1652 if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
1653 aligned_size,
1654 amdgpu_get_optimal_vm_alignment(ws, aligned_size,
1655 ws->info.gart_page_size),
1656 0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH))
1657 goto error_va_alloc;
1658
1659 if (amdgpu_bo_va_op(buf_handle, 0, aligned_size, va, 0, AMDGPU_VA_OP_MAP))
1660 goto error_va_map;
1661
1662 /* Initialize it. */
1663 bo->is_user_ptr = true;
1664 pipe_reference_init(&bo->base.reference, 1);
1665 simple_mtx_init(&bo->lock, mtx_plain);
1666 bo->bo = buf_handle;
1667 bo->base.alignment = 0;
1668 bo->base.size = size;
1669 bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
1670 bo->ws = ws;
1671 bo->cpu_ptr = pointer;
1672 bo->va = va;
1673 bo->u.real.va_handle = va_handle;
1674 bo->initial_domain = RADEON_DOMAIN_GTT;
1675 bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
1676
1677 ws->allocated_gtt += aligned_size;
1678
1679 amdgpu_add_buffer_to_global_list(bo);
1680
1681 amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle);
1682
1683 return (struct pb_buffer*)bo;
1684
1685 error_va_map:
1686 amdgpu_va_range_free(va_handle);
1687
1688 error_va_alloc:
1689 amdgpu_bo_free(buf_handle);
1690
1691 error:
1692 FREE(bo);
1693 return NULL;
1694 }
1695
1696 static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
1697 {
1698 return ((struct amdgpu_winsys_bo*)buf)->is_user_ptr;
1699 }
1700
1701 static bool amdgpu_bo_is_suballocated(struct pb_buffer *buf)
1702 {
1703 struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
1704
1705 return !bo->bo && !bo->sparse;
1706 }
1707
1708 static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
1709 {
1710 return ((struct amdgpu_winsys_bo*)buf)->va;
1711 }
1712
1713 void amdgpu_bo_init_functions(struct amdgpu_screen_winsys *ws)
1714 {
1715 ws->base.buffer_set_metadata = amdgpu_buffer_set_metadata;
1716 ws->base.buffer_get_metadata = amdgpu_buffer_get_metadata;
1717 ws->base.buffer_map = amdgpu_bo_map;
1718 ws->base.buffer_unmap = amdgpu_bo_unmap;
1719 ws->base.buffer_wait = amdgpu_bo_wait;
1720 ws->base.buffer_create = amdgpu_buffer_create;
1721 ws->base.buffer_from_handle = amdgpu_bo_from_handle;
1722 ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
1723 ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
1724 ws->base.buffer_is_suballocated = amdgpu_bo_is_suballocated;
1725 ws->base.buffer_get_handle = amdgpu_bo_get_handle;
1726 ws->base.buffer_commit = amdgpu_bo_sparse_commit;
1727 ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
1728 ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
1729 ws->base.buffer_get_flags = amdgpu_bo_get_flags;
1730 }