gallium/radeon/winsyses: reduce the number of pb_cache buckets
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 */

#include "amdgpu_cs.h"

#include "os/os_time.h"
#include "state_tracker/drm_driver.h"
#include <amdgpu_drm.h>
#include <xf86drm.h>
#include <stdio.h>
#include <inttypes.h>

static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
                 uint64_t size,
                 unsigned alignment,
                 enum radeon_bo_domain domain,
                 enum radeon_bo_flag flags);

static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_winsys *ws = bo->ws;
   int64_t abs_timeout;

   if (timeout == 0) {
      if (p_atomic_read(&bo->num_active_ioctls))
         return false;

   } else {
      abs_timeout = os_time_get_absolute_timeout(timeout);

      /* Wait if any ioctl is being submitted with this buffer. */
      if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
         return false;
   }

   if (bo->is_shared) {
      /* We can't use user fences for shared buffers, because user fences
       * are local to this process only. If we want to wait for all buffer
       * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
       */
      bool buffer_busy = true;
      int r;

      r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
      if (r)
         fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
                 r);
      return !buffer_busy;
   }

   if (timeout == 0) {
      unsigned idle_fences;
      bool buffer_idle;

      pipe_mutex_lock(ws->bo_fence_lock);

      for (idle_fences = 0; idle_fences < bo->num_fences; ++idle_fences) {
         if (!amdgpu_fence_wait(bo->fences[idle_fences], 0, false))
            break;
      }

      /* Release the idle fences to avoid checking them again later. */
      for (unsigned i = 0; i < idle_fences; ++i)
         amdgpu_fence_reference(&bo->fences[i], NULL);

      memmove(&bo->fences[0], &bo->fences[idle_fences],
              (bo->num_fences - idle_fences) * sizeof(*bo->fences));
      bo->num_fences -= idle_fences;

      buffer_idle = !bo->num_fences;
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   } else {
      bool buffer_idle = true;

      pipe_mutex_lock(ws->bo_fence_lock);
      while (bo->num_fences && buffer_idle) {
         struct pipe_fence_handle *fence = NULL;
         bool fence_idle = false;

         amdgpu_fence_reference(&fence, bo->fences[0]);

         /* Wait for the fence. */
         pipe_mutex_unlock(ws->bo_fence_lock);
         if (amdgpu_fence_wait(fence, abs_timeout, true))
            fence_idle = true;
         else
            buffer_idle = false;
         pipe_mutex_lock(ws->bo_fence_lock);

         /* Release an idle fence to avoid checking it again later, keeping in
          * mind that the fence array may have been modified by other threads.
          */
         if (fence_idle && bo->num_fences && bo->fences[0] == fence) {
            amdgpu_fence_reference(&bo->fences[0], NULL);
            memmove(&bo->fences[0], &bo->fences[1],
                    (bo->num_fences - 1) * sizeof(*bo->fences));
            bo->num_fences--;
         }

         amdgpu_fence_reference(&fence, NULL);
      }
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   }
}
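
/* Usage sketch (illustrative only; "rws" and "buf" are assumed to be a valid
 * radeon_winsys / pb_buffer pair, not defined here): callers reach the wait
 * above through the vtable filled in by amdgpu_bo_init_functions() at the
 * bottom of this file:
 *
 *    // non-blocking busy check: returns false right away if the GPU still
 *    // reads or writes the buffer
 *    if (!rws->buffer_wait(buf, 0, RADEON_USAGE_READWRITE))
 *       return;
 *
 *    // blocking variant: wait until all pending GPU accesses are done
 *    rws->buffer_wait(buf, PIPE_TIMEOUT_INFINITE, RADEON_USAGE_READWRITE);
 */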

static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
      struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
}

static void amdgpu_bo_remove_fences(struct amdgpu_winsys_bo *bo)
{
   for (unsigned i = 0; i < bo->num_fences; ++i)
      amdgpu_fence_reference(&bo->fences[i], NULL);

   FREE(bo->fences);
   bo->num_fences = 0;
   bo->max_fences = 0;
}

void amdgpu_bo_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   assert(bo->bo && "must not be called for slab entries");

   pipe_mutex_lock(bo->ws->global_bo_list_lock);
   LIST_DEL(&bo->u.real.global_list_item);
   bo->ws->num_buffers--;
   pipe_mutex_unlock(bo->ws->global_bo_list_lock);

   amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
   amdgpu_va_range_free(bo->u.real.va_handle);
   amdgpu_bo_free(bo->bo);

   amdgpu_bo_remove_fences(bo);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      bo->ws->allocated_gtt -= align64(bo->base.size, bo->ws->info.gart_page_size);

   if (bo->u.real.map_count >= 1) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->ws->mapped_vram -= bo->base.size;
      else if (bo->initial_domain & RADEON_DOMAIN_GTT)
         bo->ws->mapped_gtt -= bo->base.size;
   }

   FREE(bo);
}

static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   assert(bo->bo); /* slab buffers have a separate vtbl */

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(&bo->u.real.cache_entry);
   else
      amdgpu_bo_destroy(_buf);
}
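
/* Illustrative note (derived from the code in this file, not a statement from
 * the original comments): only buffers allocated through the reusable pool in
 * amdgpu_bo_create() take the pb_cache_add_buffer() path here. Buffers that
 * are exported through amdgpu_bo_get_handle() clear use_reusable_pool, and
 * imported ones never set it, so shared buffers are destroyed immediately
 * instead of being recycled to unrelated allocations.
 */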

static void *amdgpu_bo_map(struct pb_buffer *buf,
                           struct radeon_winsys_cs *rcs,
                           enum pipe_transfer_usage usage)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_winsys_bo *real;
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
   int r;
   void *cpu = NULL;
   uint64_t offset = 0;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_TRANSFER_DONTBLOCK) {
         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               cs->flush_cs(cs->flush_data, 0, NULL);
            } else {
               /* Try to avoid busy-waiting in amdgpu_bo_wait. */
               if (p_atomic_read(&bo->num_active_ioctls))
                  amdgpu_cs_sync_flush(rcs);
            }
            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (amdgpu_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data, 0, NULL);
               } else {
                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     amdgpu_cs_sync_flush(rcs);
               }
            }

            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->ws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   if (bo->bo) {
      real = bo;
   } else {
      real = bo->u.slab.real;
      offset = bo->va - real->va;
   }

   r = amdgpu_bo_cpu_map(real->bo, &cpu);
   if (r) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&real->ws->bo_cache);
      r = amdgpu_bo_cpu_map(real->bo, &cpu);
      if (r)
         return NULL;
   }

   if (p_atomic_inc_return(&real->u.real.map_count) == 1) {
      if (real->initial_domain & RADEON_DOMAIN_VRAM)
         real->ws->mapped_vram += real->base.size;
      else if (real->initial_domain & RADEON_DOMAIN_GTT)
         real->ws->mapped_gtt += real->base.size;
   }
   return (uint8_t*)cpu + offset;
}
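
/* Usage sketch (illustrative; "rws", "cs" and "buf" are assumed valid and not
 * defined here): the read-vs-write logic above means the transfer flags decide
 * how much synchronization the map does. A typical CPU read of a buffer the
 * GPU may still be writing would look roughly like this:
 *
 *    uint32_t *ptr = rws->buffer_map(buf, cs, PIPE_TRANSFER_READ);
 *    if (ptr) {
 *       // ... read the data ...
 *       rws->buffer_unmap(buf);
 *    }
 *
 * With PIPE_TRANSFER_UNSYNCHRONIZED no flush or wait happens at all, and with
 * PIPE_TRANSFER_DONTBLOCK the call returns NULL instead of waiting.
 */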

static void amdgpu_bo_unmap(struct pb_buffer *buf)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_winsys_bo *real;

   if (bo->user_ptr)
      return;

   real = bo->bo ? bo : bo->u.slab.real;

   if (p_atomic_dec_zero(&real->u.real.map_count)) {
      if (real->initial_domain & RADEON_DOMAIN_VRAM)
         real->ws->mapped_vram -= real->base.size;
      else if (real->initial_domain & RADEON_DOMAIN_GTT)
         real->ws->mapped_gtt -= real->base.size;
   }

   amdgpu_bo_cpu_unmap(real->bo);
}

static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
   amdgpu_bo_destroy_or_cache
   /* other functions are never called */
};

static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
{
   struct amdgpu_winsys *ws = bo->ws;

   assert(bo->bo);

   pipe_mutex_lock(ws->global_bo_list_lock);
   LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list);
   ws->num_buffers++;
   pipe_mutex_unlock(ws->global_bo_list_lock);
}

static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
                                                 uint64_t size,
                                                 unsigned alignment,
                                                 unsigned usage,
                                                 enum radeon_bo_domain initial_domain,
                                                 unsigned flags,
                                                 unsigned pb_cache_bucket)
{
   struct amdgpu_bo_alloc_request request = {0};
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   struct amdgpu_winsys_bo *bo;
   amdgpu_va_handle va_handle;
   unsigned va_gap_size;
   int r;

   assert(initial_domain & RADEON_DOMAIN_VRAM_GTT);
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                       pb_cache_bucket);
   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
      fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
      goto error_bo_alloc;
   }

   va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size + va_gap_size, alignment, 0, &va, &va_handle, 0);
   if (r)
      goto error_va_alloc;

   r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = alignment;
   bo->base.usage = usage;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->bo = buf_handle;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = initial_domain;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   if (initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(size, ws->info.gart_page_size);
   else if (initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   FREE(bo);
   return NULL;
}
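
/* Allocation sketch (illustrative summary of the function above, with error
 * handling omitted and names as used there): the core of amdgpu_create_bo()
 * is the plain libdrm_amdgpu three-step sequence
 *
 *    amdgpu_bo_alloc(ws->dev, &request, &buf_handle);            // GEM object
 *    amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
 *                          size + va_gap_size, alignment, 0,
 *                          &va, &va_handle, 0);                  // GPU VA range
 *    amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
 *
 * and the unmap/free path in amdgpu_bo_destroy() undoes these steps in
 * reverse order.
 */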

bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
      return false;
   }

   return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
}

bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct amdgpu_winsys_bo *bo = NULL; /* fix container_of */
   bo = container_of(entry, bo, u.slab.entry);

   return amdgpu_bo_can_reclaim(&bo->base);
}
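
/* Note on the "bo = NULL" dance above: it only exists to give container_of()
 * a typed pointer from which the offset of u.slab.entry can be computed; the
 * NULL value itself is never dereferenced. The same pattern appears again in
 * amdgpu_bo_create() when a freshly allocated slab entry is turned back into
 * a struct amdgpu_winsys_bo.
 */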

static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   assert(!bo->bo);

   pb_slab_free(&bo->ws->bo_slabs, &bo->u.slab.entry);
}

static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = {
   amdgpu_bo_slab_destroy
   /* other functions are never called */
};

struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
                                     unsigned entry_size,
                                     unsigned group_index)
{
   struct amdgpu_winsys *ws = priv;
   struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
   enum radeon_bo_domain domains;
   enum radeon_bo_flag flags = 0;
   uint32_t base_id;

   if (!slab)
      return NULL;

   if (heap & 1)
      flags |= RADEON_FLAG_GTT_WC;
   if (heap & 2)
      flags |= RADEON_FLAG_CPU_ACCESS;

   switch (heap >> 2) {
   case 0:
      domains = RADEON_DOMAIN_VRAM;
      break;
   default:
   case 1:
      domains = RADEON_DOMAIN_VRAM_GTT;
      break;
   case 2:
      domains = RADEON_DOMAIN_GTT;
      break;
   }

   slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(&ws->base,
                                                    64 * 1024, 64 * 1024,
                                                    domains, flags));
   if (!slab->buffer)
      goto fail;

   assert(slab->buffer->bo);

   slab->base.num_entries = slab->buffer->base.size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   LIST_INITHEAD(&slab->base.free);

   base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab->base.num_entries);

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct amdgpu_winsys_bo *bo = &slab->entries[i];

      bo->base.alignment = entry_size;
      bo->base.usage = slab->buffer->base.usage;
      bo->base.size = entry_size;
      bo->base.vtbl = &amdgpu_winsys_bo_slab_vtbl;
      bo->ws = ws;
      bo->va = slab->buffer->va + i * entry_size;
      bo->initial_domain = domains;
      bo->unique_id = base_id + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.entry.group_index = group_index;
      bo->u.slab.real = slab->buffer;

      LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
   }

   return &slab->base;

fail_buffer:
   amdgpu_winsys_bo_reference(&slab->buffer, NULL);
fail:
   FREE(slab);
   return NULL;
}
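
/* Heap index sketch (illustrative, derived from the decoding above and the
 * matching encoding in amdgpu_bo_create() below): the "heap" value packs the
 * allocation flags into bits 0-1 and the domain into the remaining bits, so a
 * request for (RADEON_DOMAIN_GTT, RADEON_FLAG_GTT_WC) becomes
 *
 *    heap = 1            // bit 0: RADEON_FLAG_GTT_WC
 *         | (2 << 2);    // bits 2+: 2 = RADEON_DOMAIN_GTT
 *                        // -> heap == 9
 *
 * which selects a GTT slab with a write-combined CPU mapping. The two sides
 * of this encoding must stay in sync.
 */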

void amdgpu_bo_slab_free(void *priv, struct pb_slab *pslab)
{
   struct amdgpu_slab *slab = amdgpu_slab(pslab);

   for (unsigned i = 0; i < slab->base.num_entries; ++i)
      amdgpu_bo_remove_fences(&slab->entries[i]);

   FREE(slab->entries);
   amdgpu_winsys_bo_reference(&slab->buffer, NULL);
   FREE(slab);
}

static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:     tile_split = 64;    break;
   case 1:     tile_split = 128;   break;
   case 2:     tile_split = 256;   break;
   case 3:     tile_split = 512;   break;
   default:
   case 4:     tile_split = 1024;  break;
   case 5:     tile_split = 2048;  break;
   case 6:     tile_split = 4096;  break;
   }
   return tile_split;
}

static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:    return 0;
   case 128:   return 1;
   case 256:   return 2;
   case 512:   return 3;
   default:
   case 1024:  return 4;
   case 2048:  return 5;
   case 4096:  return 6;
   }
}
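
/* Worked example (illustrative): the two helpers above translate between the
 * hardware TILE_SPLIT field value (0..6) and the tile split size in bytes:
 *
 *    eg_tile_split(2)       == 256
 *    eg_tile_split_rev(256) == 2
 *
 * Unrecognized values fall back to 1024 bytes / field value 4 through the
 * default cases.
 */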

static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_info info = {0};
   uint32_t tiling_flags;
   int r;

   assert(bo->bo && "must not be called for slab entries");

   r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   tiling_flags = info.metadata.tiling_info;

   md->microtile = RADEON_LAYOUT_LINEAR;
   md->macrotile = RADEON_LAYOUT_LINEAR;

   if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4)  /* 2D_TILED_THIN1 */
      md->macrotile = RADEON_LAYOUT_TILED;
   else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
      md->microtile = RADEON_LAYOUT_TILED;

   md->pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
   md->bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
   md->bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
   md->tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
   md->mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
   md->num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
   md->scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_metadata metadata = {0};
   uint32_t tiling_flags = 0;

   assert(bo->bo && "must not be called for slab entries");

   if (md->macrotile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
   else if (md->microtile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
   else
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

   tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->pipe_config);
   tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->bankw));
   tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->bankh));
   if (md->tile_split)
      tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(md->tile_split));
   tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->mtilea));
   tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->num_banks)-1);

   if (md->scanout)
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
   else
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
                 uint64_t size,
                 unsigned alignment,
                 enum radeon_bo_domain domain,
                 enum radeon_bo_flag flags)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   unsigned usage = 0, pb_cache_bucket;

   /* Sub-allocate small buffers from slabs. */
   if (!(flags & RADEON_FLAG_HANDLE) &&
       size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) &&
       alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
      struct pb_slab_entry *entry;
      unsigned heap = 0;

      if (flags & RADEON_FLAG_GTT_WC)
         heap |= 1;
      if (flags & RADEON_FLAG_CPU_ACCESS)
         heap |= 2;
      if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
         goto no_slab;

      switch (domain) {
      case RADEON_DOMAIN_VRAM:
         heap |= 0 * 4;
         break;
      case RADEON_DOMAIN_VRAM_GTT:
         heap |= 1 * 4;
         break;
      case RADEON_DOMAIN_GTT:
         heap |= 2 * 4;
         break;
      default:
         goto no_slab;
      }

      entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      if (!entry) {
         /* Clear the cache and try again. */
         pb_cache_release_all_buffers(&ws->bo_cache);

         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      }
      if (!entry)
         return NULL;

      bo = NULL;
      bo = container_of(entry, bo, u.slab.entry);

      pipe_reference_init(&bo->base.reference, 1);

      return &bo->base;
   }
no_slab:

   /* This flag is irrelevant for the cache. */
   flags &= ~RADEON_FLAG_HANDLE;

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align64(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);

   /* Only set one usage bit each for domains and flags, or the cache manager
    * might consider different sets of domains / flags compatible
    */
   if (domain == RADEON_DOMAIN_VRAM_GTT)
      usage = 1 << 2;
   else
      usage = domain >> 1;
   assert(flags < sizeof(usage) * 8 - 3);
   usage |= 1 << (flags + 3);

   /* Determine the pb_cache bucket for minimizing pb_cache misses. */
   pb_cache_bucket = 0;
   if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
      pb_cache_bucket += 1;
   if (flags == RADEON_FLAG_GTT_WC) /* WC */
      pb_cache_bucket += 2;
   assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
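
   /* Illustrative enumeration of the resulting buckets (derived from the two
    * tests above): with this scheme the cache needs only four of them:
    *
    *    0: GTT, not write-combined
    *    1: VRAM or VRAM+GTT, not write-combined
    *    2: GTT, RADEON_FLAG_GTT_WC only
    *    3: VRAM or VRAM+GTT, RADEON_FLAG_GTT_WC only
    */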

   /* Get a buffer from the cache. */
   bo = (struct amdgpu_winsys_bo*)
        pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
                                pb_cache_bucket);
   if (bo)
      return &bo->base;

   /* Create a new one. */
   bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                         pb_cache_bucket);
   if (!bo) {
      /* Clear the cache and try again. */
      pb_slabs_reclaim(&ws->bo_slabs);
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                            pb_cache_bucket);
      if (!bo)
         return NULL;
   }

   bo->u.real.use_reusable_pool = true;
   return &bo->base;
}

static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
                                               struct winsys_handle *whandle,
                                               unsigned *stride,
                                               unsigned *offset)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   enum amdgpu_bo_handle_type type;
   struct amdgpu_bo_import_result result = {0};
   uint64_t va;
   amdgpu_va_handle va_handle;
   struct amdgpu_bo_info info = {0};
   enum radeon_bo_domain initial = 0;
   int r;

   /* Initialize the structure. */
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   default:
      return NULL;
   }

   r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
   if (r)
      goto error;

   /* Get initial domains. */
   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r)
      goto error_query;

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             result.alloc_size, 1 << 20, 0, &va, &va_handle, 0);
   if (r)
      goto error_query;

   r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;


   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = info.phys_alignment;
   bo->bo = result.buf_handle;
   bo->base.size = result.alloc_size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = initial;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
   bo->is_shared = true;

   if (stride)
      *stride = whandle->stride;
   if (offset)
      *offset = whandle->offset;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return &bo->base;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return NULL;
}

static bool amdgpu_bo_get_handle(struct pb_buffer *buffer,
                                 unsigned stride, unsigned offset,
                                 unsigned slice_size,
                                 struct winsys_handle *whandle)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
   enum amdgpu_bo_handle_type type;
   int r;

   if (!bo->bo) {
      offset += bo->va - bo->u.slab.real->va;
      bo = bo->u.slab.real;
   }

   bo->u.real.use_reusable_pool = false;

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   case DRM_API_HANDLE_TYPE_KMS:
      type = amdgpu_bo_handle_type_kms;
      break;
   default:
      return false;
   }

   r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
   if (r)
      return false;

   whandle->stride = stride;
   whandle->offset = offset;
   whandle->offset += slice_size * whandle->layer;
   bo->is_shared = true;
   return true;
}

static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
                                            void *pointer, uint64_t size)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   amdgpu_bo_handle buf_handle;
   struct amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;

   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
      goto error;

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, 1 << 12, 0, &va, &va_handle, 0))
      goto error_va_alloc;

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
      goto error_va_map;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->bo = buf_handle;
   bo->base.alignment = 0;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->user_ptr = pointer;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return (struct pb_buffer*)bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return NULL;
}
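
/* Usage sketch (illustrative; "rws", "user_mem" and "size" are assumptions,
 * not defined here): buffer_from_ptr lets a state tracker wrap application
 * memory, e.g. for GL_AMD_pinned_memory, without copying. The pointer is
 * assumed to be suitably (page) aligned for the kernel's userptr path:
 *
 *    struct pb_buffer *buf = rws->buffer_from_ptr(rws, user_mem, size);
 *    if (buf) {
 *       // behaves like any other GTT buffer; buffer_map() returns
 *       // user_mem directly via bo->user_ptr
 *    }
 */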

static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;
}

static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->va;
}

void amdgpu_bo_init_functions(struct amdgpu_winsys *ws)
{
   ws->base.buffer_set_metadata = amdgpu_buffer_set_metadata;
   ws->base.buffer_get_metadata = amdgpu_buffer_get_metadata;
   ws->base.buffer_map = amdgpu_bo_map;
   ws->base.buffer_unmap = amdgpu_bo_unmap;
   ws->base.buffer_wait = amdgpu_bo_wait;
   ws->base.buffer_create = amdgpu_bo_create;
   ws->base.buffer_from_handle = amdgpu_bo_from_handle;
   ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
   ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
   ws->base.buffer_get_handle = amdgpu_bo_get_handle;
   ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
   ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
}
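
/* Initialization sketch (illustrative; the exact call site lives outside this
 * file and the amdgpu_cs_init_functions name is assumed from amdgpu_cs.c):
 * the winsys screen creation path is expected to call this once while filling
 * in ws->base, roughly
 *
 *    struct amdgpu_winsys *ws = ...;   // created by the winsys entry point
 *    amdgpu_bo_init_functions(ws);     // buffer hooks from this file
 *    amdgpu_cs_init_functions(ws);     // command-stream hooks
 *
 * after which drivers only reach the functions above through the
 * ws->base.buffer_* pointers.
 */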