gallium/util: replace pipe_mutex_unlock() with mtx_unlock()
[mesa.git] / src / gallium / winsys / amdgpu / drm / amdgpu_bo.c
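The change itself is mechanical: calls to the pipe_mutex_unlock() wrapper become direct calls to the C11-style mtx_unlock(), which the wrapper was a thin macro around; locking in the file below already uses mtx_lock()/mtx_unlock(). A minimal sketch of the pattern, reusing a field name from this file (the old macro took the mutex by value and passed its address internally):

    /* before */
    pipe_mutex_unlock(ws->bo_fence_lock);

    /* after */
    mtx_unlock(&ws->bo_fence_lock);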
/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 */

#include "amdgpu_cs.h"

#include "os/os_time.h"
#include "state_tracker/drm_driver.h"
#include <amdgpu_drm.h>
#include <xf86drm.h>
#include <stdio.h>
#include <inttypes.h>

static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
                 uint64_t size,
                 unsigned alignment,
                 enum radeon_bo_domain domain,
                 enum radeon_bo_flag flags);

static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_winsys *ws = bo->ws;
   int64_t abs_timeout;

   if (timeout == 0) {
      if (p_atomic_read(&bo->num_active_ioctls))
         return false;

   } else {
      abs_timeout = os_time_get_absolute_timeout(timeout);

      /* Wait if any ioctl is being submitted with this buffer. */
      if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
         return false;
   }

   if (bo->is_shared) {
      /* We can't use user fences for shared buffers, because user fences
       * are local to this process only. If we want to wait for all buffer
       * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
       */
      bool buffer_busy = true;
      int r;

      r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
      if (r)
         fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
                 r);
      return !buffer_busy;
   }

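   /* The ioctl check above has passed; what remains is the per-buffer fence
    * list guarded by bo_fence_lock.  With timeout == 0 the fences are only
    * polled, otherwise each one is waited on up to abs_timeout.
    */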
   if (timeout == 0) {
      unsigned idle_fences;
      bool buffer_idle;

      mtx_lock(&ws->bo_fence_lock);

      for (idle_fences = 0; idle_fences < bo->num_fences; ++idle_fences) {
         if (!amdgpu_fence_wait(bo->fences[idle_fences], 0, false))
            break;
      }

      /* Release the idle fences to avoid checking them again later. */
      for (unsigned i = 0; i < idle_fences; ++i)
         amdgpu_fence_reference(&bo->fences[i], NULL);

      memmove(&bo->fences[0], &bo->fences[idle_fences],
              (bo->num_fences - idle_fences) * sizeof(*bo->fences));
      bo->num_fences -= idle_fences;

      buffer_idle = !bo->num_fences;
      mtx_unlock(&ws->bo_fence_lock);

      return buffer_idle;
   } else {
      bool buffer_idle = true;

      mtx_lock(&ws->bo_fence_lock);
      while (bo->num_fences && buffer_idle) {
         struct pipe_fence_handle *fence = NULL;
         bool fence_idle = false;

         amdgpu_fence_reference(&fence, bo->fences[0]);

         /* Wait for the fence. */
         mtx_unlock(&ws->bo_fence_lock);
         if (amdgpu_fence_wait(fence, abs_timeout, true))
            fence_idle = true;
         else
            buffer_idle = false;
         mtx_lock(&ws->bo_fence_lock);

         /* Release an idle fence to avoid checking it again later, keeping in
          * mind that the fence array may have been modified by other threads.
          */
         if (fence_idle && bo->num_fences && bo->fences[0] == fence) {
            amdgpu_fence_reference(&bo->fences[0], NULL);
            memmove(&bo->fences[0], &bo->fences[1],
                    (bo->num_fences - 1) * sizeof(*bo->fences));
            bo->num_fences--;
         }

         amdgpu_fence_reference(&fence, NULL);
      }
      mtx_unlock(&ws->bo_fence_lock);

      return buffer_idle;
   }
}

static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
      struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
}

static void amdgpu_bo_remove_fences(struct amdgpu_winsys_bo *bo)
{
   for (unsigned i = 0; i < bo->num_fences; ++i)
      amdgpu_fence_reference(&bo->fences[i], NULL);

   FREE(bo->fences);
   bo->num_fences = 0;
   bo->max_fences = 0;
}

void amdgpu_bo_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   assert(bo->bo && "must not be called for slab entries");

   mtx_lock(&bo->ws->global_bo_list_lock);
   LIST_DEL(&bo->u.real.global_list_item);
   bo->ws->num_buffers--;
   mtx_unlock(&bo->ws->global_bo_list_lock);

   amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
   amdgpu_va_range_free(bo->u.real.va_handle);
   amdgpu_bo_free(bo->bo);

   amdgpu_bo_remove_fences(bo);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      bo->ws->allocated_gtt -= align64(bo->base.size, bo->ws->info.gart_page_size);

   if (bo->u.real.map_count >= 1) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->ws->mapped_vram -= bo->base.size;
      else if (bo->initial_domain & RADEON_DOMAIN_GTT)
         bo->ws->mapped_gtt -= bo->base.size;
      bo->ws->num_mapped_buffers--;
   }

   FREE(bo);
}

static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   assert(bo->bo); /* slab buffers have a separate vtbl */

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(&bo->u.real.cache_entry);
   else
      amdgpu_bo_destroy(_buf);
}

static void *amdgpu_bo_map(struct pb_buffer *buf,
                           struct radeon_winsys_cs *rcs,
                           enum pipe_transfer_usage usage)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_winsys_bo *real;
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
   int r;
   void *cpu = NULL;
   uint64_t offset = 0;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_TRANSFER_DONTBLOCK) {
         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs) {
               if (amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                            RADEON_USAGE_WRITE)) {
                  cs->flush_cs(cs->flush_data, 0, NULL);
               } else {
                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     amdgpu_cs_sync_flush(rcs);
               }
            }

            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (amdgpu_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data, 0, NULL);
               } else {
                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     amdgpu_cs_sync_flush(rcs);
               }
            }

            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->ws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

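   /* Slab entries have no kernel BO of their own; map the backing (real)
    * buffer and add the entry's offset within it.
    */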
   if (bo->bo) {
      real = bo;
   } else {
      real = bo->u.slab.real;
      offset = bo->va - real->va;
   }

   r = amdgpu_bo_cpu_map(real->bo, &cpu);
   if (r) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&real->ws->bo_cache);
      r = amdgpu_bo_cpu_map(real->bo, &cpu);
      if (r)
         return NULL;
   }

   if (p_atomic_inc_return(&real->u.real.map_count) == 1) {
      if (real->initial_domain & RADEON_DOMAIN_VRAM)
         real->ws->mapped_vram += real->base.size;
      else if (real->initial_domain & RADEON_DOMAIN_GTT)
         real->ws->mapped_gtt += real->base.size;
      real->ws->num_mapped_buffers++;
   }
   return (uint8_t*)cpu + offset;
}

static void amdgpu_bo_unmap(struct pb_buffer *buf)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_winsys_bo *real;

   if (bo->user_ptr)
      return;

   real = bo->bo ? bo : bo->u.slab.real;

   if (p_atomic_dec_zero(&real->u.real.map_count)) {
      if (real->initial_domain & RADEON_DOMAIN_VRAM)
         real->ws->mapped_vram -= real->base.size;
      else if (real->initial_domain & RADEON_DOMAIN_GTT)
         real->ws->mapped_gtt -= real->base.size;
      real->ws->num_mapped_buffers--;
   }

   amdgpu_bo_cpu_unmap(real->bo);
}

static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
   amdgpu_bo_destroy_or_cache
   /* other functions are never called */
};

static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
{
   struct amdgpu_winsys *ws = bo->ws;

   assert(bo->bo);

   mtx_lock(&ws->global_bo_list_lock);
   LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list);
   ws->num_buffers++;
   mtx_unlock(&ws->global_bo_list_lock);
}

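/* Allocate a fresh kernel BO, assign it a range in the GPU virtual address
 * space and map it there, then wrap it in an amdgpu_winsys_bo.
 */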
static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
                                                 uint64_t size,
                                                 unsigned alignment,
                                                 unsigned usage,
                                                 enum radeon_bo_domain initial_domain,
                                                 unsigned flags,
                                                 unsigned pb_cache_bucket)
{
   struct amdgpu_bo_alloc_request request = {0};
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   struct amdgpu_winsys_bo *bo;
   amdgpu_va_handle va_handle;
   unsigned va_gap_size;
   int r;

   assert(initial_domain & RADEON_DOMAIN_VRAM_GTT);
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                       pb_cache_bucket);
   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu: size      : %"PRIu64" bytes\n", size);
      fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu: domains   : %u\n", initial_domain);
      goto error_bo_alloc;
   }

   va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size + va_gap_size, alignment, 0, &va, &va_handle, 0);
   if (r)
      goto error_va_alloc;

   r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = alignment;
   bo->base.usage = usage;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->bo = buf_handle;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = initial_domain;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   if (initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(size, ws->info.gart_page_size);
   else if (initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   FREE(bo);
   return NULL;
}

bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
      return false;
   }

   return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
}

bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct amdgpu_winsys_bo *bo = NULL; /* fix container_of */
   bo = container_of(entry, bo, u.slab.entry);

   return amdgpu_bo_can_reclaim(&bo->base);
}

static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   assert(!bo->bo);

   pb_slab_free(&bo->ws->bo_slabs, &bo->u.slab.entry);
}

static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = {
   amdgpu_bo_slab_destroy
   /* other functions are never called */
};

struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
                                     unsigned entry_size,
                                     unsigned group_index)
{
   struct amdgpu_winsys *ws = priv;
   struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
   enum radeon_bo_domain domains;
   enum radeon_bo_flag flags = 0;
   uint32_t base_id;

   if (!slab)
      return NULL;

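   /* 'heap' packs the allocation parameters: bit 0 = GTT_WC, bit 1 =
    * CPU_ACCESS, bits 2+ select the domain.  It must match the encoding
    * used in amdgpu_bo_create below.
    */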
   if (heap & 1)
      flags |= RADEON_FLAG_GTT_WC;
   if (heap & 2)
      flags |= RADEON_FLAG_CPU_ACCESS;

   switch (heap >> 2) {
   case 0:
      domains = RADEON_DOMAIN_VRAM;
      break;
   default:
   case 1:
      domains = RADEON_DOMAIN_VRAM_GTT;
      break;
   case 2:
      domains = RADEON_DOMAIN_GTT;
      break;
   }

   slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(&ws->base,
                                                    64 * 1024, 64 * 1024,
                                                    domains, flags));
   if (!slab->buffer)
      goto fail;

   assert(slab->buffer->bo);

   slab->base.num_entries = slab->buffer->base.size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   LIST_INITHEAD(&slab->base.free);

   base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab->base.num_entries);

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct amdgpu_winsys_bo *bo = &slab->entries[i];

      bo->base.alignment = entry_size;
      bo->base.usage = slab->buffer->base.usage;
      bo->base.size = entry_size;
      bo->base.vtbl = &amdgpu_winsys_bo_slab_vtbl;
      bo->ws = ws;
      bo->va = slab->buffer->va + i * entry_size;
      bo->initial_domain = domains;
      bo->unique_id = base_id + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.entry.group_index = group_index;
      bo->u.slab.real = slab->buffer;

      LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
   }

   return &slab->base;

fail_buffer:
   amdgpu_winsys_bo_reference(&slab->buffer, NULL);
fail:
   FREE(slab);
   return NULL;
}

void amdgpu_bo_slab_free(void *priv, struct pb_slab *pslab)
{
   struct amdgpu_slab *slab = amdgpu_slab(pslab);

   for (unsigned i = 0; i < slab->base.num_entries; ++i)
      amdgpu_bo_remove_fences(&slab->entries[i]);

   FREE(slab->entries);
   amdgpu_winsys_bo_reference(&slab->buffer, NULL);
   FREE(slab);
}

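/* Translate the TILE_SPLIT field of the tiling flags to a tile split size
 * in bytes, and back.
 */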
static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0: tile_split = 64; break;
   case 1: tile_split = 128; break;
   case 2: tile_split = 256; break;
   case 3: tile_split = 512; break;
   default:
   case 4: tile_split = 1024; break;
   case 5: tile_split = 2048; break;
   case 6: tile_split = 4096; break;
   }
   return tile_split;
}

static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64: return 0;
   case 128: return 1;
   case 256: return 2;
   case 512: return 3;
   default:
   case 1024: return 4;
   case 2048: return 5;
   case 4096: return 6;
   }
}

static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_info info = {0};
   uint32_t tiling_flags;
   int r;

   assert(bo->bo && "must not be called for slab entries");

   r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   tiling_flags = info.metadata.tiling_info;

   md->microtile = RADEON_LAYOUT_LINEAR;
   md->macrotile = RADEON_LAYOUT_LINEAR;

   if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4)  /* 2D_TILED_THIN1 */
      md->macrotile = RADEON_LAYOUT_TILED;
   else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
      md->microtile = RADEON_LAYOUT_TILED;

   md->pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
   md->bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
   md->bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
   md->tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
   md->mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
   md->num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
   md->scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_metadata metadata = {0};
   uint32_t tiling_flags = 0;

   assert(bo->bo && "must not be called for slab entries");

   if (md->macrotile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
   else if (md->microtile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
   else
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

   tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->pipe_config);
   tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->bankw));
   tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->bankh));
   if (md->tile_split)
      tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(md->tile_split));
   tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->mtilea));
   tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->num_banks)-1);

   if (md->scanout)
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
   else
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
                 uint64_t size,
                 unsigned alignment,
                 enum radeon_bo_domain domain,
                 enum radeon_bo_flag flags)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   unsigned usage = 0, pb_cache_bucket;

   /* Sub-allocate small buffers from slabs. */
   if (!(flags & RADEON_FLAG_HANDLE) &&
       size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) &&
       alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
      struct pb_slab_entry *entry;
      unsigned heap = 0;

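      /* Encode the request into the slab heap index: bit 0 = GTT_WC,
       * bit 1 = CPU_ACCESS, bits 2+ = domain.  This mirrors the decoding
       * in amdgpu_bo_slab_alloc above.
       */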
      if (flags & RADEON_FLAG_GTT_WC)
         heap |= 1;
      if (flags & RADEON_FLAG_CPU_ACCESS)
         heap |= 2;
      if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
         goto no_slab;

      switch (domain) {
      case RADEON_DOMAIN_VRAM:
         heap |= 0 * 4;
         break;
      case RADEON_DOMAIN_VRAM_GTT:
         heap |= 1 * 4;
         break;
      case RADEON_DOMAIN_GTT:
         heap |= 2 * 4;
         break;
      default:
         goto no_slab;
      }

      entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      if (!entry) {
         /* Clear the cache and try again. */
         pb_cache_release_all_buffers(&ws->bo_cache);

         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      }
      if (!entry)
         return NULL;

      bo = NULL;
      bo = container_of(entry, bo, u.slab.entry);

      pipe_reference_init(&bo->base.reference, 1);

      return &bo->base;
   }
no_slab:

   /* This flag is irrelevant for the cache. */
   flags &= ~RADEON_FLAG_HANDLE;

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align64(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);

   /* Only set one usage bit each for domains and flags, or the cache manager
    * might consider different sets of domains / flags compatible
    */
   if (domain == RADEON_DOMAIN_VRAM_GTT)
      usage = 1 << 2;
   else
      usage = domain >> 1;
   assert(flags < sizeof(usage) * 8 - 3);
   usage |= 1 << (flags + 3);

   /* Determine the pb_cache bucket for minimizing pb_cache misses. */
   pb_cache_bucket = 0;
   if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
      pb_cache_bucket += 1;
   if (flags == RADEON_FLAG_GTT_WC) /* WC */
      pb_cache_bucket += 2;
   assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));

   /* Get a buffer from the cache. */
   bo = (struct amdgpu_winsys_bo*)
        pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
                                pb_cache_bucket);
   if (bo)
      return &bo->base;

   /* Create a new one. */
   bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                         pb_cache_bucket);
   if (!bo) {
      /* Clear the cache and try again. */
      pb_slabs_reclaim(&ws->bo_slabs);
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                            pb_cache_bucket);
      if (!bo)
         return NULL;
   }

   bo->u.real.use_reusable_pool = true;
   return &bo->base;
}

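/* Import a buffer shared by another process (GEM flink name or dma-buf fd)
 * and map it into this context's GPU virtual address space.
 */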
static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
                                               struct winsys_handle *whandle,
                                               unsigned *stride,
                                               unsigned *offset)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   enum amdgpu_bo_handle_type type;
   struct amdgpu_bo_import_result result = {0};
   uint64_t va;
   amdgpu_va_handle va_handle;
   struct amdgpu_bo_info info = {0};
   enum radeon_bo_domain initial = 0;
   int r;

   /* Initialize the structure. */
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   default:
      return NULL;
   }

   r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
   if (r)
      goto error;

   /* Get initial domains. */
   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r)
      goto error_query;

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             result.alloc_size, 1 << 20, 0, &va, &va_handle, 0);
   if (r)
      goto error_query;

   r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = info.phys_alignment;
   bo->bo = result.buf_handle;
   bo->base.size = result.alloc_size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = initial;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
   bo->is_shared = true;

   if (stride)
      *stride = whandle->stride;
   if (offset)
      *offset = whandle->offset;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return &bo->base;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return NULL;
}

static bool amdgpu_bo_get_handle(struct pb_buffer *buffer,
                                 unsigned stride, unsigned offset,
                                 unsigned slice_size,
                                 struct winsys_handle *whandle)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
   enum amdgpu_bo_handle_type type;
   int r;

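   /* Slab entries cannot be exported directly; export the backing buffer
    * instead and fold the entry's offset into the handle's offset.
    */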
   if (!bo->bo) {
      offset += bo->va - bo->u.slab.real->va;
      bo = bo->u.slab.real;
   }

   bo->u.real.use_reusable_pool = false;

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   case DRM_API_HANDLE_TYPE_KMS:
      type = amdgpu_bo_handle_type_kms;
      break;
   default:
      return false;
   }

   r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
   if (r)
      return false;

   whandle->stride = stride;
   whandle->offset = offset;
   whandle->offset += slice_size * whandle->layer;
   bo->is_shared = true;
   return true;
}

static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
                                            void *pointer, uint64_t size)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   amdgpu_bo_handle buf_handle;
   struct amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;

   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo)
      return NULL;

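   /* Wrap the user allocation in a kernel userptr BO, then assign it a GPU
    * virtual address so the GPU can access it.
    */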
   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
      goto error;

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, 1 << 12, 0, &va, &va_handle, 0))
      goto error_va_alloc;

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
      goto error_va_map;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->bo = buf_handle;
   bo->base.alignment = 0;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->user_ptr = pointer;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return (struct pb_buffer*)bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return NULL;
}

static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;
}

static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->va;
}

void amdgpu_bo_init_functions(struct amdgpu_winsys *ws)
{
   ws->base.buffer_set_metadata = amdgpu_buffer_set_metadata;
   ws->base.buffer_get_metadata = amdgpu_buffer_get_metadata;
   ws->base.buffer_map = amdgpu_bo_map;
   ws->base.buffer_unmap = amdgpu_bo_unmap;
   ws->base.buffer_wait = amdgpu_bo_wait;
   ws->base.buffer_create = amdgpu_bo_create;
   ws->base.buffer_from_handle = amdgpu_bo_from_handle;
   ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
   ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
   ws->base.buffer_get_handle = amdgpu_bo_get_handle;
   ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
   ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
}