winsys/amdgpu: add back multithreaded command submission
[mesa.git] src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 */

#include "amdgpu_cs.h"

#include "os/os_time.h"
#include "state_tracker/drm_driver.h"
#include <amdgpu_drm.h>
#include <xf86drm.h>
#include <stdio.h>
#include <inttypes.h>

static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_winsys *ws = bo->ws;
   int64_t abs_timeout;
   int i;

   if (timeout == 0) {
      if (p_atomic_read(&bo->num_active_ioctls))
         return false;

   } else {
      abs_timeout = os_time_get_absolute_timeout(timeout);

      /* Wait if any ioctl is being submitted with this buffer. */
      if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
         return false;
   }

   if (bo->is_shared) {
      /* We can't use user fences for shared buffers, because user fences
       * are local to this process only. If we want to wait for all buffer
       * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
       */
      bool buffer_busy = true;
      int r;

      r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
      if (r)
         fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
                 r);
      return !buffer_busy;
   }

   if (timeout == 0) {
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++)
         if (bo->fence[i]) {
            if (amdgpu_fence_wait(bo->fence[i], 0, false)) {
               /* Release the idle fence to avoid checking it again later. */
               amdgpu_fence_reference(&bo->fence[i], NULL);
            } else {
               pipe_mutex_unlock(ws->bo_fence_lock);
               return false;
            }
         }
      pipe_mutex_unlock(ws->bo_fence_lock);
      return true;

   } else {
      struct pipe_fence_handle *fence[RING_LAST] = {};
      bool fence_idle[RING_LAST] = {};
      bool buffer_idle = true;

      /* Take references to all fences, so that we can wait for them
       * without the lock. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++)
         amdgpu_fence_reference(&fence[i], bo->fence[i]);
      pipe_mutex_unlock(ws->bo_fence_lock);

      /* Now wait for the fences. */
      for (i = 0; i < RING_LAST; i++) {
         if (fence[i]) {
            if (amdgpu_fence_wait(fence[i], abs_timeout, true))
               fence_idle[i] = true;
            else
               buffer_idle = false;
         }
      }

      /* Release idle fences to avoid checking them again later. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++) {
         if (fence[i] == bo->fence[i] && fence_idle[i])
            amdgpu_fence_reference(&bo->fence[i], NULL);

         amdgpu_fence_reference(&fence[i], NULL);
      }
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   }
}
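
/* Illustrative sketch (not from the original file): a caller can use the
 * zero-timeout path above as a cheap busy check through the winsys vtable,
 * which amdgpu_bo_init_functions() at the end of this file points at
 * amdgpu_bo_wait. The "buf" and "ws" variables here are assumed to come
 * from the caller:
 *
 *    if (ws->base.buffer_wait(buf, 0, RADEON_USAGE_READWRITE)) {
 *       // idle across all rings; safe to reuse or map without stalling
 *    } else {
 *       // still referenced by an in-flight or pending submission
 *    }
 *
 * With a zero timeout the function refuses to wait on pending ioctls
 * (num_active_ioctls) and only polls the per-ring fences under the lock.
 */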

static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
      struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
}

void amdgpu_bo_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   int i;

   pipe_mutex_lock(bo->ws->global_bo_list_lock);
   LIST_DEL(&bo->global_list_item);
   bo->ws->num_buffers--;
   pipe_mutex_unlock(bo->ws->global_bo_list_lock);

   amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
   amdgpu_va_range_free(bo->va_handle);
   amdgpu_bo_free(bo->bo);

   for (i = 0; i < RING_LAST; i++)
      amdgpu_fence_reference(&bo->fence[i], NULL);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      bo->ws->allocated_gtt -= align64(bo->base.size, bo->ws->info.gart_page_size);
   FREE(bo);
}

static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   if (bo->use_reusable_pool)
      pb_cache_add_buffer(&bo->cache_entry);
   else
      amdgpu_bo_destroy(_buf);
}

static void *amdgpu_bo_map(struct pb_buffer *buf,
                           struct radeon_winsys_cs *rcs,
                           enum pipe_transfer_usage usage)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
   int r;
   void *cpu = NULL;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_TRANSFER_DONTBLOCK) {
         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               cs->flush_cs(cs->flush_data, 0, NULL);
            } else {
               /* Try to avoid busy-waiting in amdgpu_bo_wait. */
               if (p_atomic_read(&bo->num_active_ioctls))
                  amdgpu_cs_sync_flush(rcs);
            }
            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (amdgpu_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data, 0, NULL);
               } else {
                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     amdgpu_cs_sync_flush(rcs);
               }
            }

            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->ws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   r = amdgpu_bo_cpu_map(bo->bo, &cpu);
   if (r) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&bo->ws->bo_cache);
      r = amdgpu_bo_cpu_map(bo->bo, &cpu);
   }
   return r ? NULL : cpu;
}
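
/* Illustrative sketch (not from the original file): how a caller might use
 * the DONTBLOCK path above to avoid stalling, falling back to other storage
 * when the buffer is busy. "buf", "rcs" and the fallback are assumptions of
 * this example, not code elsewhere in the winsys:
 *
 *    void *ptr = ws->base.buffer_map(buf, rcs,
 *                                    PIPE_TRANSFER_READ |
 *                                    PIPE_TRANSFER_DONTBLOCK);
 *    if (!ptr) {
 *       // The buffer is busy; amdgpu_bo_map may already have flushed the
 *       // referencing CS asynchronously, so the caller can retry later or
 *       // read through a staging buffer instead.
 *    }
 *
 * PIPE_TRANSFER_UNSYNCHRONIZED skips all of the waiting above and maps
 * immediately, which is only safe if the caller knows the GPU is not
 * writing the range.
 */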

static void amdgpu_bo_unmap(struct pb_buffer *buf)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;

   amdgpu_bo_cpu_unmap(bo->bo);
}

static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
   amdgpu_bo_destroy_or_cache
   /* other functions are never called */
};

static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
{
   struct amdgpu_winsys *ws = bo->ws;

   pipe_mutex_lock(ws->global_bo_list_lock);
   LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list);
   ws->num_buffers++;
   pipe_mutex_unlock(ws->global_bo_list_lock);
}

static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
                                                 uint64_t size,
                                                 unsigned alignment,
                                                 unsigned usage,
                                                 enum radeon_bo_domain initial_domain,
                                                 unsigned flags)
{
   struct amdgpu_bo_alloc_request request = {0};
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   struct amdgpu_winsys_bo *bo;
   amdgpu_va_handle va_handle;
   int r;

   assert(initial_domain & RADEON_DOMAIN_VRAM_GTT);
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   pb_cache_init_entry(&ws->bo_cache, &bo->cache_entry, &bo->base);
   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
      fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
      goto error_bo_alloc;
   }

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, alignment, 0, &va, &va_handle, 0);
   if (r)
      goto error_va_alloc;

   r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = alignment;
   bo->base.usage = usage;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->bo = buf_handle;
   bo->va = va;
   bo->va_handle = va_handle;
   bo->initial_domain = initial_domain;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   if (initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(size, ws->info.gart_page_size);
   else if (initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   FREE(bo);
   return NULL;
}

bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
      return false;
   }

   return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
}

static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:  tile_split = 64;    break;
   case 1:  tile_split = 128;   break;
   case 2:  tile_split = 256;   break;
   case 3:  tile_split = 512;   break;
   default:
   case 4:  tile_split = 1024;  break;
   case 5:  tile_split = 2048;  break;
   case 6:  tile_split = 4096;  break;
   }
   return tile_split;
}

static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:    return 0;
   case 128:   return 1;
   case 256:   return 2;
   case 512:   return 3;
   default:
   case 1024:  return 4;
   case 2048:  return 5;
   case 4096:  return 6;
   }
}
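
/* Worked example (added for clarity): the two helpers above are inverses of
 * each other. The TILE_SPLIT tiling field stores a small code, while the
 * gallium metadata stores the split in bytes, so for instance:
 *
 *    eg_tile_split(3)       == 512   (code 3 -> 512-byte tile split)
 *    eg_tile_split_rev(512) == 3     (512 B  -> code 3)
 *
 * Out-of-range inputs fall into the "default" label and are treated as the
 * 1024-byte / code-4 case.
 */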

static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_info info = {0};
   uint32_t tiling_flags;
   int r;

   r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   tiling_flags = info.metadata.tiling_info;

   md->microtile = RADEON_LAYOUT_LINEAR;
   md->macrotile = RADEON_LAYOUT_LINEAR;

   if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4)  /* 2D_TILED_THIN1 */
      md->macrotile = RADEON_LAYOUT_TILED;
   else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
      md->microtile = RADEON_LAYOUT_TILED;

   md->pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
   md->bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
   md->bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
   md->tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
   md->mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
   md->num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
   md->scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_metadata metadata = {0};
   uint32_t tiling_flags = 0;

   if (md->macrotile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
   else if (md->microtile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
   else
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

   tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->pipe_config);
   tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->bankw));
   tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->bankh));
   if (md->tile_split)
      tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(md->tile_split));
   tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->mtilea));
   tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->num_banks)-1);

   if (md->scanout)
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
   else
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
                 uint64_t size,
                 unsigned alignment,
                 enum radeon_bo_domain domain,
                 enum radeon_bo_flag flags)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   unsigned usage = 0;

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align64(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);

   /* Only set one usage bit each for domains and flags, or the cache manager
    * might consider different sets of domains / flags compatible
    */
   if (domain == RADEON_DOMAIN_VRAM_GTT)
      usage = 1 << 2;
   else
      usage = domain >> 1;
   assert(flags < sizeof(usage) * 8 - 3);
   usage |= 1 << (flags + 3);

   /* Get a buffer from the cache. */
   bo = (struct amdgpu_winsys_bo*)
        pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage);
   if (bo)
      return &bo->base;

   /* Create a new one. */
   bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags);
   if (!bo) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags);
      if (!bo)
         return NULL;
   }

   bo->use_reusable_pool = true;
   return &bo->base;
}
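
/* Note on the usage key (added for clarity): pb_cache only reuses buffers
 * whose "usage" values match, so amdgpu_bo_create packs the domain into the
 * low bits (one bit for GTT, one for VRAM, a third for the combined
 * VRAM_GTT case) and shifts the whole flags bitfield into a single bit at
 * position flags + 3. Two requests therefore share a cache bucket only if
 * they asked for exactly the same domain and the same flag combination.
 * A hypothetical example, assuming the enum values from radeon_winsys.h:
 *
 *    rws->buffer_create(rws, 4096, 4096, RADEON_DOMAIN_VRAM,
 *                       RADEON_FLAG_GTT_WC);
 *    rws->buffer_create(rws, 4096, 4096, RADEON_DOMAIN_VRAM, 0);
 *
 * produce different usage keys and never satisfy each other from the cache.
 */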

static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
                                               struct winsys_handle *whandle,
                                               unsigned *stride,
                                               unsigned *offset)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   enum amdgpu_bo_handle_type type;
   struct amdgpu_bo_import_result result = {0};
   uint64_t va;
   amdgpu_va_handle va_handle;
   struct amdgpu_bo_info info = {0};
   enum radeon_bo_domain initial = 0;
   int r;

   /* Initialize the structure. */
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   default:
      /* Unknown handle type: free the half-initialized BO instead of
       * leaking it. */
      goto error;
   }

   r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
   if (r)
      goto error;

   /* Get initial domains. */
   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r)
      goto error_query;

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             result.alloc_size, 1 << 20, 0, &va, &va_handle, 0);
   if (r)
      goto error_query;

   r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = info.phys_alignment;
   bo->bo = result.buf_handle;
   bo->base.size = result.alloc_size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->va = va;
   bo->va_handle = va_handle;
   bo->initial_domain = initial;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
   bo->is_shared = true;

   if (stride)
      *stride = whandle->stride;
   if (offset)
      *offset = whandle->offset;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return &bo->base;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return NULL;
}

static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
                                    unsigned stride, unsigned offset,
                                    unsigned slice_size,
                                    struct winsys_handle *whandle)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
   enum amdgpu_bo_handle_type type;
   int r;

   bo->use_reusable_pool = false;

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   case DRM_API_HANDLE_TYPE_KMS:
      type = amdgpu_bo_handle_type_kms;
      break;
   default:
      return FALSE;
   }

   r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
   if (r)
      return FALSE;

   whandle->stride = stride;
   whandle->offset = offset;
   whandle->offset += slice_size * whandle->layer;
   bo->is_shared = true;
   return TRUE;
}

static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
                                            void *pointer, uint64_t size)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   amdgpu_bo_handle buf_handle;
   struct amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;

   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
      goto error;

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, 1 << 12, 0, &va, &va_handle, 0))
      goto error_va_alloc;

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
      goto error_va_map;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->bo = buf_handle;
   bo->base.alignment = 0;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->user_ptr = pointer;
   bo->va = va;
   bo->va_handle = va_handle;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return (struct pb_buffer*)bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return NULL;
}
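
/* Illustrative sketch (not from the original file): wrapping memory the
 * application already owns so the GPU can access it in place. "data" and
 * "size" are assumed to come from the caller and, as with amdgpu userptr
 * buffers in general, should be page-aligned:
 *
 *    struct pb_buffer *buf = ws->base.buffer_from_ptr(&ws->base, data, size);
 *    uint64_t gpu_va = buf ? ws->base.buffer_get_virtual_address(buf) : 0;
 *
 * The resulting BO reports RADEON_DOMAIN_GTT as its domain, and
 * amdgpu_bo_map() returns the original user pointer instead of calling
 * amdgpu_bo_cpu_map().
 */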

static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;
}

static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->va;
}

void amdgpu_bo_init_functions(struct amdgpu_winsys *ws)
{
   ws->base.buffer_set_metadata = amdgpu_buffer_set_metadata;
   ws->base.buffer_get_metadata = amdgpu_buffer_get_metadata;
   ws->base.buffer_map = amdgpu_bo_map;
   ws->base.buffer_unmap = amdgpu_bo_unmap;
   ws->base.buffer_wait = amdgpu_bo_wait;
   ws->base.buffer_create = amdgpu_bo_create;
   ws->base.buffer_from_handle = amdgpu_bo_from_handle;
   ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
   ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
   ws->base.buffer_get_handle = amdgpu_bo_get_handle;
   ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
   ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
}
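
/* Illustrative call sequence (hypothetical caller, not part of this file),
 * exercising the entry points wired up above; the size, alignment and flags
 * are arbitrary examples:
 *
 *    struct pb_buffer *buf =
 *       ws->base.buffer_create(&ws->base, 64 * 1024, 4096,
 *                              RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
 *    void *map = ws->base.buffer_map(buf, NULL, PIPE_TRANSFER_WRITE);
 *    // ... fill the buffer ...
 *    ws->base.buffer_unmap(buf);
 *    pb_reference(&buf, NULL);   // drops the reference; the BO returns to
 *                                // the cache via amdgpu_bo_destroy_or_cache
 */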