radv/winsys: Finish mapping for sparse residency.
[mesa.git] src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include "drm-uapi/amdgpu_drm.h"
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>

#include "util/u_atomic.h"
#include "util/u_memory.h"
#include "util/u_math.h"

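/* Buffer-object (BO) backend of the radv amdgpu winsys: allocation, CPU
 * mapping, dma-buf and host-pointer import/export, tiling metadata, and the
 * virtual (sparse) BOs whose address space is stitched together from
 * per-range mappings of backing BOs.
 */
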
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);

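/* Thin wrapper around amdgpu_bo_va_op_raw(). For mappings with a backing BO
 * the VM protection flags are derived from the RADEON_FLAG_* bits; for
 * NULL-BO mappings (the sparse/PRT case below) the caller passes the kernel
 * flags directly through internal_flags. The size is page-aligned since the
 * kernel only maps whole pages.
 */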
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
		     amdgpu_bo_handle bo,
		     uint64_t offset,
		     uint64_t size,
		     uint64_t addr,
		     uint32_t bo_flags,
		     uint64_t internal_flags,
		     uint32_t ops)
{
	uint64_t flags = internal_flags;
	if (bo) {
		flags = AMDGPU_VM_PAGE_READABLE |
		        AMDGPU_VM_PAGE_EXECUTABLE;

		if ((bo_flags & RADEON_FLAG_VA_UNCACHED) &&
		    ws->info.chip_class >= GFX9)
			flags |= AMDGPU_VM_MTYPE_UC;

		if (!(bo_flags & RADEON_FLAG_READ_ONLY))
			flags |= AMDGPU_VM_PAGE_WRITEABLE;
	}

	size = align64(size, getpagesize());

	return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
				   flags, ops);
}

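/* Map a single range of a virtual (sparse) BO. A range with a backing BO gets
 * a regular VM mapping and takes a reference on that BO; a range without one
 * is mapped as a PRT range (if the kernel supports sparse VM mappings), so
 * accesses to unbound sparse pages behave predictably instead of faulting.
 */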
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
	uint64_t internal_flags = 0;
	assert(range->size);

	if (!range->bo) {
		if (!bo->ws->info.has_sparse_vm_mappings)
			return;

		internal_flags |= AMDGPU_VM_PAGE_PRT;
	} else
		p_atomic_inc(&range->bo->ref_count);

	int r = radv_amdgpu_bo_va_op(bo->ws, range->bo ? range->bo->bo : NULL,
				     range->bo_offset, range->size,
				     range->offset + bo->base.va, 0,
				     internal_flags, AMDGPU_VA_OP_MAP);
	if (r)
		abort();
}

static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
	uint64_t internal_flags = 0;
	assert(range->size);

	if (!range->bo) {
		if (!bo->ws->info.has_sparse_vm_mappings)
			return;

		/* Even though this is an unmap, if we don't set this flag,
		 * AMDGPU is going to complain about the missing buffer. */
		internal_flags |= AMDGPU_VM_PAGE_PRT;
	}

	int r = radv_amdgpu_bo_va_op(bo->ws, range->bo ? range->bo->bo : NULL,
				     range->bo_offset, range->size,
				     range->offset + bo->base.va, 0, internal_flags,
				     AMDGPU_VA_OP_UNMAP);
	if (r)
		abort();

	if (range->bo)
		radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}

static int bo_comparator(const void *ap, const void *bp) {
	struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
	struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
	return (a > b) ? 1 : (a < b) ? -1 : 0;
}

static void
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
	if (bo->bo_capacity < bo->range_count) {
		uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
		bo->bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
		bo->bo_capacity = new_count;
	}

	uint32_t temp_bo_count = 0;
	for (uint32_t i = 0; i < bo->range_count; ++i)
		if (bo->ranges[i].bo)
			bo->bos[temp_bo_count++] = bo->ranges[i].bo;

	qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

	uint32_t final_bo_count = temp_bo_count ? 1 : 0;
	for (uint32_t i = 1; i < temp_bo_count; ++i)
		if (bo->bos[i] != bo->bos[i - 1])
			bo->bos[final_bo_count++] = bo->bos[i];

	bo->bo_count = final_bo_count;
}

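/* Bind the range [offset, offset + size) of the virtual (sparse) BO "_parent"
 * to [bo_offset, bo_offset + size) of "_bo", or leave it without backing when
 * _bo is NULL. The parent keeps a sorted, non-overlapping list of ranges that
 * covers its whole VA space; a bind replaces the covered part of that list,
 * trimming, splitting or merging the neighbouring ranges as needed and
 * (re)mapping only what actually changed.
 *
 * Example (page-sized units): with ranges {[0,16)->NULL, [16,32)->A}, binding
 * [8,24) to B yields {[0,8)->NULL, [8,24)->B, [24,32)->A}: the NULL range is
 * trimmed from the back, the A range is trimmed from the front, and the new
 * range is inserted in between.
 */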
static void
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size,
                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
	struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
	struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
	int range_count_delta, new_idx;
	int first = 0, last;
	struct radv_amdgpu_map_range new_first, new_last;

	assert(parent->is_virtual);
	assert(!bo || !bo->is_virtual);

	if (!size)
		return;

	/* We have at most 2 new ranges (1 by the bind, and another one by splitting
	 * a range that contains the newly bound range). */
	if (parent->range_capacity - parent->range_count < 2) {
		parent->range_capacity += 2;
		parent->ranges = realloc(parent->ranges,
		                         parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
	}

	/*
	 * [first, last] is exactly the range of ranges that either overlap the
	 * new parent, or are adjacent to it. This corresponds to the bind ranges
	 * that may change.
	 */
	while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
		++first;

	last = first;
	while (last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
		++last;

	/* Whether the first or last range is going to be totally removed or just
	 * resized/left alone. Note that in the case of first == last, we will split
	 * this into a part before and after the new range. The remove flag then
	 * indicates that the corresponding split part should not be created. */
	bool remove_first = parent->ranges[first].offset == offset;
	bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
	bool unmapped_first = false;

	assert(parent->ranges[first].offset <= offset);
	assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

	/* Try to merge the new range with the first range. */
	if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
		size += offset - parent->ranges[first].offset;
		offset = parent->ranges[first].offset;
		bo_offset = parent->ranges[first].bo_offset;
		remove_first = true;
	}

	/* Try to merge the new range with the last range. */
	if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
		size = parent->ranges[last].offset + parent->ranges[last].size - offset;
		remove_last = true;
	}

	range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
	new_idx = first + !remove_first;

	/* Any ranges between first and last are going to be entirely covered by the new range, so just unmap them. */
	for (int i = first + 1; i < last; ++i)
		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

	/* If the first/last range is not left alone we unmap it and optionally map
	 * it again after modifications. Note that this implicitly can do the
	 * splitting if first == last. */
	new_first = parent->ranges[first];
	new_last = parent->ranges[last];

	if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
		unmapped_first = true;

		if (!remove_first) {
			new_first.size = offset - new_first.offset;
			radv_amdgpu_winsys_virtual_map(parent, &new_first);
		}
	}

	if (parent->ranges[last].offset < offset + size || remove_last) {
		if (first != last || !unmapped_first)
			radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

		if (!remove_last) {
			new_last.size -= offset + size - new_last.offset;
			if (new_last.bo)
				new_last.bo_offset += offset + size - new_last.offset;
			new_last.offset = offset + size;
			radv_amdgpu_winsys_virtual_map(parent, &new_last);
		}
	}

	/* Move the range list after last to account for the changed number of ranges. */
	memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
	        sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

	if (!remove_first)
		parent->ranges[first] = new_first;

	if (!remove_last)
		parent->ranges[new_idx + 1] = new_last;

	/* Actually set up the new range. */
	parent->ranges[new_idx].offset = offset;
	parent->ranges[new_idx].size = size;
	parent->ranges[new_idx].bo = bo;
	parent->ranges[new_idx].bo_offset = bo_offset;

	radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

	parent->range_count += range_count_delta;

	radv_amdgpu_winsys_rebuild_bo_list(parent);
}

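/* Illustrative call sequence (a sketch, not lifted from the actual radv call
 * sites): a sparse resource gets a virtual BO and is backed piecewise through
 * the winsys entry points installed in radv_amdgpu_bo_init_functions():
 *
 *    struct radeon_winsys_bo *virt =
 *       ws->base.buffer_create(&ws->base, resource_size, 4096, 0,
 *                              RADEON_FLAG_VIRTUAL, priority);
 *    // Back [bind_offset, bind_offset + bind_size) with "mem".
 *    ws->base.buffer_virtual_bind(virt, bind_offset, bind_size, mem, mem_offset);
 *    // Unbind again; the range goes back to being a PRT mapping.
 *    ws->base.buffer_virtual_bind(virt, bind_offset, bind_size, NULL, 0);
 */
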
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct radv_amdgpu_winsys *ws = bo->ws;

	if (p_atomic_dec_return(&bo->ref_count))
		return;
	if (bo->is_virtual) {
		for (uint32_t i = 0; i < bo->range_count; ++i) {
			radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
		}
		free(bo->bos);
		free(bo->ranges);
	} else {
		if (bo->ws->debug_all_bos) {
			pthread_mutex_lock(&bo->ws->global_bo_list_lock);
			list_del(&bo->global_list_item);
			bo->ws->num_buffers--;
			pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
		}
		radv_amdgpu_bo_va_op(bo->ws, bo->bo, 0, bo->size, bo->base.va,
				     0, 0, AMDGPU_VA_OP_UNMAP);
		amdgpu_bo_free(bo->bo);
	}

	if (bo->initial_domain & RADEON_DOMAIN_VRAM) {
		if (bo->base.vram_no_cpu_access) {
			p_atomic_add(&ws->allocated_vram,
				     -align64(bo->size, ws->info.gart_page_size));
		} else {
			p_atomic_add(&ws->allocated_vram_vis,
				     -align64(bo->size, ws->info.gart_page_size));
		}
	}

	if (bo->initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     -align64(bo->size, ws->info.gart_page_size));

	amdgpu_va_range_free(bo->va_handle);
	FREE(bo);
}

static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
	struct radv_amdgpu_winsys *ws = bo->ws;

	if (bo->ws->debug_all_bos) {
		pthread_mutex_lock(&ws->global_bo_list_lock);
		list_addtail(&bo->global_list_item, &ws->global_bo_list);
		ws->num_buffers++;
		pthread_mutex_unlock(&ws->global_bo_list_lock);
	}
}

static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
			     uint64_t size,
			     unsigned alignment,
			     enum radeon_bo_domain initial_domain,
			     unsigned flags,
			     unsigned priority)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_winsys_bo *bo;
	struct amdgpu_bo_alloc_request request = {0};
	amdgpu_bo_handle buf_handle;
	uint64_t va = 0;
	amdgpu_va_handle va_handle;
	int r;
	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo) {
		return NULL;
	}

	unsigned virt_alignment = alignment;
	if (size >= ws->info.pte_fragment_size)
		virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  size, virt_alignment, 0, &va, &va_handle,
				  (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
				  AMDGPU_VA_RANGE_HIGH);
	if (r)
		goto error_va_alloc;

	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->size = size;
	bo->ws = ws;
	bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
	bo->ref_count = 1;

	if (flags & RADEON_FLAG_VIRTUAL) {
		bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
		bo->range_count = 1;
		bo->range_capacity = 1;

		bo->ranges[0].offset = 0;
		bo->ranges[0].size = size;
		bo->ranges[0].bo = NULL;
		bo->ranges[0].bo_offset = 0;

		radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
		return (struct radeon_winsys_bo *)bo;
	}

	request.alloc_size = size;
	request.phys_alignment = alignment;

	if (initial_domain & RADEON_DOMAIN_VRAM)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
	if (initial_domain & RADEON_DOMAIN_GTT)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
	if (initial_domain & RADEON_DOMAIN_GDS)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
	if (initial_domain & RADEON_DOMAIN_OA)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

	if (flags & RADEON_FLAG_CPU_ACCESS)
		request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
		bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
		request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	}
	if (flags & RADEON_FLAG_GTT_WC)
		request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
		request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
	if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
	    ws->info.has_local_buffers &&
	    (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
		bo->base.is_local = true;
		request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
	}

	/* This won't do anything on pre-4.9 kernels. */
	if (initial_domain & RADEON_DOMAIN_VRAM) {
		if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
			request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
	}

	r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
	if (r) {
		fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
		fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
		fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
		fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
		goto error_bo_alloc;
	}

	r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0,
				 AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	bo->bo = buf_handle;
	bo->initial_domain = initial_domain;
	bo->is_shared = false;
	bo->priority = priority;

	r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	if (initial_domain & RADEON_DOMAIN_VRAM) {
		/* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
		 * aren't mappable and they are counted as part of the VRAM
		 * counter.
		 *
		 * Otherwise, buffers with the CPU_ACCESS flag or with neither
		 * flag set (imported buffers) are counted as part of the
		 * visible VRAM counter because they can be mapped.
		 */
		if (bo->base.vram_no_cpu_access) {
			p_atomic_add(&ws->allocated_vram,
				     align64(bo->size, ws->info.gart_page_size));
		} else {
			p_atomic_add(&ws->allocated_vram_vis,
				     align64(bo->size, ws->info.gart_page_size));
		}
	}

	if (initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;
error_va_map:
	amdgpu_bo_free(buf_handle);

error_bo_alloc:
	amdgpu_va_range_free(va_handle);

error_va_alloc:
	FREE(bo);
	return NULL;
}

static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	int ret;
	void *data;
	ret = amdgpu_bo_cpu_map(bo->bo, &data);
	if (ret)
		return NULL;
	return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	amdgpu_bo_cpu_unmap(bo->bo);
}

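/* Pick a VM alignment that lets the kernel use large PTE fragments and, on
 * GFX9+, a power-of-two base aligned to the top bit of the size, both of
 * which speed up address translation.
 *
 * Worked example (illustrative sizes): importing 144 KiB (0x24000 bytes) with
 * a 4 KiB GART page size and a 64 KiB pte_fragment_size on GFX9 gives
 * MAX2(4 KiB, 64 KiB, 1ull << (18 - 1) = 128 KiB) = 128 KiB.
 */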
static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
				     uint64_t size, unsigned alignment)
{
	uint64_t vm_alignment = alignment;

	/* Increase the VM alignment for faster address translation. */
	if (size >= ws->info.pte_fragment_size)
		vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

	/* Gfx9: Increase the VM alignment to the most significant bit set
	 * in the size for faster address translation.
	 */
	if (ws->info.chip_class >= GFX9) {
		unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
		uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

		vm_alignment = MAX2(vm_alignment, msb_alignment);
	}
	return vm_alignment;
}

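/* Wrap an existing, page-aligned CPU allocation into a GTT BO via
 * amdgpu_create_bo_from_user_mem() so the GPU can address it directly; in
 * radv this is what backs host-pointer imports (VK_EXT_external_memory_host).
 */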
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
                               void *pointer,
                               uint64_t size,
                               unsigned priority)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	amdgpu_bo_handle buf_handle;
	struct radv_amdgpu_winsys_bo *bo;
	uint64_t va;
	amdgpu_va_handle va_handle;
	uint64_t vm_alignment;

	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo)
		return NULL;

	if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
		goto error;

	/* Using the optimal VM alignment also fixes GPU hangs for buffers that
	 * are imported.
	 */
	vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
							    ws->info.gart_page_size);

	if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  size, vm_alignment, 0, &va, &va_handle,
				  AMDGPU_VA_RANGE_HIGH))
		goto error_va_alloc;

	if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
		goto error_va_map;

	/* Initialize it */
	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->size = size;
	bo->ref_count = 1;
	bo->ws = ws;
	bo->bo = buf_handle;
	bo->initial_domain = RADEON_DOMAIN_GTT;
	bo->priority = priority;

	ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	p_atomic_add(&ws->allocated_gtt,
		     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;

error_va_map:
	amdgpu_va_range_free(va_handle);

error_va_alloc:
	amdgpu_bo_free(buf_handle);

error:
	FREE(bo);
	return NULL;
}

static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
			      int fd, unsigned priority,
			      uint64_t *alloc_size)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_winsys_bo *bo;
	uint64_t va;
	amdgpu_va_handle va_handle;
	enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
	struct amdgpu_bo_import_result result = {0};
	struct amdgpu_bo_info info = {0};
	enum radeon_bo_domain initial = 0;
	int r;
	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo)
		return NULL;

	r = amdgpu_bo_import(ws->dev, type, fd, &result);
	if (r)
		goto error;

	r = amdgpu_bo_query_info(result.buf_handle, &info);
	if (r)
		goto error_query;

	if (alloc_size) {
		*alloc_size = info.alloc_size;
	}

	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  result.alloc_size, 1 << 20, 0, &va, &va_handle,
				  AMDGPU_VA_RANGE_HIGH);
	if (r)
		goto error_query;

	r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
				 va, 0, 0, AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
		initial |= RADEON_DOMAIN_VRAM;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
		initial |= RADEON_DOMAIN_GTT;

	bo->bo = result.buf_handle;
	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->initial_domain = initial;
	bo->size = result.alloc_size;
	bo->is_shared = true;
	bo->ws = ws;
	bo->priority = priority;
	bo->ref_count = 1;

	r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	if (bo->initial_domain & RADEON_DOMAIN_VRAM)
		p_atomic_add(&ws->allocated_vram,
			     align64(bo->size, ws->info.gart_page_size));
	if (bo->initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;
error_va_map:
	amdgpu_va_range_free(va_handle);

error_query:
	amdgpu_bo_free(result.buf_handle);

error:
	FREE(bo);
	return NULL;
}

static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
			  struct radeon_winsys_bo *_bo,
			  int *fd)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
	int r;
	unsigned handle;
	r = amdgpu_bo_export(bo->bo, type, &handle);
	if (r)
		return false;

	*fd = (int)handle;
	bo->is_shared = true;
	return true;
}

static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd,
                                 enum radeon_bo_domain *domains,
                                 enum radeon_bo_flag *flags)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct amdgpu_bo_import_result result = {0};
	struct amdgpu_bo_info info = {0};
	int r;

	*domains = 0;
	*flags = 0;

	r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
	if (r)
		return false;

	r = amdgpu_bo_query_info(result.buf_handle, &info);
	amdgpu_bo_free(result.buf_handle);
	if (r)
		return false;

	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
		*domains |= RADEON_DOMAIN_VRAM;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
		*domains |= RADEON_DOMAIN_GTT;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
		*domains |= RADEON_DOMAIN_GDS;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
		*domains |= RADEON_DOMAIN_OA;

	if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
		*flags |= RADEON_FLAG_CPU_ACCESS;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
		*flags |= RADEON_FLAG_NO_CPU_ACCESS;
	if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
		*flags |= RADEON_FLAG_IMPLICIT_SYNC;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
		*flags |= RADEON_FLAG_GTT_WC;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
		*flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
		*flags |= RADEON_FLAG_ZERO_VRAM;
	return true;
}

static unsigned eg_tile_split(unsigned tile_split)
{
	switch (tile_split) {
	case 0:  tile_split = 64;   break;
	case 1:  tile_split = 128;  break;
	case 2:  tile_split = 256;  break;
	case 3:  tile_split = 512;  break;
	default:
	case 4:  tile_split = 1024; break;
	case 5:  tile_split = 2048; break;
	case 6:  tile_split = 4096; break;
	}
	return tile_split;
}

static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
	switch (eg_tile_split) {
	case 64:   return 0;
	case 128:  return 1;
	case 256:  return 2;
	case 512:  return 3;
	default:
	case 1024: return 4;
	case 2048: return 5;
	case 4096: return 6;
	}
}

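/* The TILE_SPLIT tiling field stores log2(bytes) - 6, so the two helpers
 * above convert both ways between the byte count used in radeon_bo_metadata
 * and the hardware encoding (e.g. 256 bytes <-> 2).
 */
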
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
				   struct radeon_bo_metadata *md)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct amdgpu_bo_metadata metadata = {0};
	uint64_t tiling_flags = 0;

	if (bo->ws->info.chip_class >= GFX9) {
		tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
		tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
	} else {
		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
		else
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

		tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
		tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
		tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
		if (md->u.legacy.tile_split)
			tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
		tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
		tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

		if (md->u.legacy.scanout)
			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
		else
			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
	}

	metadata.tiling_info = tiling_flags;
	metadata.size_metadata = md->size_metadata;
	memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

	amdgpu_bo_set_metadata(bo->bo, &metadata);
}

static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo,
				   struct radeon_bo_metadata *md)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct amdgpu_bo_info info = {0};

	int r = amdgpu_bo_query_info(bo->bo, &info);
	if (r)
		return;

	uint64_t tiling_flags = info.metadata.tiling_info;

	if (bo->ws->info.chip_class >= GFX9) {
		md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
		md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
	} else {
		md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
		md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

		if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4)  /* 2D_TILED_THIN1 */
			md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
		else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
			md->u.legacy.microtile = RADEON_LAYOUT_TILED;

		md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
		md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
		md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
		md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
		md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
		md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
		md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
	}

	md->size_metadata = info.metadata.size_metadata;
	memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
	ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
	ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
	ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
	ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
	ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
	ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
	ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
	ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
	ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
	ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
	ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
}