winsys/amdgpu: explicitly declare whether buffer_map is permanent or not
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Wed, 21 Nov 2018 17:17:02 +0000 (18:17 +0100)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Wed, 28 Nov 2018 17:24:14 +0000 (18:24 +0100)
Introduce a new driver-private transfer flag RADEON_TRANSFER_TEMPORARY
that specifies whether the caller will use buffer_unmap or not. The
default behavior is set to permanent maps, because that's what drivers
do for Gallium buffer maps.

This should eliminate the need for hacks in libdrm. Assertions are added
to catch when the buffer_unmap calls don't match the (temporary)
buffer_map calls.

I did my best to update r600 for consistency (r300 needs no changes
because it never calls buffer_unmap), even though the radeon winsys
ignores the new flag.

As an added bonus, this should actually improve the performance of
the normal fast path, because we no longer call into libdrm at all
after the first map, and there's one less atomic in the winsys itself
(there are now no atomics left in the UNSYNCHRONIZED fast path).

Cc: Leo Liu <leo.liu@amd.com>
v2:
- remove comment about visible VRAM (Marek)
- don't rely on amdgpu_bo_cpu_map doing an atomic write
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
17 files changed:
src/gallium/drivers/r600/evergreen_compute.c
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/radeon_uvd.c
src/gallium/drivers/r600/radeon_vce.c
src/gallium/drivers/r600/radeon_video.c
src/gallium/drivers/radeon/radeon_uvd.c
src/gallium/drivers/radeon/radeon_uvd_enc.c
src/gallium/drivers/radeon/radeon_vce.c
src/gallium/drivers/radeon/radeon_vcn_dec.c
src/gallium/drivers/radeon/radeon_vcn_enc.c
src/gallium/drivers/radeon/radeon_video.c
src/gallium/drivers/radeon/radeon_winsys.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/include/pipe/p_defines.h
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
src/gallium/winsys/amdgpu/drm/amdgpu_bo.h

index a77f58242e35d506524e494e4c06ec8f2fab909d..9085be4e2f33c0077b741bdafc032795e015110d 100644 (file)
@@ -438,7 +438,9 @@ static void *evergreen_create_compute_state(struct pipe_context *ctx,
        /* Upload code + ROdata */
        shader->code_bo = r600_compute_buffer_alloc_vram(rctx->screen,
                                                        shader->bc.ndw * 4);
-       p = r600_buffer_map_sync_with_rings(&rctx->b, shader->code_bo, PIPE_TRANSFER_WRITE);
+       p = r600_buffer_map_sync_with_rings(
+               &rctx->b, shader->code_bo,
+               PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
        //TODO: use util_memcpy_cpu_to_le32 ?
        memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
        rctx->b.ws->buffer_unmap(shader->code_bo->buf);
index 7029be24f4b69a096134e5c050ae4c0191b92613..4ba77c535f9b05cd44bb48ddb51094bd8608ea71 100644 (file)
@@ -2772,7 +2772,9 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
                return NULL;
        }
 
-       bytecode = r600_buffer_map_sync_with_rings(&rctx->b, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
+       bytecode = r600_buffer_map_sync_with_rings
+               (&rctx->b, shader->buffer,
+               PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY);
        bytecode += shader->offset / 4;
 
        if (R600_BIG_ENDIAN) {
index 408939d11056a59f62251a86591bcb34b4656a30..fc826470d69962284bd9a00bfa1acb4bb0c04dfa 100644 (file)
@@ -141,7 +141,9 @@ static int store_shader(struct pipe_context *ctx,
                if (shader->bo == NULL) {
                        return -ENOMEM;
                }
-               ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
+               ptr = r600_buffer_map_sync_with_rings(
+                       &rctx->b, shader->bo,
+                       PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
                if (R600_BIG_ENDIAN) {
                        for (i = 0; i < shader->shader.bc.ndw; ++i) {
                                ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);
index 495a93dc55a1b15cef050551d7d012fb2de01bfe..5568f2138e483d63549a45354f30e10ff6e89d8e 100644 (file)
@@ -152,7 +152,8 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
        buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
 
        /* and map it for CPU access */
-       ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+       ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
+                                  PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
 
        /* calc buffer offsets */
        dec->msg = (struct ruvd_msg *)ptr;
@@ -1068,7 +1069,7 @@ static void ruvd_begin_frame(struct pipe_video_codec *decoder,
        dec->bs_size = 0;
        dec->bs_ptr = dec->ws->buffer_map(
                dec->bs_buffers[dec->cur_buffer].res->buf,
-               dec->cs, PIPE_TRANSFER_WRITE);
+               dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
 }
 
 /**
@@ -1121,7 +1122,8 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
                        }
 
                        dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
-                                                         PIPE_TRANSFER_WRITE);
+                                                         PIPE_TRANSFER_WRITE |
+                                                         RADEON_TRANSFER_TEMPORARY);
                        if (!dec->bs_ptr)
                                return;
 
index 60ba12a593a637fa4880ee631bc802ecc23be82d..e38b927b1d4da22e8a260764683ec93f9a25e8d1 100644 (file)
@@ -353,7 +353,9 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder,
        struct rvid_buffer *fb = feedback;
 
        if (size) {
-               uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+               uint32_t *ptr = enc->ws->buffer_map(
+                       fb->res->buf, enc->cs,
+                       PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);
 
                if (ptr[1]) {
                        *size = ptr[4] - ptr[9];
index 02fcf77d4ff6283a407d17f40ff1f819ad05e457..8e0af448be51e645e826e917250f90a80b5178a0 100644 (file)
@@ -97,11 +97,13 @@ bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs,
        if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
                goto error;
 
-       src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
+       src = ws->buffer_map(old_buf.res->buf, cs,
+                            PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY);
        if (!src)
                goto error;
 
-       dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE);
+       dst = ws->buffer_map(new_buf->res->buf, cs,
+                            PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
        if (!dst)
                goto error;
 
index 62af1a311c2e113b6ee2028f26cfead52fcd5039..ca066e898234dd142aaa19dbdbf6724ff4633bcb 100644 (file)
@@ -148,7 +148,8 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
        buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
 
        /* and map it for CPU access */
-       ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+       ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
+                                 PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
 
        /* calc buffer offsets */
        dec->msg = (struct ruvd_msg *)ptr;
@@ -1015,7 +1016,7 @@ static void ruvd_begin_frame(struct pipe_video_codec *decoder,
        dec->bs_size = 0;
        dec->bs_ptr = dec->ws->buffer_map(
                dec->bs_buffers[dec->cur_buffer].res->buf,
-               dec->cs, PIPE_TRANSFER_WRITE);
+               dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
 }
 
 /**
@@ -1060,8 +1061,9 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
                                return;
                        }
 
-                       dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
-                                                         PIPE_TRANSFER_WRITE);
+                       dec->bs_ptr = dec->ws->buffer_map(
+                               buf->res->buf, dec->cs,
+                               PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
                        if (!dec->bs_ptr)
                                return;
 
index 4384e5e1646e833f830e1962f68e47956998d1eb..3164dbb2c20f7fb4608f8811a71e10ce89a81249 100644 (file)
@@ -263,9 +263,9 @@ radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder,
 
    if (NULL != size) {
       radeon_uvd_enc_feedback_t *fb_data =
-         (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(fb->res->buf,
-                                                           enc->cs,
-                                                           PIPE_TRANSFER_READ_WRITE);
+         (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(
+               fb->res->buf, enc->cs,
+               PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);
 
       if (!fb_data->status)
          *size = fb_data->bitstream_size;
index 310d1654b05dcf6490871a6bf6b8f2e938d6e47a..94df06e88c6b5334a899109050a400a99d2f374a 100644 (file)
@@ -352,7 +352,9 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder,
        struct rvid_buffer *fb = feedback;
 
        if (size) {
-               uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+               uint32_t *ptr = enc->ws->buffer_map(
+                       fb->res->buf, enc->cs,
+                       PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);
 
                if (ptr[1]) {
                        *size = ptr[4] - ptr[9];
index 1ee85ae3d3f799668cbcb73dcf4e569c5c314a16..e402af21a64ea7eeef3184a1d7a86c7351820eaa 100644 (file)
@@ -941,7 +941,9 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
                        si_vid_clear_buffer(dec->base.context, &dec->ctx);
 
                        /* ctx needs probs table */
-                       ptr = dec->ws->buffer_map(dec->ctx.res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+                       ptr = dec->ws->buffer_map(
+                               dec->ctx.res->buf, dec->cs,
+                               PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
                        fill_probs_table(ptr);
                        dec->ws->buffer_unmap(dec->ctx.res->buf);
                }
@@ -1034,7 +1036,8 @@ static void map_msg_fb_it_probs_buf(struct radeon_decoder *dec)
        buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
 
        /* and map it for CPU access */
-       ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+       ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
+                                 PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
 
        /* calc buffer offsets */
        dec->msg = ptr;
@@ -1312,7 +1315,7 @@ static void radeon_dec_begin_frame(struct pipe_video_codec *decoder,
        dec->bs_size = 0;
        dec->bs_ptr = dec->ws->buffer_map(
                dec->bs_buffers[dec->cur_buffer].res->buf,
-               dec->cs, PIPE_TRANSFER_WRITE);
+               dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
 }
 
 /**
@@ -1357,8 +1360,9 @@ static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder,
                                return;
                        }
 
-                       dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
-                                                         PIPE_TRANSFER_WRITE);
+                       dec->bs_ptr = dec->ws->buffer_map(
+                               buf->res->buf, dec->cs,
+                               PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
                        if (!dec->bs_ptr)
                                return;
 
@@ -1543,7 +1547,9 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
                        void *ptr;
 
                        buf = &dec->msg_fb_it_probs_buffers[i];
-                       ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+                       ptr = dec->ws->buffer_map(
+                               buf->res->buf, dec->cs,
+                               PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
                        ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
                        fill_probs_table(ptr);
                        dec->ws->buffer_unmap(buf->res->buf);
index e8676f6c721033f463506c45662a917773563ed0..7d64a28a405483dd893c0739e83f3a3f81d042ed 100644 (file)
@@ -244,7 +244,9 @@ static void radeon_enc_get_feedback(struct pipe_video_codec *encoder,
        struct rvid_buffer *fb = feedback;
 
        if (size) {
-               uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+               uint32_t *ptr = enc->ws->buffer_map(
+                       fb->res->buf, enc->cs,
+                       PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);
                if (ptr[1])
                        *size = ptr[6];
                else
index a39ce4cc73e4a95583747ebdb1e7c8f530e225d0..bb1173e8005df33f9c482f62d2bca6e7bc281686 100644 (file)
@@ -88,11 +88,13 @@ bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs,
        if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage))
                goto error;
 
-       src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
+       src = ws->buffer_map(old_buf.res->buf, cs,
+                            PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY);
        if (!src)
                goto error;
 
-       dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE);
+       dst = ws->buffer_map(new_buf->res->buf, cs,
+                            PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
        if (!dst)
                goto error;
 
index 49f8bb279e5d70374e7d677dde308776b6c71138..a56ff75ad242f594244574c9db2dc387e6b1d5e2 100644 (file)
@@ -76,6 +76,15 @@ enum radeon_bo_usage { /* bitfield */
     RADEON_USAGE_SYNCHRONIZED = 8
 };
 
+enum radeon_transfer_flags {
+   /* Indicates that the caller will unmap the buffer.
+    *
+    * Not unmapping buffers is an important performance optimization for
+    * OpenGL (avoids kernel overhead for frequently mapped buffers).
+    */
+   RADEON_TRANSFER_TEMPORARY = (PIPE_TRANSFER_DRV_PRV << 0),
+};
+
 #define RADEON_SPARSE_PAGE_SIZE (64 * 1024)
 
 enum ring_type {
@@ -294,9 +303,12 @@ struct radeon_winsys {
      * Map the entire data store of a buffer object into the client's address
      * space.
      *
+     * Callers are expected to unmap buffers again if and only if the
+     * RADEON_TRANSFER_TEMPORARY flag is set in \p usage.
+     *
      * \param buf       A winsys buffer object to map.
      * \param cs        A command stream to flush if the buffer is referenced by it.
-     * \param usage     A bitmask of the PIPE_TRANSFER_* flags.
+     * \param usage     A bitmask of the PIPE_TRANSFER_* and RADEON_TRANSFER_* flags.
      * \return          The pointer at the beginning of the buffer.
      */
     void *(*buffer_map)(struct pb_buffer *buf,
index 19522cc97b1721a11af6970248c843e7afc10544..d455fb5db6a0bd35d1e5787d73155c942f64dda6 100644 (file)
@@ -5293,7 +5293,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
        /* Upload. */
        ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
                                        PIPE_TRANSFER_READ_WRITE |
-                                       PIPE_TRANSFER_UNSYNCHRONIZED);
+                                       PIPE_TRANSFER_UNSYNCHRONIZED |
+                                       RADEON_TRANSFER_TEMPORARY);
 
        /* Don't use util_memcpy_cpu_to_le32. LLVM binaries are
         * endian-independent. */
index 693f041b1da928a301c34d31f91e88eb45a5ac25..e99895d30d8c271521ebb0683d5c99a3b3a71e31 100644 (file)
@@ -341,7 +341,13 @@ enum pipe_transfer_usage
     * PIPE_RESOURCE_FLAG_MAP_COHERENT must be set when creating
     * the resource.
     */
-   PIPE_TRANSFER_COHERENT = (1 << 14)
+   PIPE_TRANSFER_COHERENT = (1 << 14),
+
+   /**
+    * This and higher bits are reserved for private use by drivers. Drivers
+    * should use this as (PIPE_TRANSFER_DRV_PRV << i).
+    */
+   PIPE_TRANSFER_DRV_PRV = (1 << 24)
 };
 
 /**
index 4cbaa8056ad74fed0c8de8ca244dc7675f9af5ec..73336dd3e011c924e355e640e0907efbcdd3cfa8 100644 (file)
@@ -56,6 +56,7 @@ amdgpu_bo_create(struct radeon_winsys *rws,
                  unsigned alignment,
                  enum radeon_bo_domain domain,
                  enum radeon_bo_flag flags);
+static void amdgpu_bo_unmap(struct pb_buffer *buf);
 
 static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                            enum radeon_bo_usage usage)
@@ -173,6 +174,12 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
 
    assert(bo->bo && "must not be called for slab entries");
 
+   if (!bo->is_user_ptr && bo->cpu_ptr) {
+      bo->cpu_ptr = NULL;
+      amdgpu_bo_unmap(&bo->base);
+   }
+   assert(bo->is_user_ptr || bo->u.real.map_count == 0);
+
    if (ws->debug_all_bos) {
       simple_mtx_lock(&ws->global_bo_list_lock);
       LIST_DEL(&bo->u.real.global_list_item);
@@ -195,14 +202,6 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
    else if (bo->initial_domain & RADEON_DOMAIN_GTT)
       ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size);
 
-   if (bo->u.real.map_count >= 1) {
-      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
-         ws->mapped_vram -= bo->base.size;
-      else if (bo->initial_domain & RADEON_DOMAIN_GTT)
-         ws->mapped_gtt -= bo->base.size;
-      ws->num_mapped_buffers--;
-   }
-
    simple_mtx_destroy(&bo->lock);
    FREE(bo);
 }
@@ -219,6 +218,29 @@ static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
       amdgpu_bo_destroy(_buf);
 }
 
+static bool amdgpu_bo_do_map(struct amdgpu_winsys_bo *bo, void **cpu)
+{
+   assert(!bo->sparse && bo->bo && !bo->is_user_ptr);
+   int r = amdgpu_bo_cpu_map(bo->bo, cpu);
+   if (r) {
+      /* Clear the cache and try again. */
+      pb_cache_release_all_buffers(&bo->ws->bo_cache);
+      r = amdgpu_bo_cpu_map(bo->bo, cpu);
+      if (r)
+         return false;
+   }
+
+   if (p_atomic_inc_return(&bo->u.real.map_count) == 1) {
+      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
+         bo->ws->mapped_vram += bo->base.size;
+      else if (bo->initial_domain & RADEON_DOMAIN_GTT)
+         bo->ws->mapped_gtt += bo->base.size;
+      bo->ws->num_mapped_buffers++;
+   }
+
+   return true;
+}
+
 static void *amdgpu_bo_map(struct pb_buffer *buf,
                            struct radeon_cmdbuf *rcs,
                            enum pipe_transfer_usage usage)
@@ -226,9 +248,6 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,
    struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
    struct amdgpu_winsys_bo *real;
    struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
-   int r;
-   void *cpu = NULL;
-   uint64_t offset = 0;
 
    assert(!bo->sparse);
 
@@ -313,9 +332,9 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,
       }
    }
 
-   /* If the buffer is created from user memory, return the user pointer. */
-   if (bo->user_ptr)
-      return bo->user_ptr;
+   /* Buffer synchronization has been checked, now actually map the buffer. */
+   void *cpu = NULL;
+   uint64_t offset = 0;
 
    if (bo->bo) {
       real = bo;
@@ -324,22 +343,31 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,
       offset = bo->va - real->va;
    }
 
-   r = amdgpu_bo_cpu_map(real->bo, &cpu);
-   if (r) {
-      /* Clear the cache and try again. */
-      pb_cache_release_all_buffers(&real->ws->bo_cache);
-      r = amdgpu_bo_cpu_map(real->bo, &cpu);
-      if (r)
-         return NULL;
+   if (usage & RADEON_TRANSFER_TEMPORARY) {
+      if (real->is_user_ptr) {
+         cpu = real->cpu_ptr;
+      } else {
+         if (!amdgpu_bo_do_map(real, &cpu))
+            return NULL;
+      }
+   } else {
+      cpu = p_atomic_read(&real->cpu_ptr);
+      if (!cpu) {
+         simple_mtx_lock(&real->lock);
+         /* Must re-check due to the possibility of a race. Re-check need not
+          * be atomic thanks to the lock. */
+         cpu = real->cpu_ptr;
+         if (!cpu) {
+            if (!amdgpu_bo_do_map(real, &cpu)) {
+               simple_mtx_unlock(&real->lock);
+               return NULL;
+            }
+            p_atomic_set(&real->cpu_ptr, cpu);
+         }
+         simple_mtx_unlock(&real->lock);
+      }
    }
 
-   if (p_atomic_inc_return(&real->u.real.map_count) == 1) {
-      if (real->initial_domain & RADEON_DOMAIN_VRAM)
-         real->ws->mapped_vram += real->base.size;
-      else if (real->initial_domain & RADEON_DOMAIN_GTT)
-         real->ws->mapped_gtt += real->base.size;
-      real->ws->num_mapped_buffers++;
-   }
    return (uint8_t*)cpu + offset;
 }
 
@@ -350,12 +378,15 @@ static void amdgpu_bo_unmap(struct pb_buffer *buf)
 
    assert(!bo->sparse);
 
-   if (bo->user_ptr)
+   if (bo->is_user_ptr)
       return;
 
    real = bo->bo ? bo : bo->u.slab.real;
-
+   assert(real->u.real.map_count != 0 && "too many unmaps");
    if (p_atomic_dec_zero(&real->u.real.map_count)) {
+      assert(!real->cpu_ptr &&
+             "too many unmaps or forgot RADEON_TRANSFER_TEMPORARY flag");
+
       if (real->initial_domain & RADEON_DOMAIN_VRAM)
          real->ws->mapped_vram -= real->base.size;
       else if (real->initial_domain & RADEON_DOMAIN_GTT)
@@ -1459,6 +1490,7 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
         goto error_va_map;
 
     /* Initialize it. */
+    bo->is_user_ptr = true;
     pipe_reference_init(&bo->base.reference, 1);
     simple_mtx_init(&bo->lock, mtx_plain);
     bo->bo = buf_handle;
@@ -1466,7 +1498,7 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
     bo->base.size = size;
     bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
     bo->ws = ws;
-    bo->user_ptr = pointer;
+    bo->cpu_ptr = pointer;
     bo->va = va;
     bo->u.real.va_handle = va_handle;
     bo->initial_domain = RADEON_DOMAIN_GTT;
@@ -1493,7 +1525,7 @@ error:
 
 static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
 {
-   return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;
+   return ((struct amdgpu_winsys_bo*)buf)->is_user_ptr;
 }
 
 static bool amdgpu_bo_is_suballocated(struct pb_buffer *buf)
index 58e6eed733d56fe907e6be17158e86bc72b8fa3e..88f4241327d831f04cb5feaa6efe8052f0032469 100644 (file)
@@ -88,10 +88,11 @@ struct amdgpu_winsys_bo {
    } u;
 
    struct amdgpu_winsys *ws;
-   void *user_ptr; /* from buffer_from_ptr */
+   void *cpu_ptr; /* for user_ptr and permanent maps */
 
    amdgpu_bo_handle bo; /* NULL for slab entries and sparse buffers */
    bool sparse;
+   bool is_user_ptr;
    bool is_local;
    uint32_t unique_id;
    uint64_t va;