radeonsi: implement clear_buffer using CP DMA, initialize CMASK with it
authorMarek Olšák <marek.olsak@amd.com>
Sun, 22 Sep 2013 19:47:35 +0000 (21:47 +0200)
committerMarek Olšák <marek.olsak@amd.com>
Sun, 29 Sep 2013 13:18:09 +0000 (15:18 +0200)
More work needs to be done for this to be entirely shared with r600g.
I'm just trying to share r600_texture.c now.

The reason I put the implementation in si_descriptors.c is that the emit
function was already there.

src/gallium/drivers/radeonsi/r600_texture.c
src/gallium/drivers/radeonsi/radeonsi_pipe.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_state_draw.c

index aa307fb1a225b2944684b25ea7779bf23bdb4e48..53452c892c2bff84e69a065a95cbf909065e6896 100644 (file)
@@ -462,8 +462,9 @@ r600_texture_create_object(struct pipe_screen *screen,
 
        if (rtex->cmask.size) {
                /* Initialize the cmask to 0xCC (= compressed state). */
-               char *map = rscreen->b.ws->buffer_map(resource->cs_buf, NULL, PIPE_TRANSFER_WRITE);
-               memset(map + rtex->cmask.offset, 0xCC, rtex->cmask.size);
+               r600_screen_clear_buffer(&rscreen->b, &resource->b.b,
+                                        rtex->cmask.offset, rtex->cmask.size,
+                                        0xCCCCCCCC);
        }
 
        if (rscreen->b.debug_flags & DBG_TEX_DEPTH && rtex->is_depth) {
index 5528e1968a3ff08a6ea95e8ae5b265809cdfd9a4..8ed5d26a0e97fb6e145dd10edf885902134b8e16 100644 (file)
@@ -653,6 +653,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
        if (!radeon_winsys_unref(rscreen->b.ws))
                return;
 
+       r600_common_screen_cleanup(&rscreen->b);
+
        if (rscreen->fences.bo) {
                struct r600_fence_block *entry, *tmp;
 
@@ -823,18 +825,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
                return NULL;
        }
 
-       r600_common_screen_init(&rscreen->b, ws);
-
-       if (debug_get_bool_option("RADEON_PRINT_TEXDEPTH", FALSE))
-               rscreen->b.debug_flags |= DBG_TEX_DEPTH;
-       if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
-               rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
-
-       if (r600_init_tiling(rscreen)) {
-               FREE(rscreen);
-               return NULL;
-       }
-
+       /* Set functions first. */
+       rscreen->b.b.context_create = r600_create_context;
        rscreen->b.b.destroy = r600_destroy_screen;
        rscreen->b.b.get_name = r600_get_name;
        rscreen->b.b.get_vendor = r600_get_vendor;
@@ -844,12 +836,9 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
        rscreen->b.b.get_compute_param = r600_get_compute_param;
        rscreen->b.b.get_timestamp = r600_get_timestamp;
        rscreen->b.b.is_format_supported = si_is_format_supported;
-       rscreen->b.b.context_create = r600_create_context;
        rscreen->b.b.fence_reference = r600_fence_reference;
        rscreen->b.b.fence_signalled = r600_fence_signalled;
        rscreen->b.b.fence_finish = r600_fence_finish;
-       r600_init_screen_resource_functions(&rscreen->b.b);
-
        if (rscreen->b.info.has_uvd) {
                rscreen->b.b.get_video_param = ruvd_get_video_param;
                rscreen->b.b.is_video_format_supported = ruvd_is_format_supported;
@@ -857,6 +846,19 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
                rscreen->b.b.get_video_param = r600_get_video_param;
                rscreen->b.b.is_video_format_supported = vl_video_buffer_is_format_supported;
        }
+       r600_init_screen_resource_functions(&rscreen->b.b);
+
+       r600_common_screen_init(&rscreen->b, ws);
+
+       if (debug_get_bool_option("RADEON_PRINT_TEXDEPTH", FALSE))
+               rscreen->b.debug_flags |= DBG_TEX_DEPTH;
+       if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
+               rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
+
+       if (r600_init_tiling(rscreen)) {
+               FREE(rscreen);
+               return NULL;
+       }
 
        util_format_s3tc_init();
 
index a8f87815a5b4355faa741bd8dcbe15852abf6b4b..93d3684b68419d18d573e5720a49d4deb7473588 100644 (file)
@@ -44,7 +44,7 @@ static uint32_t null_desc[8]; /* zeros */
 #define SI_CP_DMA_RAW_WAIT     (1 << 1) /* SI+ */
 
 /* Emit a CP DMA packet to do a copy from one buffer to another.
- * The size must fit in bits [20:0]. Notes:
+ * The size must fit in bits [20:0].
  */
 static void si_emit_cp_dma_copy_buffer(struct r600_context *rctx,
                                       uint64_t dst_va, uint64_t src_va,
@@ -517,6 +517,88 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
        si_update_descriptors(rctx, &buffers->desc);
 }
 
+/* CP DMA */
+
+/* The max number of bytes to copy per packet. */
+#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
+
+/* Fill "size" bytes of "dst" starting at "offset" with the 32-bit pattern
+ * "value", using CP DMA packets. Unaligned clears fall back to a CPU fill. */
+static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
+                           unsigned offset, unsigned size, unsigned value)
+{
+       struct r600_context *rctx = (struct r600_context*)ctx;
+
+       if (!size)
+               return;
+
+       /* Mark the buffer range of destination as valid (initialized),
+        * so that transfer_map knows it should wait for the GPU when mapping
+        * that range. This must be done before "offset" and "size" are
+        * modified below, and it covers both the CPU and CP DMA paths. */
+       util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
+                      offset + size);
+
+       /* Fallback for unaligned clears: fill the bytes on the CPU,
+        * replicating the 32-bit pattern in buffer byte order (the GPU
+        * is little endian), starting at "offset" into the mapping. */
+       if (offset % 4 != 0 || size % 4 != 0) {
+               char *map = rctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
+                                                  rctx->b.rings.gfx.cs,
+                                                  PIPE_TRANSFER_WRITE);
+               map += offset;
+               for (unsigned i = 0; i < size; i++)
+                       map[i] = (value >> (8 * (i % 4))) & 0xff;
+               return;
+       }
+
+       uint64_t va = r600_resource_va(&rctx->screen->b.b, dst) + offset;
+
+       /* Flush the caches where the resource is bound. */
+       /* XXX only flush the caches where the buffer is bound. */
+       rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
+                        R600_CONTEXT_INV_CONST_CACHE |
+                        R600_CONTEXT_FLUSH_AND_INV_CB |
+                        R600_CONTEXT_FLUSH_AND_INV_DB |
+                        R600_CONTEXT_FLUSH_AND_INV_CB_META |
+                        R600_CONTEXT_FLUSH_AND_INV_DB_META;
+       rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
+
+       while (size) {
+               unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+               unsigned dma_flags = 0;
+
+               si_need_cs_space(rctx, 7 + (rctx->b.flags ? rctx->cache_flush.num_dw : 0),
+                                FALSE);
+
+               /* This must be done after need_cs_space. */
+               r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+                                     (struct r600_resource*)dst, RADEON_USAGE_WRITE);
+
+               /* Flush the caches for the first copy only.
+                * Also wait for the previous CP DMA operations. */
+               if (rctx->b.flags) {
+                       si_emit_cache_flush(&rctx->b, NULL);
+                       dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
+               }
+
+               /* Do the synchronization after the last copy, so that all data is written to memory. */
+               if (size == byte_count)
+                       dma_flags |= R600_CP_DMA_SYNC;
+
+               /* Emit the clear packet. */
+               si_emit_cp_dma_clear_buffer(rctx, va, byte_count, value, dma_flags);
+
+               size -= byte_count;
+               va += byte_count;
+       }
+
+       /* Flush the caches again in case the 3D engine has been prefetching
+        * the resource. */
+       /* XXX only flush the caches where the buffer is bound. */
+       rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
+                        R600_CONTEXT_INV_CONST_CACHE |
+                        R600_CONTEXT_FLUSH_AND_INV_CB |
+                        R600_CONTEXT_FLUSH_AND_INV_DB |
+                        R600_CONTEXT_FLUSH_AND_INV_CB_META |
+                        R600_CONTEXT_FLUSH_AND_INV_DB_META;
+}
+
 /* INIT/DEINIT */
 
 void si_init_all_descriptors(struct r600_context *rctx)
@@ -541,6 +623,7 @@ void si_init_all_descriptors(struct r600_context *rctx)
        /* Set pipe_context functions. */
        rctx->b.b.set_constant_buffer = si_set_constant_buffer;
        rctx->b.b.set_stream_output_targets = si_set_streamout_targets;
+       rctx->b.clear_buffer = si_clear_buffer;
 }
 
 void si_release_all_descriptors(struct r600_context *rctx)
index cb5055a57d059ac23f97ab76329e35f33c6d7398..021352307c2b51a0590c9281eec00432c6a11e2a 100644 (file)
@@ -677,7 +677,10 @@ void si_emit_cache_flush(struct r600_common_context *rctx, struct r600_atom *ato
                radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
        }
 
-       if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
+       if (rctx->flags & R600_CONTEXT_WAIT_3D_IDLE) {
+               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+               radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+       } else if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
                /* Needed if streamout buffers are going to be used as a source. */
                radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));