radeonsi: add si_cp_copy_data
author Marek Olšák <marek.olsak@amd.com>
Tue, 12 Feb 2019 20:03:13 +0000 (15:03 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Tue, 23 Apr 2019 15:36:33 +0000 (11:36 -0400)
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
Acked-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_cp_dma.c
src/gallium/drivers/radeonsi/si_perfcounter.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state_draw.c

index 729dc4e325f004375df9b4d58442ca19890e3467..f1afef2e66f6ae4ba72efc3208498247714ce1f1 100644 (file)
@@ -724,22 +724,11 @@ static void si_setup_tgsi_user_data(struct si_context *sctx,
 
        if (info->indirect) {
                if (program->uses_grid_size) {
-                       uint64_t base_va = si_resource(info->indirect)->gpu_address;
-                       uint64_t va = base_va + info->indirect_offset;
-                       int i;
-
-                       radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
-                                        si_resource(info->indirect),
-                                        RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
-
-                       for (i = 0; i < 3; ++i) {
-                               radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-                               radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
-                                               COPY_DATA_DST_SEL(COPY_DATA_REG));
-                               radeon_emit(cs, (va + 4 * i));
-                               radeon_emit(cs, (va + 4 * i) >> 32);
-                               radeon_emit(cs, (grid_size_reg >> 2) + i);
-                               radeon_emit(cs, 0);
+                       for (unsigned i = 0; i < 3; ++i) {
+                               si_cp_copy_data(sctx,
+                                               COPY_DATA_REG, NULL, (grid_size_reg >> 2) + i,
+                                               COPY_DATA_SRC_MEM, si_resource(info->indirect),
+                                               info->indirect_offset + 4 * i);
                        }
                }
        } else {
index f349325202c4ee180fa5b3e3feb402fda7eb93ff..404117d1813fe239f876dcc6fbc43169b3a4bdd2 100644 (file)
@@ -606,3 +606,31 @@ void si_cp_write_data(struct si_context *sctx, struct si_resource *buf,
        radeon_emit(cs, va >> 32);
        radeon_emit_array(cs, (const uint32_t*)data, size/4);
 }
+
+void si_cp_copy_data(struct si_context *sctx, /* shared helper replacing the open-coded PKT3_COPY_DATA sequences removed elsewhere in this patch */
+                    unsigned dst_sel, struct si_resource *dst, unsigned dst_offset, /* dst may be NULL; dst_offset is then the whole destination value */
+                    unsigned src_sel, struct si_resource *src, unsigned src_offset) /* src may be NULL; src_offset is then the whole source value */
+{ /* Emits one PKT3_COPY_DATA packet (six radeon_emit dwords) on sctx->gfx_cs. */
+       struct radeon_cmdbuf *cs = sctx->gfx_cs;
+
+       if (dst) { /* callers in this patch pass dst == NULL together with COPY_DATA_REG */
+               radeon_add_to_buffer_list(sctx, cs, dst,
+                                         RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); /* NOTE(review): the removed perfcounter code used RADEON_PRIO_QUERY here */
+       }
+       if (src) { /* the perfcounter caller passes src == NULL together with COPY_DATA_IMM */
+               radeon_add_to_buffer_list(sctx, cs, src,
+                                         RADEON_USAGE_READ, RADEON_PRIO_CP_DMA); /* NOTE(review): removed callers used RADEON_PRIO_DRAW_INDIRECT / RADEON_PRIO_SO_FILLED_SIZE */
+       }
+
+       uint64_t dst_va = (dst ? dst->gpu_address : 0ull) + dst_offset; /* buffer address + offset; with dst == NULL just dst_offset (callers pass a register offset >> 2) */
+       uint64_t src_va = (src ? src->gpu_address : 0ull) + src_offset; /* buffer address + offset; with src == NULL just src_offset (the immediate 1 in the perfcounter caller) */
+
+       radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); /* packet header; five payload dwords follow */
+       radeon_emit(cs, COPY_DATA_SRC_SEL(src_sel) |
+                       COPY_DATA_DST_SEL(dst_sel) |
+                       COPY_DATA_WR_CONFIRM); /* always set; NOTE(review): the removed compute and perfcounter sequences did not set it */
+       radeon_emit(cs, src_va); /* source: low 32 bits */
+       radeon_emit(cs, src_va >> 32); /* source: high 32 bits */
+       radeon_emit(cs, dst_va); /* destination: low 32 bits */
+       radeon_emit(cs, dst_va >> 32); /* destination: high 32 bits; the removed register-copy code emitted a literal 0 here */
+}
index 4ce71f9500d264c2be8b6705d45eba1fa7d30879..c15c444cc40b85cde9e0dde16a4ecea1572da8ec 100644 (file)
@@ -671,16 +671,9 @@ static void si_pc_emit_start(struct si_context *sctx,
 {
        struct radeon_cmdbuf *cs = sctx->gfx_cs;
 
-       radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer,
-                                 RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
-
-       radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-       radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
-                       COPY_DATA_DST_SEL(COPY_DATA_DST_MEM));
-       radeon_emit(cs, 1); /* immediate */
-       radeon_emit(cs, 0); /* unused */
-       radeon_emit(cs, va);
-       radeon_emit(cs, va >> 32);
+       si_cp_copy_data(sctx,
+                       COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address,
+                       COPY_DATA_IMM, NULL, 1);
 
        radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
                               S_036020_PERFMON_STATE(V_036020_DISABLE_AND_RESET));
index aaa95f32d20ff404d9693dc754538acecc4b4e42..e45ef62ecfeb500a90955faf88293a341c1b9638 100644 (file)
@@ -1239,6 +1239,9 @@ void si_test_gds(struct si_context *sctx);
 void si_cp_write_data(struct si_context *sctx, struct si_resource *buf,
                      unsigned offset, unsigned size, unsigned dst_sel,
                      unsigned engine, const void *data);
+void si_cp_copy_data(struct si_context *sctx,
+                    unsigned dst_sel, struct si_resource *dst, unsigned dst_offset,
+                    unsigned src_sel, struct si_resource *src, unsigned src_offset);
 
 /* si_debug.c */
 void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,
index 2a514f144b977935da1e4719bb138ac3eec5aa3d..1cadc416ca3dfbb40a30de36e065021b7d8df266 100644 (file)
@@ -678,24 +678,14 @@ static void si_emit_draw_packets(struct si_context *sctx,
        if (info->count_from_stream_output) {
                struct si_streamout_target *t =
                        (struct si_streamout_target*)info->count_from_stream_output;
-               uint64_t va = t->buf_filled_size->gpu_address +
-                             t->buf_filled_size_offset;
 
                radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
                                       t->stride_in_dw);
-
-               radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-               radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
-                           COPY_DATA_DST_SEL(COPY_DATA_REG) |
-                           COPY_DATA_WR_CONFIRM);
-               radeon_emit(cs, va);     /* src address lo */
-               radeon_emit(cs, va >> 32); /* src address hi */
-               radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
-               radeon_emit(cs, 0); /* unused */
-
-               radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
-                                     t->buf_filled_size, RADEON_USAGE_READ,
-                                     RADEON_PRIO_SO_FILLED_SIZE);
+               si_cp_copy_data(sctx,
+                               COPY_DATA_REG, NULL,
+                               R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2,
+                               COPY_DATA_SRC_MEM, t->buf_filled_size,
+                               t->buf_filled_size_offset);
        }
 
        /* draw packet */