radeonsi: add IB2 indirect buffer support for pm4 states
author: Marek Olšák <marek.olsak@amd.com>
Sun, 30 Aug 2015 16:39:19 +0000 (18:39 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 1 Sep 2015 19:51:15 +0000 (21:51 +0200)
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
src/gallium/drivers/radeonsi/si_pm4.c
src/gallium/drivers/radeonsi/si_pm4.h
src/gallium/drivers/radeonsi/sid.h

index b06e92b1fef174d5118588309be6608666108070..b1834afa796af6719c0b89d0c2cb931e5241bcb9 100644 (file)
@@ -107,6 +107,7 @@ void si_pm4_free_state_simple(struct si_pm4_state *state)
 {
        for (int i = 0; i < state->nbo; ++i)
                r600_resource_reference(&state->bo[i], NULL);
+       r600_resource_reference(&state->indirect_buffer, NULL);
        FREE(state);
 }
 
@@ -133,7 +134,19 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
                                      state->bo_usage[i], state->bo_priority[i]);
        }
 
-       radeon_emit_array(cs, state->pm4, state->ndw);
+       if (!state->indirect_buffer) {
+               radeon_emit_array(cs, state->pm4, state->ndw);
+       } else {
+               struct r600_resource *ib = state->indirect_buffer;
+
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, ib,
+                                         RADEON_USAGE_READ, RADEON_PRIO_MIN);
+
+               radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+               radeon_emit(cs, ib->gpu_address);
+               radeon_emit(cs, (ib->gpu_address >> 32) & 0xffff);
+               radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff);
+       }
 }
 
 void si_pm4_emit_dirty(struct si_context *sctx)
@@ -153,3 +166,36 @@ void si_pm4_reset_emitted(struct si_context *sctx)
 {
        memset(&sctx->emitted, 0, sizeof(sctx->emitted));
 }
+
+void si_pm4_upload_indirect_buffer(struct si_context *sctx,
+                                  struct si_pm4_state *state)
+{ /* Copies state->pm4 into a newly created buffer stored in state->indirect_buffer; si_pm4_emit then references that buffer with an INDIRECT_BUFFER packet instead of copying the dwords. */
+       struct pipe_screen *screen = sctx->b.b.screen;
+       unsigned aligned_ndw = align(state->ndw, 8); /* dword count including the padding added below */
+
+       /* only supported on CIK and later; on older chips state->indirect_buffer is left untouched, and si_pm4_emit copies pm4 inline whenever it is NULL */
+       if (sctx->b.chip_class < CIK)
+               return;
+
+       assert(state->ndw); /* an empty state has nothing to upload */
+       assert(aligned_ndw <= SI_PM4_MAX_DW); /* the padding is written into state->pm4 itself; presumably its capacity is SI_PM4_MAX_DW - TODO confirm in si_pm4.h */
+
+       r600_resource_reference(&state->indirect_buffer, NULL); /* drop the reference to any previously uploaded buffer */
+       state->indirect_buffer = (struct r600_resource*)
+               pipe_buffer_create(screen, PIPE_BIND_CUSTOM,
+                                  PIPE_USAGE_DEFAULT, aligned_ndw * 4); /* size in bytes (4 per dword) */
+       if (!state->indirect_buffer) /* creation failed: indirect_buffer stays NULL, so si_pm4_emit uses the inline path */
+               return;
+
+       /* Pad the IB to 8 DWs to meet CP fetch alignment requirements. */
+       if (sctx->screen->b.info.gfx_ib_pad_with_type2) {
+               for (int i = state->ndw; i < aligned_ndw; i++)
+                       state->pm4[i] = 0x80000000; /* type2 nop packet */
+       } else {
+               for (int i = state->ndw; i < aligned_ndw; i++)
+                       state->pm4[i] = 0xffff1000; /* type3 nop packet */
+       }
+
+       pipe_buffer_write(&sctx->b.b, &state->indirect_buffer->b.b,
+                         0, aligned_ndw *4, state->pm4); /* write the padded dwords starting at offset 0 */
+}
index efa20628aabf1dd698d5b391b60e2fb775ebd1ad..5282d00fe467c5d60756f45d49a1a996cc9a7678 100644 (file)
@@ -39,6 +39,9 @@ enum chip_class;
 
 struct si_pm4_state
 {
+       /* optional indirect buffer */
+       struct r600_resource    *indirect_buffer;
+
        /* PKT3_SET_*_REG handling */
        unsigned        last_opcode;
        unsigned        last_reg;
@@ -66,6 +69,8 @@ void si_pm4_add_bo(struct si_pm4_state *state,
                   struct r600_resource *bo,
                   enum radeon_bo_usage usage,
                   enum radeon_bo_priority priority);
+void si_pm4_upload_indirect_buffer(struct si_context *sctx,
+                                  struct si_pm4_state *state);
 
 void si_pm4_free_state_simple(struct si_pm4_state *state);
 void si_pm4_free_state(struct si_context *sctx,
index cd6be73f66c4d18edba2d0e7e1ea3e48db8af1ea..4bb24572b907bebff43442993b2d3223e90399e2 100644 (file)
@@ -94,7 +94,7 @@
 #define PKT3_DRAW_INDEX_IMMD                   0x2E /* not on CIK */
 #define PKT3_NUM_INSTANCES                     0x2F
 #define PKT3_DRAW_INDEX_MULTI_AUTO             0x30
-#define PKT3_INDIRECT_BUFFER                   0x32
+#define PKT3_INDIRECT_BUFFER_SI                0x32 /* not on CIK */
 #define PKT3_STRMOUT_BUFFER_UPDATE             0x34
 #define PKT3_DRAW_INDEX_OFFSET_2               0x35
 #define PKT3_DRAW_PREAMBLE                     0x36 /* new on CIK, required on GFX7.2 and later */
 #define PKT3_WAIT_REG_MEM                      0x3C
 #define                WAIT_REG_MEM_EQUAL              3
 #define PKT3_MEM_WRITE                         0x3D /* not on CIK */
+#define PKT3_INDIRECT_BUFFER_CIK               0x3F /* new on CIK */
 #define PKT3_COPY_DATA                        0x40
 #define                COPY_DATA_SRC_SEL(x)            ((x) & 0xf)
 #define                        COPY_DATA_REG           0