freedreno: a2xx: fd2_draw update
author Jonathan Marek <jonathan@marek.ca>
Tue, 13 Nov 2018 16:04:32 +0000 (11:04 -0500)
committer Rob Clark <robdclark@gmail.com>
Tue, 27 Nov 2018 20:44:02 +0000 (15:44 -0500)
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/a2xx/fd2_draw.c
src/gallium/drivers/freedreno/freedreno_batch.c
src/gallium/drivers/freedreno/freedreno_batch.h
src/gallium/drivers/freedreno/freedreno_draw.c
src/gallium/drivers/freedreno/freedreno_draw.h
src/gallium/drivers/freedreno/freedreno_util.h

index f00bec6efcdc20311554a08df76bf56e092cb6dc..6dac8ca6a9d4b898520ebb19944a7c84bc291e99 100644 (file)
@@ -77,29 +77,44 @@ emit_vertexbufs(struct fd_context *ctx)
        fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);
 }
 
-static bool
-fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
-             unsigned index_offset)
+static void
+draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
+                  struct fd_ringbuffer *ring, unsigned index_offset)
 {
-       struct fd_ringbuffer *ring = ctx->batch->draw;
-
-       if (ctx->dirty & FD_DIRTY_VTXBUF)
-               emit_vertexbufs(ctx);
-
-       fd2_emit_state(ctx, ctx->dirty);
-
        OUT_PKT3(ring, CP_SET_CONSTANT, 2);
        OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
-       OUT_RING(ring, info->start);
+       OUT_RING(ring, info->index_size ? 0 : info->start);
 
        OUT_PKT3(ring, CP_SET_CONSTANT, 2);
        OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
-       OUT_RING(ring, 0x0000003b);
+       OUT_RING(ring, is_a20x(ctx->screen) ? 0x00000002 : 0x0000003b);
 
        OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
        OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
 
-       if (!is_a20x(ctx->screen)) {
+       if (is_a20x(ctx->screen)) {
+               /* wait for DMA to finish and
+                * dummy draw one triangle with indexes 0,0,0.
+                * with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE.
+                *
+                * this workaround is for a HW bug related to DMA alignment:
+                * it is necessary for indexed draws and possibly also
+                * draws that read binning data
+                */
+               OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
+               OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00001000); /* bit 12: VGT_BUSY_NO_DMA */
+               OUT_RING(ring, 0x00000001);
+
+               OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6);
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x0003c004);
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000003);
+               OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 0x80, 0, 0);
+               OUT_RING(ring, 0x00000006);
+       } else {
                OUT_WFI (ring);
 
                OUT_PKT3(ring, CP_SET_CONSTANT, 3);
@@ -111,11 +126,62 @@ fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
        fd_draw_emit(ctx->batch, ring, ctx->primtypes[info->mode],
                                 IGNORE_VISIBILITY, info, index_offset);
 
-       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
-       OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
-       OUT_RING(ring, 0x00000000);
+       if (is_a20x(ctx->screen)) {
+               /* not sure why this is required, but it fixes some hangs */
+               OUT_WFI(ring);
+       } else {
+               OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+               OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
+               OUT_RING(ring, 0x00000000);
+       }
 
        emit_cacheflush(ring);
+}
+
+
+static bool
+fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
+                        unsigned index_offset)
+{
+       if (!ctx->prog.fp || !ctx->prog.vp)
+               return false;
+
+       if (ctx->dirty & FD_DIRTY_VTXBUF)
+               emit_vertexbufs(ctx);
+
+       fd2_emit_state(ctx, ctx->dirty);
+
+       /* a2xx can draw only 65535 vertices at once
+        * on a22x the field in the draw command is 32bits but seems limited too
+        * using a limit of 32k because it fixes an unexplained hang
+        * 32766 works for all primitives (multiple of 2 and 3)
+        */
+       if (pinfo->count > 32766) {
+               static const uint16_t step_tbl[PIPE_PRIM_MAX] = {
+                       [0 ... PIPE_PRIM_MAX - 1]  = 32766,
+                       [PIPE_PRIM_LINE_STRIP]     = 32765,
+                       [PIPE_PRIM_TRIANGLE_STRIP] = 32764,
+
+                       /* needs more work */
+                       [PIPE_PRIM_TRIANGLE_FAN]   = 0,
+                       [PIPE_PRIM_LINE_LOOP]      = 0,
+               };
+
+               struct pipe_draw_info info = *pinfo;
+               unsigned count = info.count;
+               unsigned step = step_tbl[info.mode];
+
+               if (!step)
+                       return false;
+
+               for (; count + step > 32766; count -= step) {
+                       info.count = MIN2(count, 32766);
+                       draw_impl(ctx, &info, ctx->batch->draw, index_offset);
+                       info.start += step;
+               }
+       } else {
+               draw_impl(ctx, pinfo, ctx->batch->draw, index_offset);
+       }
 
        fd_context_all_clean(ctx);
 
index 28b4942f9f8a8ef47adb311512fec021fd037e0e..eae2f68ce11d4b8a599e790245a328ffcceaffcd 100644 (file)
@@ -83,6 +83,7 @@ batch_init(struct fd_batch *batch)
        batch->flushed = false;
        batch->gmem_reason = 0;
        batch->num_draws = 0;
+       batch->num_vertices = 0;
        batch->stage = FD_STAGE_NULL;
 
        fd_reset_wfi(batch);
index d4feadd55904b73245e7a2a2a2646e7537b2210a..a40d36094cd5960ee4bf39ae01cdc3bb1c951379 100644 (file)
@@ -124,6 +124,7 @@ struct fd_batch {
                FD_GMEM_LOGICOP_ENABLED      = 0x20,
        } gmem_reason;
        unsigned num_draws;   /* number of draws in current batch */
+       unsigned num_vertices;   /* number of vertices in current batch */
 
        /* Track the maximal bounds of the scissor of all the draws within a
         * batch.  Used at the tile rendering step (fd_gmem_render_tiles(),
index 90d0e44036122264a368c8290d0ef1e2e4078e74..f17cb56306326adfd318022dda0a2b8a95429c32 100644 (file)
@@ -291,6 +291,8 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
        if (ctx->draw_vbo(ctx, info, index_offset))
                batch->needs_flush = true;
 
+       batch->num_vertices += info->count * info->instance_count;
+
        for (i = 0; i < ctx->streamout.num_targets; i++)
                ctx->streamout.offsets[i] += info->count;
 
index 8ae13411ceff68bde77c54dfc7cefc3281c015aa..c2197f21a9f3b0a1abaa088cbe3ac70f462b94ae 100644 (file)
@@ -73,9 +73,29 @@ fd_draw(struct fd_batch *batch, struct fd_ringbuffer *ring,
        }
 
        if (is_a20x(batch->ctx->screen)) {
-               OUT_PKT3(ring, CP_DRAW_INDX, idx_buffer ? 4 : 2);
+               /* a20x has a different draw command for drawing with binning data
+                * note: if we do patching we will have to insert a NOP
+                *
+                * binning data is 1 byte/vertex (8x8x4 bin position of vertex)
+                * base ptr set by the CP_SET_DRAW_INIT_FLAGS command
+                *
+                * TODO: investigate the faceness_cull_select parameter to see how
+                * it is used with hw binning to use "faceness" bits
+                */
+               uint32_t size = 2;
+               if (vismode)
+                       size += 2;
+               if (idx_buffer)
+                       size += 2;
+
+               OUT_PKT3(ring, vismode ? CP_DRAW_INDX_BIN : CP_DRAW_INDX, size);
                OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, DRAW_A20X(primtype, src_sel, idx_type, vismode, count));
+               OUT_RING(ring, DRAW_A20X(primtype, DI_FACE_CULL_NONE, src_sel,
+                                                                idx_type, vismode, vismode, count));
+               if (vismode == USE_VISIBILITY) {
+                       OUT_RING(ring, batch->num_vertices);
+                       OUT_RING(ring, count);
+               }
        } else {
                OUT_PKT3(ring, CP_DRAW_INDX, idx_buffer ? 5 : 3);
                OUT_RING(ring, 0x00000000);        /* viz query info. */
index 125ad83523cf3b186ebb64047183081c44524542..b0ed3fa8e4feadbd9d449a0474adf0c82f38f358 100644 (file)
@@ -114,15 +114,19 @@ static inline uint32_t DRAW(enum pc_di_primtype prim_type,
 }
 
 static inline uint32_t DRAW_A20X(enum pc_di_primtype prim_type,
+               enum pc_di_face_cull_sel faceness_cull_select,
                enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
-               enum pc_di_vis_cull_mode vis_cull_mode,
+               bool pre_fetch_cull_enable,
+               bool grp_cull_enable,
                uint16_t count)
 {
        return (prim_type         << 0) |
                        (source_select     << 6) |
+                       (faceness_cull_select << 8) |
                        ((index_size & 1)  << 11) |
                        ((index_size >> 1) << 13) |
-                       (vis_cull_mode     << 9) |
+                       (pre_fetch_cull_enable << 14) |
+                       (grp_cull_enable << 15) |
                        (count         << 16);
 }