freedreno: use rsc->slice accessor everywhere
[mesa.git] / src / gallium / drivers / freedreno / a5xx / fd5_emit.c
index ad942fd7f5ae5e1f817a29fe99b75b2c8da52730..4de5d0aa62d596999ff7725d2234d58e64fc05bd 100644 (file)
@@ -28,7 +28,7 @@
 #include "util/u_string.h"
 #include "util/u_memory.h"
 #include "util/u_helpers.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_viewport.h"
 
 #include "freedreno_resource.h"
@@ -396,37 +396,24 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
 static void
 emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
-               enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so)
+               enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so,
+               const struct ir3_shader_variant *v)
 {
        unsigned count = util_last_bit(so->enabled_mask);
+       const struct ir3_ibo_mapping *m = &v->image_mapping;
 
-       if (count == 0)
-               return;
-
-       OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * count));
-       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
-                       CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
-                       CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
-                       CP_LOAD_STATE4_0_NUM_UNIT(count));
-       OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) |
-                       CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
        for (unsigned i = 0; i < count; i++) {
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-       }
+               unsigned slot = m->ssbo_to_ibo[i];
+
+               OUT_PKT7(ring, CP_LOAD_STATE4, 5);
+               OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+                               CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+                               CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+                               CP_LOAD_STATE4_0_NUM_UNIT(1));
+               OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
+                               CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+               OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
 
-       OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
-       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
-                       CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
-                       CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
-                       CP_LOAD_STATE4_0_NUM_UNIT(count));
-       OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
-                       CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
-       for (unsigned i = 0; i < count; i++) {
                struct pipe_shader_buffer *buf = &so->sb[i];
                unsigned sz = buf->buffer_size;
 
@@ -435,18 +422,16 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
                OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz));
                OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
-       }
 
-       OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
-       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
-                       CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
-                       CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
-                       CP_LOAD_STATE4_0_NUM_UNIT(count));
-       OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
-                       CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
-       for (unsigned i = 0; i < count; i++) {
-               struct pipe_shader_buffer *buf = &so->sb[i];
+               OUT_PKT7(ring, CP_LOAD_STATE4, 5);
+               OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+                               CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+                               CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+                               CP_LOAD_STATE4_0_NUM_UNIT(1));
+               OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
+                               CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+               OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+
                if (buf->buffer) {
                        struct fd_resource *rsc = fd_resource(buf->buffer);
                        OUT_RELOCW(ring, rsc->bo, buf->buffer_offset, 0, 0);
@@ -587,7 +572,7 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
        if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
                struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
-               bool fragz = fp->has_kill | fp->writes_pos;
+               bool fragz = fp->no_earlyz | fp->writes_pos;
 
                OUT_PKT4(ring, REG_A5XX_RB_DEPTH_CNTL, 1);
                OUT_RING(ring, zsa->rb_depth_cntl);
@@ -821,10 +806,10 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                emit_border_color(ctx, ring);
 
        if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
-               emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
+               emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT], fp);
 
        if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE)
-               fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT);
+               fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT, fp);
 }
 
 void
@@ -862,10 +847,10 @@ fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        ~0 : ctx->tex[PIPE_SHADER_COMPUTE].num_textures);
 
        if (dirty & FD_DIRTY_SHADER_SSBO)
-               emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]);
+               emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE], cp);
 
        if (dirty & FD_DIRTY_SHADER_IMAGE)
-               fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE);
+               fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE, cp);
 }
 
 /* emit setup at begin of new cmdstream buffer (don't rely on previous
@@ -934,8 +919,19 @@ t7              opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
        OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1);
        OUT_RING(ring, 0x0000001e);   /* SP_MODE_CNTL */
 
-       OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
-       OUT_RING(ring, 0x40000800);   /* SP_DBG_ECO_CNTL */
+       if (ctx->screen->gpu_id == 540) {
+               OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
+               OUT_RING(ring, 0x800);   /* SP_DBG_ECO_CNTL */
+
+               OUT_PKT4(ring, REG_A5XX_HLSQ_DBG_ECO_CNTL, 1);
+               OUT_RING(ring, 0x0);
+
+               OUT_PKT4(ring, REG_A5XX_VPC_DBG_ECO_CNTL, 1);
+               OUT_RING(ring, 0x800400);
+       } else {
+               OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
+               OUT_RING(ring, 0x40000800);   /* SP_DBG_ECO_CNTL */
+       }
 
        OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1);
        OUT_RING(ring, 0x00000544);   /* TPL1_MODE_CNTL */
@@ -1103,20 +1099,6 @@ t7              opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
        OUT_RING(ring, 0x00000000);
 }
 
-static void
-fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
-{
-       /* for debug after a lock up, write a unique counter value
-        * to scratch6 for each IB, to make it easier to match up
-        * register dumps to cmdstream.  The combination of IB and
-        * DRAW (scratch7) is enough to "triangulate" the particular
-        * draw that caused lockup.
-        */
-       emit_marker5(ring, 6);
-       __OUT_IB5(ring, target);
-       emit_marker5(ring, 6);
-}
-
 static void
 fd5_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
                unsigned dst_off, struct pipe_resource *src, unsigned src_off,
@@ -1137,12 +1119,17 @@ fd5_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
        }
 }
 
+void
+fd5_emit_init_screen(struct pipe_screen *pscreen)
+{
+       struct fd_screen *screen = fd_screen(pscreen);
+       screen->emit_const = fd5_emit_const;
+       screen->emit_const_bo = fd5_emit_const_bo;
+       screen->emit_ib = fd5_emit_ib;
+       screen->mem_to_mem = fd5_mem_to_mem;
+}
+
 void
 fd5_emit_init(struct pipe_context *pctx)
 {
-       struct fd_context *ctx = fd_context(pctx);
-       ctx->emit_const = fd5_emit_const;
-       ctx->emit_const_bo = fd5_emit_const_bo;
-       ctx->emit_ib = fd5_emit_ib;
-       ctx->mem_to_mem = fd5_mem_to_mem;
 }