freedreno: use rsc->slice accessor everywhere
[mesa.git] / src / gallium / drivers / freedreno / a5xx / fd5_emit.c
index bca8aebe50a230125a6f7505abc68aa12de8890e..4de5d0aa62d596999ff7725d2234d58e64fc05bd 100644 (file)
@@ -28,7 +28,7 @@
 #include "util/u_string.h"
 #include "util/u_memory.h"
 #include "util/u_helpers.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_viewport.h"
 
 #include "freedreno_resource.h"
@@ -51,7 +51,7 @@
  * sizedwords:     size of const value buffer
  */
 static void
-fd5_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
+fd5_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type,
                uint32_t regid, uint32_t offset, uint32_t sizedwords,
                const uint32_t *dwords, struct pipe_resource *prsc)
 {
@@ -90,7 +90,7 @@ fd5_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
 }
 
 static void
-fd5_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+fd5_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write,
                uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
 {
        uint32_t anum = align(num, 2);
@@ -396,37 +396,24 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
 static void
 emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
-               enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so)
+               enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so,
+               const struct ir3_shader_variant *v)
 {
        unsigned count = util_last_bit(so->enabled_mask);
+       const struct ir3_ibo_mapping *m = &v->image_mapping;
 
-       if (count == 0)
-               return;
-
-       OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * count));
-       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
-                       CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
-                       CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
-                       CP_LOAD_STATE4_0_NUM_UNIT(count));
-       OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) |
-                       CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
        for (unsigned i = 0; i < count; i++) {
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-               OUT_RING(ring, 0x00000000);
-       }
+               unsigned slot = m->ssbo_to_ibo[i];
+
+               OUT_PKT7(ring, CP_LOAD_STATE4, 5);
+               OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+                               CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+                               CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+                               CP_LOAD_STATE4_0_NUM_UNIT(1));
+               OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
+                               CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+               OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
 
-       OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
-       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
-                       CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
-                       CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
-                       CP_LOAD_STATE4_0_NUM_UNIT(count));
-       OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
-                       CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
-       for (unsigned i = 0; i < count; i++) {
                struct pipe_shader_buffer *buf = &so->sb[i];
                unsigned sz = buf->buffer_size;
 
@@ -435,18 +422,16 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
                OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz));
                OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
-       }
 
-       OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
-       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
-                       CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
-                       CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
-                       CP_LOAD_STATE4_0_NUM_UNIT(count));
-       OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
-                       CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
-       OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
-       for (unsigned i = 0; i < count; i++) {
-               struct pipe_shader_buffer *buf = &so->sb[i];
+               OUT_PKT7(ring, CP_LOAD_STATE4, 5);
+               OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+                               CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+                               CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+                               CP_LOAD_STATE4_0_NUM_UNIT(1));
+               OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
+                               CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+               OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+
                if (buf->buffer) {
                        struct fd_resource *rsc = fd_resource(buf->buffer);
                        OUT_RELOCW(ring, rsc->bo, buf->buffer_offset, 0, 0);
@@ -524,7 +509,7 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
        emit_marker5(ring, 5);
 
-       if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) {
+       if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->binning_pass) {
                unsigned char mrt_comp[A5XX_MAX_RENDER_TARGETS] = {0};
 
                for (unsigned i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
@@ -566,7 +551,7 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
                        if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid)
                                gras_lrz_cntl = 0;
-                       else if (emit->key.binning_pass && blend->lrz_write && zsa->lrz_write)
+                       else if (emit->binning_pass && blend->lrz_write && zsa->lrz_write)
                                gras_lrz_cntl |= A5XX_GRAS_LRZ_CNTL_LRZ_WRITE;
 
                        OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
@@ -587,7 +572,7 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
        if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
                struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa);
-               bool fragz = fp->has_kill | fp->writes_pos;
+               bool fragz = fp->no_earlyz | fp->writes_pos;
 
                OUT_PKT4(ring, REG_A5XX_RB_DEPTH_CNTL, 1);
                OUT_RING(ring, zsa->rb_depth_cntl);
@@ -601,7 +586,8 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                                COND(fragz && fp->frag_coord, A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1));
        }
 
-       if (dirty & FD_DIRTY_SCISSOR) {
+       /* NOTE: the scissor-enable bit is part of rasterizer state, so re-emit when either is dirty: */
+       if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) {
                struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
 
                OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2);
@@ -684,7 +670,7 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                uint32_t posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
                unsigned nr = pfb->nr_cbufs;
 
-               if (emit->key.binning_pass)
+               if (emit->binning_pass)
                        nr = 0;
                else if (ctx->rasterizer->rasterizer_discard)
                        nr = 0;
@@ -700,10 +686,10 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
        }
 
        ir3_emit_vs_consts(vp, ring, ctx, emit->info);
-       if (!emit->key.binning_pass)
+       if (!emit->binning_pass)
                ir3_emit_fs_consts(fp, ring, ctx);
 
-       struct pipe_stream_output_info *info = &vp->shader->stream_output;
+       struct ir3_stream_output_info *info = &vp->shader->stream_output;
        if (info->num_outputs) {
                struct fd_streamout_stateobj *so = &ctx->streamout;
 
@@ -820,10 +806,10 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                emit_border_color(ctx, ring);
 
        if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
-               emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
+               emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT], fp);
 
        if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE)
-               fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT);
+               fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT, fp);
 }
 
 void
@@ -861,10 +847,10 @@ fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                        ~0 : ctx->tex[PIPE_SHADER_COMPUTE].num_textures);
 
        if (dirty & FD_DIRTY_SHADER_SSBO)
-               emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]);
+               emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE], cp);
 
        if (dirty & FD_DIRTY_SHADER_IMAGE)
-               fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE);
+               fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE, cp);
 }
 
 /* emit setup at begin of new cmdstream buffer (don't rely on previous
@@ -933,8 +919,19 @@ t7              opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
        OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1);
        OUT_RING(ring, 0x0000001e);   /* SP_MODE_CNTL */
 
-       OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
-       OUT_RING(ring, 0x40000800);   /* SP_DBG_ECO_CNTL */
+       if (ctx->screen->gpu_id == 540) {
+               OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
+               OUT_RING(ring, 0x800);   /* SP_DBG_ECO_CNTL */
+
+               OUT_PKT4(ring, REG_A5XX_HLSQ_DBG_ECO_CNTL, 1);
+               OUT_RING(ring, 0x0);
+
+               OUT_PKT4(ring, REG_A5XX_VPC_DBG_ECO_CNTL, 1);
+               OUT_RING(ring, 0x800400);
+       } else {
+               OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1);
+               OUT_RING(ring, 0x40000800);   /* SP_DBG_ECO_CNTL */
+       }
 
        OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1);
        OUT_RING(ring, 0x00000544);   /* TPL1_MODE_CNTL */
@@ -1102,20 +1099,6 @@ t7              opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
        OUT_RING(ring, 0x00000000);
 }
 
-static void
-fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
-{
-       /* for debug after a lock up, write a unique counter value
-        * to scratch6 for each IB, to make it easier to match up
-        * register dumps to cmdstream.  The combination of IB and
-        * DRAW (scratch7) is enough to "triangulate" the particular
-        * draw that caused lockup.
-        */
-       emit_marker5(ring, 6);
-       __OUT_IB5(ring, target);
-       emit_marker5(ring, 6);
-}
-
 static void
 fd5_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
                unsigned dst_off, struct pipe_resource *src, unsigned src_off,
@@ -1136,12 +1119,17 @@ fd5_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
        }
 }
 
+void
+fd5_emit_init_screen(struct pipe_screen *pscreen)
+{
+       struct fd_screen *screen = fd_screen(pscreen);
+       screen->emit_const = fd5_emit_const;
+       screen->emit_const_bo = fd5_emit_const_bo;
+       screen->emit_ib = fd5_emit_ib;
+       screen->mem_to_mem = fd5_mem_to_mem;
+}
+
 void
 fd5_emit_init(struct pipe_context *pctx)
 {
-       struct fd_context *ctx = fd_context(pctx);
-       ctx->emit_const = fd5_emit_const;
-       ctx->emit_const_bo = fd5_emit_const_bo;
-       ctx->emit_ib = fd5_emit_ib;
-       ctx->mem_to_mem = fd5_mem_to_mem;
 }