freedreno/a4xx: better workaround for astc+srgb
[mesa.git] / src / gallium / drivers / freedreno / a4xx / fd4_draw.c
index de5a306af609ac517bdc6d08e8161cfb9651afa7..68e1f53e2dde9e7d9dbdf9c1e4f725ce8c8c14b1 100644 (file)
@@ -47,6 +47,10 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
                struct fd4_emit *emit)
 {
        const struct pipe_draw_info *info = emit->info;
+       enum pc_di_primtype primtype = ctx->primtypes[info->mode];
+
+       if (!(fd4_emit_get_vp(emit) && fd4_emit_get_fp(emit)))
+               return;
 
        fd4_emit_state(ctx, ring, emit);
 
@@ -61,7 +65,14 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
        OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
                        info->restart_index : 0xffffffff);
 
+       /* points + psize -> spritelist: */
+       if (ctx->rasterizer->point_size_per_vertex &&
+                       fd4_emit_get_vp(emit)->writes_psize &&
+                       (info->mode == PIPE_PRIM_POINTS))
+               primtype = DI_PT_POINTLIST_PSIZE;
+
        fd4_draw_emit(ctx, ring,
+                       primtype,
                        emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
                        info);
 }
@@ -82,12 +93,14 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
                if (last_key->has_per_samp || key->has_per_samp) {
                        if ((last_key->vsaturate_s != key->vsaturate_s) ||
                                        (last_key->vsaturate_t != key->vsaturate_t) ||
-                                       (last_key->vsaturate_r != key->vsaturate_r))
+                                       (last_key->vsaturate_r != key->vsaturate_r) ||
+                                       (last_key->vastc_srgb != key->vastc_srgb))
                                ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
 
                        if ((last_key->fsaturate_s != key->fsaturate_s) ||
                                        (last_key->fsaturate_t != key->fsaturate_t) ||
-                                       (last_key->fsaturate_r != key->fsaturate_r))
+                                       (last_key->fsaturate_r != key->fsaturate_r) ||
+                                       (last_key->fastc_srgb != key->fastc_srgb))
                                ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
                }
 
@@ -108,7 +121,6 @@ static void
 fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
 {
        struct fd4_context *fd4_ctx = fd4_context(ctx);
-       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
        struct fd4_emit emit = {
                .vtx  = &ctx->vtx,
                .prog = &ctx->prog,
@@ -116,21 +128,26 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
                .key = {
                        /* do binning pass first: */
                        .binning_pass = true,
-                       .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
-                       .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
+                       .color_two_side = ctx->rasterizer->light_twoside,
+                       .rasterflat = ctx->rasterizer->flatshade,
                        // TODO set .half_precision based on render target format,
                        // ie. float16 and smaller use half, float32 use full..
                        .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
-                       .has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate),
+                       .ucp_enables = ctx->rasterizer->clip_plane_enable,
+                       .has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate ||
+                                       fd4_ctx->fastc_srgb || fd4_ctx->vastc_srgb),
                        .vsaturate_s = fd4_ctx->vsaturate_s,
                        .vsaturate_t = fd4_ctx->vsaturate_t,
                        .vsaturate_r = fd4_ctx->vsaturate_r,
                        .fsaturate_s = fd4_ctx->fsaturate_s,
                        .fsaturate_t = fd4_ctx->fsaturate_t,
                        .fsaturate_r = fd4_ctx->fsaturate_r,
+                       .vastc_srgb = fd4_ctx->vastc_srgb,
+                       .fastc_srgb = fd4_ctx->fastc_srgb,
                },
-               .format = fd4_emit_format(pfb->cbufs[0]),
-               .pformat = pipe_surface_format(pfb->cbufs[0]),
+               .rasterflat = ctx->rasterizer->flatshade,
+               .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
+               .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
        };
        unsigned dirty;
 
@@ -144,7 +161,25 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
        emit.key.binning_pass = false;
        emit.dirty = dirty;
        emit.vp = NULL;   /* we changed key so need to refetch vp */
+       emit.fp = NULL;
+
+       if (ctx->rasterizer->rasterizer_discard) {
+               fd_wfi(ctx, ctx->ring);
+               OUT_PKT3(ctx->ring, CP_REG_RMW, 3);
+               OUT_RING(ctx->ring, REG_A4XX_RB_RENDER_CONTROL);
+               OUT_RING(ctx->ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
+               OUT_RING(ctx->ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
+       }
+
        draw_impl(ctx, ctx->ring, &emit);
+
+       if (ctx->rasterizer->rasterizer_discard) {
+               fd_wfi(ctx, ctx->ring);
+               OUT_PKT3(ctx->ring, CP_REG_RMW, 3);
+               OUT_RING(ctx->ring, REG_A4XX_RB_RENDER_CONTROL);
+               OUT_RING(ctx->ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);
+               OUT_RING(ctx->ring, 0);
+       }
 }
 
 /* clear operations ignore viewport state, so we need to reset it
@@ -163,6 +198,43 @@ reset_viewport(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb)
        OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-half_height));
 }
 
+/* TODO: maybe we should just migrate to u_blitter for clears and do it in
+ * core (so we get normal draw-pass state management and binning). That
+ * should work well enough for a3xx/a4xx (but maybe not a2xx?).
+ */
+
+static void
+fd4_clear_binning(struct fd_context *ctx, unsigned dirty)
+{      /* Emits the solid-program clear draw into ctx->binning_ring with the binning_pass key set; called from fd4_clear() with its masked dirty bits. */
+       struct fd4_context *fd4_ctx = fd4_context(ctx);
+       struct fd_ringbuffer *ring = ctx->binning_ring;   /* binning ring, whereas fd4_clear() itself emits into ctx->ring */
+       struct fd4_emit emit = {
+               .vtx  = &fd4_ctx->solid_vbuf_state,   /* same vertex state fd4_clear() uses */
+               .prog = &ctx->solid_prog,   /* same program fd4_clear() uses */
+               .key = {
+                       .binning_pass = true,   /* selects the binning-pass shader variant -- presumably; key handling is not visible here */
+                       .half_precision = true,
+               },
+               .dirty = dirty,
+       };
+
+       fd4_emit_state(ctx, ring, &emit);
+       fd4_emit_vertex_bufs(ring, &emit);
+       reset_viewport(ring, &ctx->framebuffer);   /* clears ignore viewport state, so it is reset before the draw */
+
+       OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);   /* two dwords follow: PC_PRIM_VTX_CNTL, then (going by the macro names) PC_PRIM_VTX_CNTL2 */
+       OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_VAROUT(0) |
+                       A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+       OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+                       A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES));
+
+       OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+       OUT_RING(ring, 0x00000002);   /* NOTE(review): raw register value; meaning of bit 1 is not visible in this diff -- confirm against the register database */
+
+       fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+                       DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);   /* auto-indexed rect-list draw; 2 and 1 are presumably vertex count and instance count -- TODO confirm against fd4_draw() */
+}
+
 static void
 fd4_clear(struct fd_context *ctx, unsigned buffers,
                const union pipe_color_union *color, double depth, unsigned stencil)
@@ -170,25 +242,23 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
        struct fd4_context *fd4_ctx = fd4_context(ctx);
        struct fd_ringbuffer *ring = ctx->ring;
        struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+       unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
        unsigned dirty = ctx->dirty;
-       unsigned ce, i;
+       unsigned i;
        struct fd4_emit emit = {
                .vtx  = &fd4_ctx->solid_vbuf_state,
                .prog = &ctx->solid_prog,
                .key = {
-                       .half_precision = true,
+                       .half_precision = fd_half_precision(pfb),
                },
-               .format = fd4_emit_format(pfb->cbufs[0]),
        };
-       uint32_t colr = 0;
-
-       if ((buffers & PIPE_CLEAR_COLOR) && pfb->nr_cbufs)
-               colr  = pack_rgba(pfb->cbufs[0]->format, color->f);
 
        dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
        dirty |= FD_DIRTY_PROG;
        emit.dirty = dirty;
 
+       fd4_clear_binning(ctx, dirty);
+
        OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
        OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
 
@@ -257,16 +327,14 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
        if (buffers & PIPE_CLEAR_COLOR) {
                OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
                OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
-               ce = 0xf;
-       } else {
-               ce = 0x0;
        }
 
-       for (i = 0; i < 8; i++) {
+       for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+               mrt_comp[i] = (buffers & (PIPE_CLEAR_COLOR0 << i)) ? 0xf : 0x0;
+
                OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
-               OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
-                               A4XX_RB_MRT_CONTROL_B11 |
-                               A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));
+               OUT_RING(ring, A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
+                               A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
 
                OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
                OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
@@ -277,6 +345,16 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
                                A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
        }
 
+       OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+       OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+                       A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+                       A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+                       A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+                       A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+                       A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+                       A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+                       A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+
        fd4_emit_vertex_bufs(ring, &emit);
 
        OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
@@ -285,14 +363,8 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
        OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
        OUT_RING(ring, 0x00000000);
 
-       OUT_PKT0(ring, REG_A4XX_RB_CLEAR_COLOR_DW0, 4);
-       OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW0 */
-       OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW1 */
-       OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW2 */
-       OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW3 */
-
        /* until fastclear works: */
-       fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+       fd4_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL);
 
        OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
        OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */