panfrost: Avoid postfix dep for vertex_data
[mesa.git] / src / gallium / drivers / panfrost / pan_cmdstream.c
index 259478f0adab897c76e37ef9df23795746be6049..7d7ec775dc0fd7a0ee73e92a26cebfc3213680c5 100644 (file)
@@ -51,12 +51,10 @@ panfrost_bo_access_for_stage(enum pipe_shader_type stage)
                PAN_BO_ACCESS_VERTEX_TILER;
 }
 
-static void
-panfrost_vt_emit_shared_memory(struct panfrost_context *ctx,
-                               struct mali_vertex_tiler_postfix *postfix)
+static mali_ptr
+panfrost_vt_emit_shared_memory(struct panfrost_batch *batch)
 {
-        struct panfrost_device *dev = pan_device(ctx->base.screen);
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
 
         struct mali_shared_memory shared = {
                 .shared_workgroup_count = ~0,
@@ -72,31 +70,7 @@ panfrost_vt_emit_shared_memory(struct panfrost_context *ctx,
                 shared.scratchpad = stack->gpu;
         }
 
-        postfix->shared_memory = panfrost_pool_upload_aligned(&batch->pool, &shared, sizeof(shared), 64);
-}
-
-static void
-panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
-                               struct mali_vertex_tiler_postfix *postfix)
-{
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        postfix->shared_memory = panfrost_batch_reserve_framebuffer(batch);
-}
-
-static void
-panfrost_vt_update_rasterizer(struct panfrost_rasterizer *rasterizer,
-                              struct mali_vertex_tiler_prefix *prefix,
-                              struct mali_vertex_tiler_postfix *postfix)
-{
-        postfix->gl_enables |= 0x7;
-        SET_BIT(postfix->gl_enables, MALI_FRONT_CCW_TOP,
-                rasterizer->base.front_ccw);
-        SET_BIT(postfix->gl_enables, MALI_CULL_FACE_FRONT,
-                (rasterizer->base.cull_face & PIPE_FACE_FRONT));
-        SET_BIT(postfix->gl_enables, MALI_CULL_FACE_BACK,
-                (rasterizer->base.cull_face & PIPE_FACE_BACK));
-        SET_BIT(prefix->unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
-                rasterizer->base.flatshade_first);
+        return panfrost_pool_upload_aligned(&batch->pool, &shared, sizeof(shared), 64);
 }
 
 void
@@ -115,22 +89,6 @@ panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
         }
 }
 
-static void
-panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
-                                   struct mali_vertex_tiler_postfix *postfix)
-{
-        SET_BIT(postfix->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
-        if (ctx->occlusion_query) {
-                postfix->occlusion_counter = ctx->occlusion_query->bo->gpu;
-                panfrost_batch_add_bo(ctx->batch, ctx->occlusion_query->bo,
-                                      PAN_BO_ACCESS_SHARED |
-                                      PAN_BO_ACCESS_RW |
-                                      PAN_BO_ACCESS_FRAGMENT);
-        } else {
-                postfix->occlusion_counter = 0;
-        }
-}
-
 void
 panfrost_vt_init(struct panfrost_context *ctx,
                  enum pipe_shader_type stage,
@@ -138,6 +96,7 @@ panfrost_vt_init(struct panfrost_context *ctx,
                  struct mali_vertex_tiler_postfix *postfix)
 {
         struct panfrost_device *device = pan_device(ctx->base.screen);
+        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 
         if (!ctx->shader[stage])
                 return;
@@ -147,15 +106,32 @@ panfrost_vt_init(struct panfrost_context *ctx,
 
         if (device->quirks & IS_BIFROST) {
                 postfix->gl_enables = 0x2;
-                panfrost_vt_emit_shared_memory(ctx, postfix);
+                postfix->shared_memory = panfrost_vt_emit_shared_memory(batch);
         } else {
                 postfix->gl_enables = 0x6;
-                panfrost_vt_attach_framebuffer(ctx, postfix);
+                postfix->shared_memory = panfrost_batch_reserve_framebuffer(batch);
         }
 
         if (stage == PIPE_SHADER_FRAGMENT) {
-                panfrost_vt_update_occlusion_query(ctx, postfix);
-                panfrost_vt_update_rasterizer(ctx->rasterizer, prefix, postfix);
+                if (ctx->occlusion_query) {
+                        postfix->gl_enables |= MALI_OCCLUSION_QUERY;
+                        postfix->occlusion_counter = ctx->occlusion_query->bo->gpu;
+                        panfrost_batch_add_bo(ctx->batch, ctx->occlusion_query->bo,
+                                              PAN_BO_ACCESS_SHARED |
+                                              PAN_BO_ACCESS_RW |
+                                              PAN_BO_ACCESS_FRAGMENT);
+                }
+
+                postfix->gl_enables |= 0x7;
+                struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
+                SET_BIT(postfix->gl_enables, MALI_FRONT_CCW_TOP,
+                        rast->front_ccw);
+                SET_BIT(postfix->gl_enables, MALI_CULL_FACE_FRONT,
+                        (rast->cull_face & PIPE_FACE_FRONT));
+                SET_BIT(postfix->gl_enables, MALI_CULL_FACE_BACK,
+                        (rast->cull_face & PIPE_FACE_BACK));
+                SET_BIT(prefix->unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
+                        rast->flatshade_first);
         }
 }
 
@@ -286,6 +262,7 @@ panfrost_vt_set_draw_info(struct panfrost_context *ctx,
         }
 
         tiler_prefix->unknown_draw = draw_flags;
+        ctx->offset_start = vertex_postfix->offset_start;
 
         /* Encode the padded vertex count */
 
@@ -719,7 +696,6 @@ panfrost_emit_frag_shader_meta(struct panfrost_batch *batch)
 
         struct panfrost_device *dev = pan_device(ctx->base.screen);
         unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
-        void *rts = NULL;
         struct panfrost_transfer xfer;
         unsigned rt_size;
 
@@ -731,35 +707,25 @@ panfrost_emit_frag_shader_meta(struct panfrost_batch *batch)
                 rt_size = sizeof(struct midgard_blend_rt);
 
         unsigned desc_size = MALI_STATE_LENGTH + rt_size * rt_count;
-
-        if (rt_size)
-                rts = rzalloc_size(ctx, rt_size * rt_count);
+        xfer = panfrost_pool_alloc_aligned(&batch->pool, desc_size, MALI_STATE_LENGTH);
 
         struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
 
         for (unsigned c = 0; c < ctx->pipe_framebuffer.nr_cbufs; ++c)
                 blend[c] = panfrost_get_blend_for_context(ctx, c);
 
+        panfrost_emit_frag_shader(ctx, (struct mali_state_packed *) xfer.cpu, blend);
+
         if (!(dev->quirks & MIDGARD_SFBD))
-                panfrost_emit_blend(batch, rts, blend);
+                panfrost_emit_blend(batch, xfer.cpu + MALI_STATE_LENGTH, blend);
         else
                 batch->draws |= PIPE_CLEAR_COLOR0;
 
-        xfer = panfrost_pool_alloc_aligned(&batch->pool, desc_size, MALI_STATE_LENGTH);
-
-        panfrost_emit_frag_shader(ctx, (struct mali_state_packed *) xfer.cpu, blend);
-
-        memcpy(xfer.cpu + MALI_STATE_LENGTH, rts, rt_size * rt_count);
-
-        if (rt_size)
-                ralloc_free(rts);
-
         return xfer.gpu;
 }
 
-void
-panfrost_emit_viewport(struct panfrost_batch *batch,
-                       struct mali_vertex_tiler_postfix *tiler_postfix)
+mali_ptr
+panfrost_emit_viewport(struct panfrost_batch *batch)
 {
         struct panfrost_context *ctx = batch->ctx;
         const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
@@ -804,8 +770,8 @@ panfrost_emit_viewport(struct panfrost_batch *batch,
                 cfg.maximum_z = rast->depth_clip_far ? maxz : INFINITY;
         }
 
-        tiler_postfix->viewport = T.gpu;
         panfrost_batch_union_scissor(batch, minx, miny, maxx, maxy);
+        return T.gpu;
 }
 
 static mali_ptr
@@ -1010,16 +976,16 @@ panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
                 unreachable("No constant buffer");
 }
 
-void
+mali_ptr
 panfrost_emit_const_buf(struct panfrost_batch *batch,
                         enum pipe_shader_type stage,
-                        struct mali_vertex_tiler_postfix *postfix)
+                        mali_ptr *push_constants)
 {
         struct panfrost_context *ctx = batch->ctx;
         struct panfrost_shader_variants *all = ctx->shader[stage];
 
         if (!all)
-                return;
+                return 0;
 
         struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
 
@@ -1087,16 +1053,15 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
                 }
         }
 
-        postfix->uniforms = transfer.gpu;
-        postfix->uniform_buffers = ubos.gpu;
+        *push_constants = transfer.gpu;
 
         buf->dirty_mask = 0;
+        return ubos.gpu;
 }
 
-void
+mali_ptr
 panfrost_emit_shared_memory(struct panfrost_batch *batch,
-                            const struct pipe_grid_info *info,
-                            struct midgard_payload_vertex_tiler *vtp)
+                            const struct pipe_grid_info *info)
 {
         struct panfrost_context *ctx = batch->ctx;
         struct panfrost_device *dev = pan_device(ctx->base.screen);
@@ -1121,8 +1086,8 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
                 .shared_shift = util_logbase2(single_size) + 1
         };
 
-        vtp->postfix.shared_memory = panfrost_pool_upload_aligned(&batch->pool, &shared,
-                                                               sizeof(shared), 64);
+        return panfrost_pool_upload_aligned(&batch->pool, &shared,
+                        sizeof(shared), 64);
 }
 
 static mali_ptr
@@ -1161,16 +1126,15 @@ panfrost_update_sampler_view(struct panfrost_sampler_view *view,
         }
 }
 
-void
+mali_ptr
 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
-                                  enum pipe_shader_type stage,
-                                  struct mali_vertex_tiler_postfix *postfix)
+                                  enum pipe_shader_type stage)
 {
         struct panfrost_context *ctx = batch->ctx;
         struct panfrost_device *device = pan_device(ctx->base.screen);
 
         if (!ctx->sampler_view_count[stage])
-                return;
+                return 0;
 
         if (device->quirks & IS_BIFROST) {
                 struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool,
@@ -1200,7 +1164,7 @@ panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
                                               panfrost_bo_access_for_stage(stage));
                 }
 
-                postfix->textures = T.gpu;
+                return T.gpu;
         } else {
                 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
 
@@ -1212,23 +1176,21 @@ panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
                         trampolines[i] = panfrost_get_tex_desc(batch, stage, view);
                 }
 
-                postfix->textures = panfrost_pool_upload_aligned(&batch->pool,
-                                                              trampolines,
-                                                              sizeof(uint64_t) *
-                                                              ctx->sampler_view_count[stage],
-                                                              sizeof(uint64_t));
+                return panfrost_pool_upload_aligned(&batch->pool, trampolines,
+                                sizeof(uint64_t) *
+                                ctx->sampler_view_count[stage],
+                                sizeof(uint64_t));
         }
 }
 
-void
+mali_ptr
 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
-                                  enum pipe_shader_type stage,
-                                  struct mali_vertex_tiler_postfix *postfix)
+                                  enum pipe_shader_type stage)
 {
         struct panfrost_context *ctx = batch->ctx;
 
         if (!ctx->sampler_count[stage])
-                return;
+                return 0;
 
         size_t desc_size = MALI_BIFROST_SAMPLER_LENGTH;
         assert(MALI_BIFROST_SAMPLER_LENGTH == MALI_MIDGARD_SAMPLER_LENGTH);
@@ -1240,27 +1202,22 @@ panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
         for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i)
                 out[i] = ctx->samplers[stage][i]->hw;
 
-        postfix->sampler_descriptor = T.gpu;
+        return T.gpu;
 }
 
-void
+mali_ptr
 panfrost_emit_vertex_data(struct panfrost_batch *batch,
-                          struct mali_vertex_tiler_postfix *vertex_postfix)
+                          mali_ptr *buffers)
 {
         struct panfrost_context *ctx = batch->ctx;
         struct panfrost_vertex_state *so = ctx->vertex;
         struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
 
-        unsigned instance_shift = vertex_postfix->instance_shift;
-        unsigned instance_odd = vertex_postfix->instance_odd;
-
         /* Worst case: everything is NPOT, which is only possible if instancing
          * is enabled. Otherwise single record is gauranteed */
-        bool could_npot = instance_shift || instance_odd;
-
         struct panfrost_transfer S = panfrost_pool_alloc_aligned(&batch->pool,
                         MALI_ATTRIBUTE_BUFFER_LENGTH * vs->attribute_count *
-                        (could_npot ? 2 : 1),
+                        (ctx->instance_count > 1 ? 2 : 1),
                         MALI_ATTRIBUTE_BUFFER_LENGTH * 2);
 
         struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool,
@@ -1325,14 +1282,14 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
 
                 if (!divisor || ctx->instance_count <= 1) {
                         pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
-                                if (ctx->instance_count > 1)
+                                if (ctx->instance_count > 1) {
                                         cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
+                                        cfg.divisor = ctx->padded_count;
+                                }
 
                                 cfg.pointer = addr;
                                 cfg.stride = stride;
                                 cfg.size = size;
-                                cfg.divisor_r = instance_shift;
-                                cfg.divisor_p = instance_odd;
                         }
                 } else if (util_is_power_of_two_or_zero(hw_divisor)) {
                         pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
@@ -1397,8 +1354,6 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
          * addressing modes and now base is 64 aligned.
          */
 
-        unsigned start = vertex_postfix->offset_start;
-
         for (unsigned i = 0; i < so->num_elements; ++i) {
                 unsigned vbi = so->pipe[i].vertex_buffer_index;
                 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
@@ -1415,8 +1370,8 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
                 /* Also, somewhat obscurely per-instance data needs to be
                  * offset in response to a delayed start in an indexed draw */
 
-                if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start)
-                        src_offset -= buf->stride * start;
+                if (so->pipe[i].instance_divisor && ctx->instance_count > 1)
+                        src_offset -= buf->stride * ctx->offset_start;
 
                 pan_pack(out + i, ATTRIBUTE, cfg) {
                         cfg.buffer_index = attrib_to_buffer[i];
@@ -1425,8 +1380,8 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
                 }
         }
 
-        vertex_postfix->attributes = S.gpu;
-        vertex_postfix->attribute_meta = T.gpu;
+        *buffers = S.gpu;
+        return T.gpu;
 }
 
 static mali_ptr