panfrost: Move vertex/tiler payload initialization out of panfrost_draw_vbo()
[mesa.git] src/gallium/drivers/panfrost/pan_context.c
index ba844cb1ca089a1751529ee35cf5716c20c2cb70..1208097f99063a4c9df50645ccd953351c72b1d3 100644 (file)
@@ -183,21 +183,14 @@ panfrost_writes_point_size(struct panfrost_context *ctx)
         return vs->writes_point_size && ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.draw_mode == MALI_POINTS;
 }
 
-/* Stage the attribute descriptors so we can adjust src_offset
- * to let BOs align nicely */
-
-static void
-panfrost_stage_attributes(struct panfrost_context *ctx)
+void
+panfrost_vertex_state_upd_attr_offs(struct panfrost_context *ctx,
+                                    struct midgard_payload_vertex_tiler *vp)
 {
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        struct panfrost_vertex_state *so = ctx->vertex;
-
-        size_t sz = sizeof(struct mali_attr_meta) * PAN_MAX_ATTRIBUTE;
-        struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sz);
-        struct mali_attr_meta *target = (struct mali_attr_meta *) transfer.cpu;
+        if (!ctx->vertex)
+                return;
 
-        /* Copy as-is for the first pass */
-        memcpy(target, so->hw, sz);
+        struct panfrost_vertex_state *so = ctx->vertex;
 
         /* Fixup offsets for the second pass. Recall that the hardware
          * calculates attribute addresses as:
@@ -220,20 +213,20 @@ panfrost_stage_attributes(struct panfrost_context *ctx)
          * QED.
          */
 
-        unsigned start = ctx->payloads[PIPE_SHADER_VERTEX].offset_start;
+        unsigned start = vp->offset_start;
 
         for (unsigned i = 0; i < so->num_elements; ++i) {
                 unsigned vbi = so->pipe[i].vertex_buffer_index;
                 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
-                struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
-                mali_ptr addr = rsrc->bo->gpu + buf->buffer_offset;
 
-                /* Adjust by the masked off bits of the offset. Make sure we
-                 * read src_offset from so->hw (which is not GPU visible)
-                 * rather than target (which is) due to caching effects */
+                /* Adjust by the masked off bits of the offset. Make sure we
+                 * read src_offset from so->pipe (the immutable CSO value)
+                 * rather than so->hw, which is rewritten on every call */
 
-                unsigned src_offset = so->hw[i].src_offset;
-                src_offset += (addr & 63);
+                unsigned src_offset = so->pipe[i].src_offset;
+
+                /* BOs are 4k-aligned, hence also 64-byte aligned */
+                src_offset += (buf->buffer_offset & 63);
 
                 /* Also, somewhat obscurely per-instance data needs to be
                  * offset in response to a delayed start in an indexed draw */
@@ -241,25 +234,8 @@ panfrost_stage_attributes(struct panfrost_context *ctx)
                 if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start)
                         src_offset -= buf->stride * start;
 
-                target[i].src_offset = src_offset;
+                so->hw[i].src_offset = src_offset;
         }
-
-        /* Let's also include vertex builtins */
-
-        struct mali_attr_meta builtin = {
-                .format = MALI_R32UI,
-                .swizzle = panfrost_get_default_swizzle(1)
-        };
-
-        /* See mali_attr_meta specification for the magic number */
-
-        builtin.index = so->vertexid_index;
-        memcpy(&target[PAN_VERTEX_ID], &builtin, 4);
-
-        builtin.index = so->vertexid_index + 1;
-        memcpy(&target[PAN_INSTANCE_ID], &builtin, 4);
-
-        ctx->payloads[PIPE_SHADER_VERTEX].postfix.attribute_meta = transfer.gpu;
 }
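
The helper above now derives each attribute's src_offset from the immutable pipe_vertex_element state plus the low six bits of the vertex buffer offset (rather than the mapped BO address), and rewinds per-instance data when an indexed draw starts at a non-zero offset. A minimal standalone sketch of that arithmetic, using simplified stand-in types instead of the driver's structs:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for one vertex attribute plus its buffer. */
struct fake_attr {
        unsigned src_offset;     /* pipe_vertex_element::src_offset */
        unsigned buffer_offset;  /* pipe_vertex_buffer::buffer_offset */
        unsigned stride;         /* pipe_vertex_buffer::stride */
        unsigned instance_divisor;
};

/* start is the draw's offset_start (min_index + index_bias when indexed). */
static unsigned
fixup_src_offset(const struct fake_attr *a, unsigned start,
                 unsigned instance_count)
{
        unsigned src_offset = a->src_offset;

        /* BOs are 4k-aligned, so only buffer_offset contributes to the
         * low six bits of the final attribute address. */
        src_offset += (a->buffer_offset & 63);

        /* Per-instance data must be rewound when an indexed draw starts
         * at a non-zero offset. */
        if (a->instance_divisor && instance_count > 1 && start)
                src_offset -= a->stride * start;

        return src_offset;
}

int main(void)
{
        struct fake_attr a = { .src_offset = 12, .buffer_offset = 100,
                               .stride = 16, .instance_divisor = 0 };
        printf("%u\n", fixup_src_offset(&a, 0, 1)); /* 12 + (100 & 63) = 48 */
        return 0;
}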
 
 /* Compute number of UBOs active (more specifically, compute the highest UBO
@@ -302,9 +278,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx)
         panfrost_emit_shader_meta(batch, PIPE_SHADER_FRAGMENT,
                                   &ctx->payloads[PIPE_SHADER_FRAGMENT]);
 
-        /* We stage to transient, so always dirty.. */
-        if (ctx->vertex)
-                panfrost_stage_attributes(ctx);
+        panfrost_emit_vertex_attr_meta(batch,
+                                       &ctx->payloads[PIPE_SHADER_VERTEX]);
 
         for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i) {
                 panfrost_emit_sampler_descriptors(batch, i, &ctx->payloads[i]);
@@ -325,29 +300,11 @@ panfrost_queue_draw(struct panfrost_context *ctx)
         /* Handle dirty flags now */
         panfrost_emit_for_draw(ctx);
 
-        /* If rasterizer discard is enable, only submit the vertex */
-
-        bool rasterizer_discard = ctx->rasterizer
-                                  && ctx->rasterizer->base.rasterizer_discard;
-
-
-        struct midgard_payload_vertex_tiler *vertex_payload = &ctx->payloads[PIPE_SHADER_VERTEX];
-        struct midgard_payload_vertex_tiler *tiler_payload = &ctx->payloads[PIPE_SHADER_FRAGMENT];
-
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
-
-        if (wallpapering) {
-                /* Inject in reverse order, with "predicted" job indices. THIS IS A HACK XXX */
-                panfrost_new_job(batch, JOB_TYPE_TILER, false, batch->job_index + 2, tiler_payload, sizeof(*tiler_payload), true);
-                panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), true);
-        } else  {
-                unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), false);
-
-                if (!rasterizer_discard)
-                        panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tiler_payload, sizeof(*tiler_payload), false);
-        }
 
+        panfrost_emit_vertex_tiler_jobs(batch,
+                                        &ctx->payloads[PIPE_SHADER_VERTEX],
+                                        &ctx->payloads[PIPE_SHADER_FRAGMENT]);
         panfrost_batch_adjust_stack_size(batch);
 }
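
panfrost_queue_draw() now hands job submission to panfrost_emit_vertex_tiler_jobs(), so the helper is expected to keep the policy the removed block implemented: emit the vertex job first, make the tiler job depend on it, skip the tiler job entirely under rasterizer discard, and inject the pair in reverse order with predicted indices for wallpaper draws. A small model of that ordering, where new_job() is an illustrative stand-in for panfrost_new_job() (it only records the call and hands back a job index):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for panfrost_new_job(): log the job and return its index. */
static unsigned
new_job(const char *type, unsigned dependency, bool inject_at_front)
{
        static unsigned next_index = 1;
        unsigned index = next_index++;

        printf("%s job %u (dep %u)%s\n", type, index, dependency,
               inject_at_front ? " [injected at front]" : "");
        return index;
}

static void
queue_vertex_tiler(bool rasterizer_discard, bool wallpapering,
                   unsigned current_job_index)
{
        if (wallpapering) {
                /* Reverse order: the tiler job names the vertex job's
                 * predicted index as its dependency. */
                new_job("tiler", current_job_index + 2, true);
                new_job("vertex", 0, true);
        } else {
                unsigned vertex = new_job("vertex", 0, false);

                /* With rasterizer discard only the vertex job runs. */
                if (!rasterizer_discard)
                        new_job("tiler", vertex, false);
        }
}

int main(void)
{
        queue_vertex_tiler(false, false, 0); /* normal draw */
        return 0;
}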
 
@@ -419,78 +376,6 @@ g2m_draw_mode(enum pipe_prim_type mode)
 
 #undef DEFINE_CASE
 
-static unsigned
-panfrost_translate_index_size(unsigned size)
-{
-        switch (size) {
-        case 1:
-                return MALI_DRAW_INDEXED_UINT8;
-
-        case 2:
-                return MALI_DRAW_INDEXED_UINT16;
-
-        case 4:
-                return MALI_DRAW_INDEXED_UINT32;
-
-        default:
-                unreachable("Invalid index size");
-        }
-}
-
-/* Gets a GPU address for the associated index buffer. Only gauranteed to be
- * good for the duration of the draw (transient), could last longer. Also get
- * the bounds on the index buffer for the range accessed by the draw. We do
- * these operations together because there are natural optimizations which
- * require them to be together. */
-
-static mali_ptr
-panfrost_get_index_buffer_bounded(struct panfrost_context *ctx, const struct pipe_draw_info *info, unsigned *min_index, unsigned *max_index)
-{
-        struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);
-
-        off_t offset = info->start * info->index_size;
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        mali_ptr out = 0;
-
-        bool needs_indices = true;
-
-        if (info->max_index != ~0u) {
-                *min_index = info->min_index;
-                *max_index = info->max_index;
-                needs_indices = false;
-        }
-
-        if (!info->has_user_indices) {
-                /* Only resources can be directly mapped */
-                panfrost_batch_add_bo(batch, rsrc->bo,
-                                      PAN_BO_ACCESS_SHARED |
-                                      PAN_BO_ACCESS_READ |
-                                      PAN_BO_ACCESS_VERTEX_TILER);
-                out = rsrc->bo->gpu + offset;
-
-                /* Check the cache */
-                needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache, info->start, info->count,
-                                                           min_index, max_index);
-        } else {
-                /* Otherwise, we need to upload to transient memory */
-                const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
-                out = panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size);
-        }
-
-        if (needs_indices) {
-                /* Fallback */
-                u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
-
-                if (!info->has_user_indices) {
-                        panfrost_minmax_cache_add(rsrc->index_cache, info->start, info->count,
-                                                  *min_index, *max_index);
-                }
-        }
-
-
-        return out;
-}
-
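
For reference, the indexed-draw code that consumed this helper (removed further down) used the returned bounds to shrink the vertex job: vertex_count becomes max_index - min_index + 1 and offset_start becomes min_index + index_bias. A plain-C illustration of that bounds computation, ignoring the min/max cache and the user/resource index distinction:

#include <stdint.h>
#include <stdio.h>

static void
index_bounds_u16(const uint16_t *indices, unsigned count, int index_bias,
                 unsigned *vertex_count, int *offset_start)
{
        uint16_t min = UINT16_MAX, max = 0;

        for (unsigned i = 0; i < count; ++i) {
                if (indices[i] < min) min = indices[i];
                if (indices[i] > max) max = indices[i];
        }

        /* Only the referenced range of vertices needs shading. */
        *vertex_count = max - min + 1;
        *offset_start = (int)min + index_bias;
}

int main(void)
{
        const uint16_t idx[] = { 100, 101, 103, 101, 104, 100 };
        unsigned vc;
        int start;

        index_bounds_u16(idx, 6, 0, &vc, &start);
        printf("vertex_count=%u offset_start=%d\n", vc, start); /* 5, 100 */
        return 0;
}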
 static bool
 panfrost_scissor_culls_everything(struct panfrost_context *ctx)
 {
@@ -524,6 +409,18 @@ panfrost_statistics_record(
         ctx->tf_prims_generated += prims;
 }
 
+static void
+panfrost_update_streamout_offsets(struct panfrost_context *ctx)
+{
+        for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
+                unsigned count;
+
+                count = u_stream_outputs_for_vertices(ctx->active_prim,
+                                                      ctx->vertex_count);
+                ctx->streamout.offsets[i] += count;
+        }
+}
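
This new helper advances every streamout target by the number of vertices the draw emits after primitive decomposition, which is what u_stream_outputs_for_vertices() is expected to return (decomposed primitive count times vertices per primitive). A hand-rolled illustration for triangle strips, not the util code itself:

#include <stdio.h>

/* A strip of n vertices decomposes into n - 2 triangles, and each
 * triangle writes 3 vertices to the streamout buffer. */
static unsigned
stream_output_count_tri_strip(unsigned vertex_count)
{
        return vertex_count < 3 ? 0 : (vertex_count - 2) * 3;
}

int main(void)
{
        /* A 6-vertex strip advances each streamout offset by 12. */
        printf("%u\n", stream_output_count_tri_strip(6));
        return 0;
}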
+
 static void
 panfrost_draw_vbo(
         struct pipe_context *pipe,
@@ -568,68 +465,25 @@ panfrost_draw_vbo(
                 }
         }
 
-        ctx->payloads[PIPE_SHADER_VERTEX].offset_start = info->start;
-        ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = info->start;
-
         /* Now that we have a guaranteed terminating path, find the job.
          * Assignment commented out to prevent unused warning */
 
         /* struct panfrost_batch *batch = */ panfrost_get_batch_for_fbo(ctx);
 
-        ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.draw_mode = g2m_draw_mode(mode);
-
         /* Take into account a negative bias */
         ctx->vertex_count = info->count + abs(info->index_bias);
         ctx->instance_count = info->instance_count;
         ctx->active_prim = info->mode;
 
-        /* For non-indexed draws, they're the same */
-        unsigned vertex_count = ctx->vertex_count;
-
-        unsigned draw_flags = 0;
-
-        /* The draw flags interpret how primitive size is interpreted */
-
-        if (panfrost_writes_point_size(ctx))
-                draw_flags |= MALI_DRAW_VARYING_SIZE;
+        unsigned vertex_count;
 
-        if (info->primitive_restart)
-                draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
-
-        /* These doesn't make much sense */
-
-        draw_flags |= 0x3000;
-
-        if (ctx->rasterizer && ctx->rasterizer->base.flatshade_first)
-                draw_flags |= MALI_DRAW_FLATSHADE_FIRST;
+        panfrost_vt_set_draw_info(ctx, info, g2m_draw_mode(mode),
+                                  &ctx->payloads[PIPE_SHADER_VERTEX],
+                                  &ctx->payloads[PIPE_SHADER_FRAGMENT],
+                                  &vertex_count, &ctx->padded_count);
 
         panfrost_statistics_record(ctx, info);
 
-        if (info->index_size) {
-                unsigned min_index = 0, max_index = 0;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices =
-                        panfrost_get_index_buffer_bounded(ctx, info, &min_index, &max_index);
-
-                /* Use the corresponding values */
-                vertex_count = max_index - min_index + 1;
-                ctx->payloads[PIPE_SHADER_VERTEX].offset_start = min_index + info->index_bias;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = min_index + info->index_bias;
-
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = -min_index;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(info->count);
-
-                draw_flags |= panfrost_translate_index_size(info->index_size);
-        } else {
-                /* Index count == vertex count, if no indexing is applied, as
-                 * if it is internally indexed in the expected order */
-
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = 0;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
-
-                /* Reverse index state */
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = (mali_ptr) 0;
-        }
-
         /* Dispatch "compute jobs" for the vertex/tiler pair as (1,
          * vertex_count, 1) */
 
@@ -639,42 +493,11 @@ panfrost_draw_vbo(
                 1, vertex_count, info->instance_count,
                 1, 1, 1);
 
-        ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.unknown_draw = draw_flags;
-
-        /* Encode the padded vertex count */
-
-        if (info->instance_count > 1) {
-                ctx->padded_count = panfrost_padded_vertex_count(vertex_count);
-
-                unsigned shift = __builtin_ctz(ctx->padded_count);
-                unsigned k = ctx->padded_count >> (shift + 1);
-
-                ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = shift;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = shift;
-
-                ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = k;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = k;
-        } else {
-                ctx->padded_count = vertex_count;
-
-                /* Reset instancing state */
-                ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = 0;
-                ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = 0;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = 0;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = 0;
-        }
-
         /* Fire off the draw itself */
         panfrost_queue_draw(ctx);
 
         /* Increment transform feedback offsets */
-
-        for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
-                unsigned output_count = u_stream_outputs_for_vertices(
-                                ctx->active_prim, ctx->vertex_count);
-
-                ctx->streamout.offsets[i] += output_count;
-        }
+        panfrost_update_streamout_offsets(ctx);
 }
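
The instancing setup that used to live in this function (padded_count, instance_shift, instance_odd) is now part of panfrost_vt_set_draw_info(). The encoding the removed block performed splits the padded vertex count into an odd factor and a power-of-two shift, so that padded_count == (2 * odd + 1) << shift; a standalone sketch of that split:

#include <assert.h>
#include <stdio.h>

static void
encode_padded_count(unsigned padded_count, unsigned *shift, unsigned *odd)
{
        /* The number of trailing zero bits is the power-of-two factor. */
        *shift = __builtin_ctz(padded_count);

        /* The remaining odd factor f is stored as (f - 1) / 2. */
        *odd = padded_count >> (*shift + 1);

        assert(((2 * *odd + 1) << *shift) == padded_count);
}

int main(void)
{
        unsigned shift, odd;

        encode_padded_count(96, &shift, &odd); /* 96 = 3 << 5 */
        printf("shift=%u odd=%u\n", shift, odd); /* shift=5, odd=1 */
        return 0;
}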
 
 /* CSO state */
@@ -740,11 +563,14 @@ panfrost_create_vertex_elements_state(
                 so->hw[i].swizzle = panfrost_get_default_swizzle(desc->nr_channels);
 
                 so->hw[i].format = panfrost_find_format(desc);
-
-                /* The field itself should probably be shifted over */
-                so->hw[i].src_offset = elements[i].src_offset;
         }
 
+        /* Let's also prepare vertex builtins */
+        so->hw[PAN_VERTEX_ID].format = MALI_R32UI;
+        so->hw[PAN_VERTEX_ID].swizzle = panfrost_get_default_swizzle(1);
+        so->hw[PAN_INSTANCE_ID].format = MALI_R32UI;
+        so->hw[PAN_INSTANCE_ID].swizzle = panfrost_get_default_swizzle(1);
+
         return so;
 }
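
Taken together with the per-draw fixup above, this last hunk moves everything that depends only on the vertex-element CSO (formats, swizzles, the gl_VertexID/gl_InstanceID builtin records) to creation time, leaving src_offset to be recomputed per draw because it depends on the bound vertex buffers and the draw parameters. A schematic sketch of that split, with stand-in types rather than the driver's:

/* Illustrative stand-ins, not the driver's types. */
struct fake_elem { unsigned format; unsigned src_offset; };
struct fake_meta { unsigned format; unsigned swizzle; unsigned src_offset; };

/* CSO creation: copy only state that cannot change between draws. */
static void
create_vertex_state(const struct fake_elem *elems, unsigned count,
                    struct fake_meta *hw)
{
        for (unsigned i = 0; i < count; ++i) {
                hw[i].format = elems[i].format;
                hw[i].swizzle = 0; /* a default swizzle for the format */
                /* src_offset is deliberately not written here: the
                 * per-draw helper recomputes it from elems[i].src_offset,
                 * the vertex buffer offset and the draw's offset_start. */
        }
}

int main(void)
{
        struct fake_elem elems[2] = { { 1, 0 }, { 2, 12 } };
        struct fake_meta hw[2] = { { 0 } };

        create_vertex_state(elems, 2, hw);
        return 0;
}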