panfrost: Move vertex/tiler payload initialization out of panfrost_draw_vbo()
[mesa.git] / src / gallium / drivers / panfrost / pan_context.c
index 0f4bca74066d1fd3e86a90c10f26afab7e723c96..1208097f99063a4c9df50645ccd953351c72b1d3 100644 (file)
@@ -174,39 +174,6 @@ panfrost_emit_vertex_payload(struct panfrost_context *ctx)
         memcpy(&ctx->payloads[PIPE_SHADER_COMPUTE], &payload, sizeof(payload));
 }
 
-static unsigned
-translate_tex_wrap(enum pipe_tex_wrap w)
-{
-        switch (w) {
-        case PIPE_TEX_WRAP_REPEAT:
-                return MALI_WRAP_REPEAT;
-
-        case PIPE_TEX_WRAP_CLAMP:
-                return MALI_WRAP_CLAMP;
-
-        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-                return MALI_WRAP_CLAMP_TO_EDGE;
-
-        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-                return MALI_WRAP_CLAMP_TO_BORDER;
-
-        case PIPE_TEX_WRAP_MIRROR_REPEAT:
-                return MALI_WRAP_MIRRORED_REPEAT;
-
-        case PIPE_TEX_WRAP_MIRROR_CLAMP:
-                return MALI_WRAP_MIRRORED_CLAMP;
-
-        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-                return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
-
-        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
-                return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
-
-        default:
-                unreachable("Invalid wrap");
-        }
-}
-
 bool
 panfrost_writes_point_size(struct panfrost_context *ctx)
 {
@@ -216,21 +183,14 @@ panfrost_writes_point_size(struct panfrost_context *ctx)
         return vs->writes_point_size && ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.draw_mode == MALI_POINTS;
 }
 
-/* Stage the attribute descriptors so we can adjust src_offset
- * to let BOs align nicely */
-
-static void
-panfrost_stage_attributes(struct panfrost_context *ctx)
+void
+panfrost_vertex_state_upd_attr_offs(struct panfrost_context *ctx,
+                                    struct midgard_payload_vertex_tiler *vp)
 {
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        struct panfrost_vertex_state *so = ctx->vertex;
-
-        size_t sz = sizeof(struct mali_attr_meta) * PAN_MAX_ATTRIBUTE;
-        struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sz);
-        struct mali_attr_meta *target = (struct mali_attr_meta *) transfer.cpu;
+        if (!ctx->vertex)
+                return;
 
-        /* Copy as-is for the first pass */
-        memcpy(target, so->hw, sz);
+        struct panfrost_vertex_state *so = ctx->vertex;
 
         /* Fixup offsets for the second pass. Recall that the hardware
          * calculates attribute addresses as:
@@ -253,20 +213,20 @@ panfrost_stage_attributes(struct panfrost_context *ctx)
          * QED.
          */
 
-        unsigned start = ctx->payloads[PIPE_SHADER_VERTEX].offset_start;
+        unsigned start = vp->offset_start;
 
         for (unsigned i = 0; i < so->num_elements; ++i) {
                 unsigned vbi = so->pipe[i].vertex_buffer_index;
                 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
-                struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
-                mali_ptr addr = rsrc->bo->gpu + buf->buffer_offset;
 
                 /* Adjust by the masked off bits of the offset. Make sure we
                  * read src_offset from so->hw (which is not GPU visible)
                  * rather than target (which is) due to caching effects */
 
-                unsigned src_offset = so->hw[i].src_offset;
-                src_offset += (addr & 63);
+                unsigned src_offset = so->pipe[i].src_offset;
+
+                /* BOs aligned to 4k so guaranteed aligned to 64 */
+                src_offset += (buf->buffer_offset & 63);
 
                 /* Also, somewhat obscurely per-instance data needs to be
                  * offset in response to a delayed start in an indexed draw */
@@ -274,100 +234,7 @@ panfrost_stage_attributes(struct panfrost_context *ctx)
                 if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start)
                         src_offset -= buf->stride * start;
 
-                target[i].src_offset = src_offset;
-        }
-
-        /* Let's also include vertex builtins */
-
-        struct mali_attr_meta builtin = {
-                .format = MALI_R32UI,
-                .swizzle = panfrost_get_default_swizzle(1)
-        };
-
-        /* See mali_attr_meta specification for the magic number */
-
-        builtin.index = so->vertexid_index;
-        memcpy(&target[PAN_VERTEX_ID], &builtin, 4);
-
-        builtin.index = so->vertexid_index + 1;
-        memcpy(&target[PAN_INSTANCE_ID], &builtin, 4);
-
-        ctx->payloads[PIPE_SHADER_VERTEX].postfix.attribute_meta = transfer.gpu;
-}
-
-static void
-panfrost_upload_sampler_descriptors(struct panfrost_context *ctx)
-{
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        size_t desc_size = sizeof(struct mali_sampler_descriptor);
-
-        for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
-                mali_ptr upload = 0;
-
-                if (ctx->sampler_count[t]) {
-                        size_t transfer_size = desc_size * ctx->sampler_count[t];
-
-                        struct panfrost_transfer transfer =
-                                panfrost_allocate_transient(batch, transfer_size);
-
-                        struct mali_sampler_descriptor *desc =
-                                (struct mali_sampler_descriptor *) transfer.cpu;
-
-                        for (int i = 0; i < ctx->sampler_count[t]; ++i)
-                                desc[i] = ctx->samplers[t][i]->hw;
-
-                        upload = transfer.gpu;
-                }
-
-                ctx->payloads[t].postfix.sampler_descriptor = upload;
-        }
-}
-
-static mali_ptr
-panfrost_upload_tex(
-        struct panfrost_context *ctx,
-        enum pipe_shader_type st,
-        struct panfrost_sampler_view *view)
-{
-        if (!view)
-                return (mali_ptr) 0;
-
-        struct pipe_sampler_view *pview = &view->base;
-        struct panfrost_resource *rsrc = pan_resource(pview->texture);
-
-        /* Add the BO to the job so it's retained until the job is done. */
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-
-        panfrost_batch_add_bo(batch, rsrc->bo,
-                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
-                              panfrost_bo_access_for_stage(st));
-
-        panfrost_batch_add_bo(batch, view->bo,
-                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
-                              panfrost_bo_access_for_stage(st));
-
-        return view->bo->gpu;
-}
-
-static void
-panfrost_upload_texture_descriptors(struct panfrost_context *ctx)
-{
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-
-        for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
-                mali_ptr trampoline = 0;
-
-                if (ctx->sampler_view_count[t]) {
-                        uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
-
-                        for (int i = 0; i < ctx->sampler_view_count[t]; ++i)
-                                trampolines[i] =
-                                        panfrost_upload_tex(ctx, t, ctx->sampler_views[t][i]);
-
-                        trampoline = panfrost_upload_transient(batch, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]);
-                }
-
-                ctx->payloads[t].postfix.texture_trampoline = trampoline;
+                so->hw[i].src_offset = src_offset;
         }
 }
 
@@ -411,15 +278,14 @@ panfrost_emit_for_draw(struct panfrost_context *ctx)
         panfrost_emit_shader_meta(batch, PIPE_SHADER_FRAGMENT,
                                   &ctx->payloads[PIPE_SHADER_FRAGMENT]);
 
-        /* We stage to transient, so always dirty.. */
-        if (ctx->vertex)
-                panfrost_stage_attributes(ctx);
+        panfrost_emit_vertex_attr_meta(batch,
+                                       &ctx->payloads[PIPE_SHADER_VERTEX]);
 
-        panfrost_upload_sampler_descriptors(ctx);
-        panfrost_upload_texture_descriptors(ctx);
-
-        for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i)
+        for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i) {
+                panfrost_emit_sampler_descriptors(batch, i, &ctx->payloads[i]);
+                panfrost_emit_texture_descriptors(batch, i, &ctx->payloads[i]);
                 panfrost_emit_const_buf(batch, i, &ctx->payloads[i]);
+        }
 
         /* TODO: Upload the viewport somewhere more appropriate */
 
@@ -434,29 +300,11 @@ panfrost_queue_draw(struct panfrost_context *ctx)
         /* Handle dirty flags now */
         panfrost_emit_for_draw(ctx);
 
-        /* If rasterizer discard is enable, only submit the vertex */
-
-        bool rasterizer_discard = ctx->rasterizer
-                                  && ctx->rasterizer->base.rasterizer_discard;
-
-
-        struct midgard_payload_vertex_tiler *vertex_payload = &ctx->payloads[PIPE_SHADER_VERTEX];
-        struct midgard_payload_vertex_tiler *tiler_payload = &ctx->payloads[PIPE_SHADER_FRAGMENT];
-
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
-
-        if (wallpapering) {
-                /* Inject in reverse order, with "predicted" job indices. THIS IS A HACK XXX */
-                panfrost_new_job(batch, JOB_TYPE_TILER, false, batch->job_index + 2, tiler_payload, sizeof(*tiler_payload), true);
-                panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), true);
-        } else  {
-                unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), false);
-
-                if (!rasterizer_discard)
-                        panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tiler_payload, sizeof(*tiler_payload), false);
-        }
 
+        panfrost_emit_vertex_tiler_jobs(batch,
+                                        &ctx->payloads[PIPE_SHADER_VERTEX],
+                                        &ctx->payloads[PIPE_SHADER_FRAGMENT]);
         panfrost_batch_adjust_stack_size(batch);
 }
 
@@ -528,78 +376,6 @@ g2m_draw_mode(enum pipe_prim_type mode)
 
 #undef DEFINE_CASE
 
-static unsigned
-panfrost_translate_index_size(unsigned size)
-{
-        switch (size) {
-        case 1:
-                return MALI_DRAW_INDEXED_UINT8;
-
-        case 2:
-                return MALI_DRAW_INDEXED_UINT16;
-
-        case 4:
-                return MALI_DRAW_INDEXED_UINT32;
-
-        default:
-                unreachable("Invalid index size");
-        }
-}
-
-/* Gets a GPU address for the associated index buffer. Only gauranteed to be
- * good for the duration of the draw (transient), could last longer. Also get
- * the bounds on the index buffer for the range accessed by the draw. We do
- * these operations together because there are natural optimizations which
- * require them to be together. */
-
-static mali_ptr
-panfrost_get_index_buffer_bounded(struct panfrost_context *ctx, const struct pipe_draw_info *info, unsigned *min_index, unsigned *max_index)
-{
-        struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);
-
-        off_t offset = info->start * info->index_size;
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        mali_ptr out = 0;
-
-        bool needs_indices = true;
-
-        if (info->max_index != ~0u) {
-                *min_index = info->min_index;
-                *max_index = info->max_index;
-                needs_indices = false;
-        }
-
-        if (!info->has_user_indices) {
-                /* Only resources can be directly mapped */
-                panfrost_batch_add_bo(batch, rsrc->bo,
-                                      PAN_BO_ACCESS_SHARED |
-                                      PAN_BO_ACCESS_READ |
-                                      PAN_BO_ACCESS_VERTEX_TILER);
-                out = rsrc->bo->gpu + offset;
-
-                /* Check the cache */
-                needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache, info->start, info->count,
-                                                           min_index, max_index);
-        } else {
-                /* Otherwise, we need to upload to transient memory */
-                const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
-                out = panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size);
-        }
-
-        if (needs_indices) {
-                /* Fallback */
-                u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
-
-                if (!info->has_user_indices) {
-                        panfrost_minmax_cache_add(rsrc->index_cache, info->start, info->count,
-                                                  *min_index, *max_index);
-                }
-        }
-
-
-        return out;
-}
-
 static bool
 panfrost_scissor_culls_everything(struct panfrost_context *ctx)
 {
@@ -633,6 +409,18 @@ panfrost_statistics_record(
         ctx->tf_prims_generated += prims;
 }
 
+static void
+panfrost_update_streamout_offsets(struct panfrost_context *ctx)
+{
+        for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
+                unsigned count;
+
+                count = u_stream_outputs_for_vertices(ctx->active_prim,
+                                                      ctx->vertex_count);
+                ctx->streamout.offsets[i] += count;
+        }
+}
+
 static void
 panfrost_draw_vbo(
         struct pipe_context *pipe,
@@ -677,68 +465,25 @@ panfrost_draw_vbo(
                 }
         }
 
-        ctx->payloads[PIPE_SHADER_VERTEX].offset_start = info->start;
-        ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = info->start;
-
         /* Now that we have a guaranteed terminating path, find the job.
          * Assignment commented out to prevent unused warning */
 
         /* struct panfrost_batch *batch = */ panfrost_get_batch_for_fbo(ctx);
 
-        ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.draw_mode = g2m_draw_mode(mode);
-
         /* Take into account a negative bias */
         ctx->vertex_count = info->count + abs(info->index_bias);
         ctx->instance_count = info->instance_count;
         ctx->active_prim = info->mode;
 
-        /* For non-indexed draws, they're the same */
-        unsigned vertex_count = ctx->vertex_count;
-
-        unsigned draw_flags = 0;
+        unsigned vertex_count;
 
-        /* The draw flags interpret how primitive size is interpreted */
-
-        if (panfrost_writes_point_size(ctx))
-                draw_flags |= MALI_DRAW_VARYING_SIZE;
-
-        if (info->primitive_restart)
-                draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
-
-        /* These doesn't make much sense */
-
-        draw_flags |= 0x3000;
-
-        if (ctx->rasterizer && ctx->rasterizer->base.flatshade_first)
-                draw_flags |= MALI_DRAW_FLATSHADE_FIRST;
+        panfrost_vt_set_draw_info(ctx, info, g2m_draw_mode(mode),
+                                  &ctx->payloads[PIPE_SHADER_VERTEX],
+                                  &ctx->payloads[PIPE_SHADER_FRAGMENT],
+                                  &vertex_count, &ctx->padded_count);
 
         panfrost_statistics_record(ctx, info);
 
-        if (info->index_size) {
-                unsigned min_index = 0, max_index = 0;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices =
-                        panfrost_get_index_buffer_bounded(ctx, info, &min_index, &max_index);
-
-                /* Use the corresponding values */
-                vertex_count = max_index - min_index + 1;
-                ctx->payloads[PIPE_SHADER_VERTEX].offset_start = min_index + info->index_bias;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = min_index + info->index_bias;
-
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = -min_index;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(info->count);
-
-                draw_flags |= panfrost_translate_index_size(info->index_size);
-        } else {
-                /* Index count == vertex count, if no indexing is applied, as
-                 * if it is internally indexed in the expected order */
-
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = 0;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
-
-                /* Reverse index state */
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = (mali_ptr) 0;
-        }
-
         /* Dispatch "compute jobs" for the vertex/tiler pair as (1,
          * vertex_count, 1) */
 
@@ -748,42 +493,11 @@ panfrost_draw_vbo(
                 1, vertex_count, info->instance_count,
                 1, 1, 1);
 
-        ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.unknown_draw = draw_flags;
-
-        /* Encode the padded vertex count */
-
-        if (info->instance_count > 1) {
-                ctx->padded_count = panfrost_padded_vertex_count(vertex_count);
-
-                unsigned shift = __builtin_ctz(ctx->padded_count);
-                unsigned k = ctx->padded_count >> (shift + 1);
-
-                ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = shift;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = shift;
-
-                ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = k;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = k;
-        } else {
-                ctx->padded_count = vertex_count;
-
-                /* Reset instancing state */
-                ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = 0;
-                ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = 0;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = 0;
-                ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = 0;
-        }
-
         /* Fire off the draw itself */
         panfrost_queue_draw(ctx);
 
         /* Increment transform feedback offsets */
-
-        for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
-                unsigned output_count = u_stream_outputs_for_vertices(
-                                ctx->active_prim, ctx->vertex_count);
-
-                ctx->streamout.offsets[i] += output_count;
-        }
+        panfrost_update_streamout_offsets(ctx);
 }
 
 /* CSO state */
@@ -849,11 +563,14 @@ panfrost_create_vertex_elements_state(
                 so->hw[i].swizzle = panfrost_get_default_swizzle(desc->nr_channels);
 
                 so->hw[i].format = panfrost_find_format(desc);
-
-                /* The field itself should probably be shifted over */
-                so->hw[i].src_offset = elements[i].src_offset;
         }
 
+        /* Let's also prepare vertex builtins */
+        so->hw[PAN_VERTEX_ID].format = MALI_R32UI;
+        so->hw[PAN_VERTEX_ID].swizzle = panfrost_get_default_swizzle(1);
+        so->hw[PAN_INSTANCE_ID].format = MALI_R32UI;
+        so->hw[PAN_INSTANCE_ID].swizzle = panfrost_get_default_swizzle(1);
+
         return so;
 }
 
@@ -925,56 +642,7 @@ panfrost_create_sampler_state(
         struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state);
         so->base = *cso;
 
-        /* sampler_state corresponds to mali_sampler_descriptor, which we can generate entirely here */
-
-        bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
-        bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
-        bool mip_linear  = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
-
-        unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
-        unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
-        unsigned mip_filter = mip_linear  ?
-                (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
-        unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
-
-        struct mali_sampler_descriptor sampler_descriptor = {
-                .filter_mode = min_filter | mag_filter | mip_filter | normalized,
-                .wrap_s = translate_tex_wrap(cso->wrap_s),
-                .wrap_t = translate_tex_wrap(cso->wrap_t),
-                .wrap_r = translate_tex_wrap(cso->wrap_r),
-                .compare_func = panfrost_flip_compare_func(
-                                panfrost_translate_compare_func(
-                                        cso->compare_func)),
-                .border_color = {
-                        cso->border_color.f[0],
-                        cso->border_color.f[1],
-                        cso->border_color.f[2],
-                        cso->border_color.f[3]
-                },
-                .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
-                .max_lod = FIXED_16(cso->max_lod, false),
-                .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
-                .seamless_cube_map = cso->seamless_cube_map,
-        };
-
-        /* If necessary, we disable mipmapping in the sampler descriptor by
-         * clamping the LOD as tight as possible (from 0 to epsilon,
-         * essentially -- remember these are fixed point numbers, so
-         * epsilon=1/256) */
-
-        if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
-                sampler_descriptor.max_lod = sampler_descriptor.min_lod;
-
-                /* Enforce that there is something in the middle by adding epsilon*/
-
-                if (sampler_descriptor.min_lod == sampler_descriptor.max_lod)
-                        sampler_descriptor.max_lod++;
-
-                /* Sanity check */
-                assert(sampler_descriptor.max_lod > sampler_descriptor.min_lod);
-        }
-
-        so->hw = sampler_descriptor;
+        panfrost_sampler_desc_init(cso, &so->hw);
 
         return so;
 }