panfrost: Move vertex/tiler payload initialization out of panfrost_draw_vbo()
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index ddef5c107649146fc3e2a335e9aa6fa01ffa38b7..a4816363ddf5a6bb36c0e7a4ebbaf024379b457f 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -23,6 +23,7 @@
  */
 
 #include "util/macros.h"
+#include "util/u_vbuf.h"
 
 #include "panfrost-quirks.h"
 
@@ -99,6 +100,148 @@ panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
                 tp->postfix.occlusion_counter = 0;
 }
 
+static unsigned
+panfrost_translate_index_size(unsigned size)
+{
+        switch (size) {
+        case 1:
+                return MALI_DRAW_INDEXED_UINT8;
+
+        case 2:
+                return MALI_DRAW_INDEXED_UINT16;
+
+        case 4:
+                return MALI_DRAW_INDEXED_UINT32;
+
+        default:
+                unreachable("Invalid index size");
+        }
+}
+
+/* Gets a GPU address for the associated index buffer. Only guaranteed to be
+ * valid for the duration of the draw (transient), though it may last longer.
+ * Also gets the bounds on the index buffer for the range accessed by the
+ * draw. We do these operations together because there are natural
+ * optimizations which require them to be together. */
+
+static mali_ptr
+panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
+                                  const struct pipe_draw_info *info,
+                                  unsigned *min_index, unsigned *max_index)
+{
+        struct panfrost_resource *rsrc = pan_resource(info->index.resource);
+        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+        off_t offset = info->start * info->index_size;
+        bool needs_indices = true;
+        mali_ptr out = 0;
+
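+        /* If the caller already knows the index bounds (max_index != ~0),
+         * use them directly and skip computing them ourselves */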
+        if (info->max_index != ~0u) {
+                *min_index = info->min_index;
+                *max_index = info->max_index;
+                needs_indices = false;
+        }
+
+        if (!info->has_user_indices) {
+                /* Only resources can be directly mapped */
+                panfrost_batch_add_bo(batch, rsrc->bo,
+                                      PAN_BO_ACCESS_SHARED |
+                                      PAN_BO_ACCESS_READ |
+                                      PAN_BO_ACCESS_VERTEX_TILER);
+                out = rsrc->bo->gpu + offset;
+
+                /* Check the per-resource cache of index bounds to avoid a
+                 * CPU scan of the index buffer */
+                needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
+                                                           info->start,
+                                                           info->count,
+                                                           min_index,
+                                                           max_index);
+        } else {
+                /* Otherwise, we need to upload to transient memory */
+                const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
+                out = panfrost_upload_transient(batch, ibuf8 + offset,
+                                                info->count *
+                                                info->index_size);
+        }
+
+        if (needs_indices) {
+                /* Fallback: scan the indices on the CPU to find the bounds */
+                u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
+
+                if (!info->has_user_indices)
+                        panfrost_minmax_cache_add(rsrc->index_cache,
+                                                  info->start, info->count,
+                                                  *min_index, *max_index);
+        }
+
+        return out;
+}
+
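+/* Sketch of the expected call site (panfrost_draw_vbo): fill both the vertex
+ * and tiler payloads and get the counts back before emitting attributes and
+ * jobs. Names here are illustrative, roughly:
+ *
+ *     unsigned vertex_count, padded_count;
+ *     panfrost_vt_set_draw_info(ctx, info, g2m_draw_mode(mode),
+ *                               &ctx->payloads[PIPE_SHADER_VERTEX],
+ *                               &ctx->payloads[PIPE_SHADER_FRAGMENT],
+ *                               &vertex_count, &padded_count);
+ */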
+void
+panfrost_vt_set_draw_info(struct panfrost_context *ctx,
+                          const struct pipe_draw_info *info,
+                          enum mali_draw_mode draw_mode,
+                          struct midgard_payload_vertex_tiler *vp,
+                          struct midgard_payload_vertex_tiler *tp,
+                          unsigned *vertex_count,
+                          unsigned *padded_count)
+{
+        tp->prefix.draw_mode = draw_mode;
+
+        unsigned draw_flags = 0;
+
+        if (panfrost_writes_point_size(ctx))
+                draw_flags |= MALI_DRAW_VARYING_SIZE;
+
+        if (info->primitive_restart)
+                draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
+
+        /* These don't make much sense */
+
+        draw_flags |= 0x3000;
+
+        if (info->index_size) {
+                unsigned min_index = 0, max_index = 0;
+
+                tp->prefix.indices = panfrost_get_index_buffer_bounded(ctx,
+                                                                       info,
+                                                                       &min_index,
+                                                                       &max_index);
+
+                /* Use the bounds to restrict the draw to the vertices
+                 * actually referenced by the indices */
+                *vertex_count = max_index - min_index + 1;
+                tp->offset_start = vp->offset_start = min_index + info->index_bias;
+                tp->prefix.offset_bias_correction = -min_index;
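+                /* index_count is an off-by-one field: MALI_POSITIVE(n) encodes n - 1 */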
+                tp->prefix.index_count = MALI_POSITIVE(info->count);
+                draw_flags |= panfrost_translate_index_size(info->index_size);
+        } else {
+                tp->prefix.indices = 0;
+                *vertex_count = ctx->vertex_count;
+                tp->offset_start = vp->offset_start = info->start;
+                tp->prefix.offset_bias_correction = 0;
+                tp->prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
+        }
+
+        tp->prefix.unknown_draw = draw_flags;
+
+        /* Encode the padded vertex count */
+
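+        /* The instance_shift/instance_odd fields hold the padded count
+         * decomposed as (2 * odd + 1) << shift; e.g. a padded count of
+         * 24 = 3 << 3 gives shift = 3, odd = 1 */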
+        if (info->instance_count > 1) {
+                *padded_count = panfrost_padded_vertex_count(*vertex_count);
+
+                unsigned shift = __builtin_ctz(*padded_count);
+                unsigned k = *padded_count >> (shift + 1);
+
+                tp->instance_shift = vp->instance_shift = shift;
+                tp->instance_odd = vp->instance_odd = k;
+        } else {
+                *padded_count = *vertex_count;
+
+                /* Reset instancing state */
+                tp->instance_shift = vp->instance_shift = 0;
+                tp->instance_odd = vp->instance_odd = 0;
+        }
+}
+
 static void
 panfrost_shader_meta_init(struct panfrost_context *ctx,
                           enum pipe_shader_type st,
@@ -120,7 +263,7 @@ panfrost_shader_meta_init(struct panfrost_context *ctx,
         meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
 }
 
-unsigned
+static unsigned
 panfrost_translate_compare_func(enum pipe_compare_func in)
 {
         switch (in) {
@@ -186,6 +329,80 @@ panfrost_translate_stencil_op(enum pipe_stencil_op in)
         }
 }
 
+static unsigned
+translate_tex_wrap(enum pipe_tex_wrap w)
+{
+        switch (w) {
+        case PIPE_TEX_WRAP_REPEAT:
+                return MALI_WRAP_REPEAT;
+
+        case PIPE_TEX_WRAP_CLAMP:
+                return MALI_WRAP_CLAMP;
+
+        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+                return MALI_WRAP_CLAMP_TO_EDGE;
+
+        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+                return MALI_WRAP_CLAMP_TO_BORDER;
+
+        case PIPE_TEX_WRAP_MIRROR_REPEAT:
+                return MALI_WRAP_MIRRORED_REPEAT;
+
+        case PIPE_TEX_WRAP_MIRROR_CLAMP:
+                return MALI_WRAP_MIRRORED_CLAMP;
+
+        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+                return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
+
+        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+                return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
+
+        default:
+                unreachable("Invalid wrap");
+        }
+}
+
+void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
+                                struct mali_sampler_descriptor *hw)
+{
+        unsigned func = panfrost_translate_compare_func(cso->compare_func);
+        bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
+        bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
+        bool mip_linear  = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
+        unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
+        unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
+        unsigned mip_filter = mip_linear  ?
+                              (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
+        unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
+
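+        /* The LOD fields are 16-bit fixed point with 8 fractional bits, so
+         * the smallest representable step is 1/256 */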
+        *hw = (struct mali_sampler_descriptor) {
+                .filter_mode = min_filter | mag_filter | mip_filter |
+                               normalized,
+                .wrap_s = translate_tex_wrap(cso->wrap_s),
+                .wrap_t = translate_tex_wrap(cso->wrap_t),
+                .wrap_r = translate_tex_wrap(cso->wrap_r),
+                .compare_func = panfrost_flip_compare_func(func),
+                .border_color = {
+                        cso->border_color.f[0],
+                        cso->border_color.f[1],
+                        cso->border_color.f[2],
+                        cso->border_color.f[3]
+                },
+                .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
+                .max_lod = FIXED_16(cso->max_lod, false),
+                .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
+                .seamless_cube_map = cso->seamless_cube_map,
+        };
+
+        /* If necessary, we disable mipmapping in the sampler descriptor by
+         * clamping the LOD range as tightly as possible (to a range just one
+         * step wide -- remember these are fixed point numbers, so one step
+         * is 1/256) */
+
+        if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
+                hw->max_lod = hw->min_lod + 1;
+}
+
 static void
 panfrost_make_stencil_state(const struct pipe_stencil_state *in,
                             struct mali_stencil_test *out)
@@ -903,3 +1120,121 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
         vtp->postfix.shared_memory = panfrost_upload_transient(batch, &shared,
                                                                sizeof(shared));
 }
+
+static mali_ptr
+panfrost_get_tex_desc(struct panfrost_batch *batch,
+                      enum pipe_shader_type st,
+                      struct panfrost_sampler_view *view)
+{
+        if (!view)
+                return (mali_ptr) 0;
+
+        struct pipe_sampler_view *pview = &view->base;
+        struct panfrost_resource *rsrc = pan_resource(pview->texture);
+
+        /* Add the BO to the job so it's retained until the job is done. */
+
+        panfrost_batch_add_bo(batch, rsrc->bo,
+                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
+                              panfrost_bo_access_for_stage(st));
+
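+        /* The texture descriptor itself lives in a separate BO, which must
+         * be retained as well */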
+        panfrost_batch_add_bo(batch, view->bo,
+                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
+                              panfrost_bo_access_for_stage(st));
+
+        return view->bo->gpu;
+}
+
+void
+panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
+                                  enum pipe_shader_type stage,
+                                  struct midgard_payload_vertex_tiler *vtp)
+{
+        struct panfrost_context *ctx = batch->ctx;
+
+        if (!ctx->sampler_view_count[stage])
+                return;
+
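+        /* Texture descriptors are referenced indirectly: build a flat array
+         * of GPU pointers, one per view, and upload it as the trampoline */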
+        uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+
+        for (int i = 0; i < ctx->sampler_view_count[stage]; ++i)
+                trampolines[i] = panfrost_get_tex_desc(batch, stage,
+                                                       ctx->sampler_views[stage][i]);
+
+        vtp->postfix.texture_trampoline = panfrost_upload_transient(batch,
+                                                                    trampolines,
+                                                                    sizeof(uint64_t) *
+                                                                    ctx->sampler_view_count[stage]);
+}
+
+void
+panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
+                                  enum pipe_shader_type stage,
+                                  struct midgard_payload_vertex_tiler *vtp)
+{
+        struct panfrost_context *ctx = batch->ctx;
+
+        if (!ctx->sampler_count[stage])
+                return;
+
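+        /* Sampler descriptors are precomputed at CSO creation time; just
+         * copy each bound one into a single transient allocation */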
+        size_t desc_size = sizeof(struct mali_sampler_descriptor);
+        size_t transfer_size = desc_size * ctx->sampler_count[stage];
+        struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
+                                                                        transfer_size);
+        struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *)transfer.cpu;
+
+        for (int i = 0; i < ctx->sampler_count[stage]; ++i)
+                desc[i] = ctx->samplers[stage][i]->hw;
+
+        vtp->postfix.sampler_descriptor = transfer.gpu;
+}
+
+void
+panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
+                               struct midgard_payload_vertex_tiler *vp)
+{
+        struct panfrost_context *ctx = batch->ctx;
+
+        if (!ctx->vertex)
+                return;
+
+        struct panfrost_vertex_state *so = ctx->vertex;
+
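+        /* Update the attribute offsets for this draw, then upload the whole
+         * attribute metadata array in one go */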
+        panfrost_vertex_state_upd_attr_offs(ctx, vp);
+        vp->postfix.attribute_meta = panfrost_upload_transient(batch, so->hw,
+                                                               sizeof(*so->hw) *
+                                                               PAN_MAX_ATTRIBUTE);
+}
+
+void
+panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
+                                struct midgard_payload_vertex_tiler *vp,
+                                struct midgard_payload_vertex_tiler *tp)
+{
+        struct panfrost_context *ctx = batch->ctx;
+        bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
+
+        if (wallpapering) {
+                /* Inject in reverse order, with "predicted" job indices.
+                 * THIS IS A HACK XXX */
+                panfrost_new_job(batch, JOB_TYPE_TILER, false,
+                                 batch->job_index + 2, tp, sizeof(*tp), true);
+                panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
+                                 vp, sizeof(*vp), true);
+                return;
+        }
+
+        /* If rasterizer discard is enabled, only submit the vertex job */
+
+        bool rasterizer_discard = ctx->rasterizer &&
+                                  ctx->rasterizer->base.rasterizer_discard;
+
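+        /* Chain the tiler job on the vertex job's index so tiling only runs
+         * once the positions for this draw have been shaded */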
+        unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
+                                           vp, sizeof(*vp), false);
+
+        if (rasterizer_discard)
+                return;
+
+        panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tp, sizeof(*tp),
+                         false);
+}