panfrost: Emit texture/sampler pointers for compute
diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c
index 1901f58dda7f73654b268f818d5ab406805ef452..3628a38dbf388c59444f367a2952314f37340323 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -27,6 +27,8 @@
  */
 
 #include "pan_context.h"
+#include "pan_cmdstream.h"
+#include "panfrost-quirks.h"
 #include "pan_bo.h"
 #include "util/u_memory.h"
 #include "nir_serialize.h"
@@ -53,9 +55,6 @@ panfrost_create_compute_state(
         so->variant_count = 1;
         so->active_variant = 0;
 
-        /* calloc, instead of malloc - to zero unused fields */
-        v->tripipe = CALLOC_STRUCT(mali_shader_meta);
-
         if (cso->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) {
                 struct blob_reader reader;
                 const struct pipe_binary_program_header *hdr = cso->prog;
@@ -65,9 +64,8 @@ panfrost_create_compute_state(
                 so->cbase.ir_type = PIPE_SHADER_IR_NIR;
         }
 
-        panfrost_shader_compile(ctx, v->tripipe,
-                        so->cbase.ir_type, so->cbase.prog,
-                        MESA_SHADER_COMPUTE, v, NULL);
+        panfrost_shader_compile(ctx, so->cbase.ir_type, so->cbase.prog,
+                                MESA_SHADER_COMPUTE, v, NULL);
 
         return so;
 }
@@ -98,22 +96,17 @@ panfrost_launch_grid(struct pipe_context *pipe,
                 const struct pipe_grid_info *info)
 {
         struct panfrost_context *ctx = pan_context(pipe);
+        struct panfrost_device *dev = pan_device(pipe->screen);
 
         /* TODO: Do we want a special compute-only batch? */
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 
         ctx->compute_grid = info;
 
-        struct mali_job_descriptor_header job = {
-                .job_type = JOB_TYPE_COMPUTE,
-                .job_descriptor_size = 1,
-                .job_barrier = 1
-        };
-
         /* TODO: Stub */
-        struct midgard_payload_vertex_tiler *payload = &ctx->payloads[PIPE_SHADER_COMPUTE];
-        struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
-        struct panfrost_shader_state *ss = &all->variants[all->active_variant];
+        struct midgard_payload_vertex_tiler payload = { 0 };
+        struct mali_invocation_packed invocation;
+        struct mali_draw_packed postfix;
 
         /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
          * reuse the graphics path for this by lowering to Gallium */
@@ -128,40 +121,41 @@ panfrost_launch_grid(struct pipe_context *pipe,
         if (info->input)
                 pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, &ubuf);
 
-        panfrost_emit_for_draw(ctx, false);
+        pan_pack(&postfix, DRAW, cfg) {
+                cfg.unknown_1 = (dev->quirks & IS_BIFROST) ? 0x2 : 0x6;
+                cfg.state = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_COMPUTE);
+                cfg.shared = panfrost_emit_shared_memory(batch, info);
+                cfg.uniform_buffers = panfrost_emit_const_buf(batch,
+                                PIPE_SHADER_COMPUTE, &cfg.push_uniforms);
+                cfg.textures = panfrost_emit_texture_descriptors(batch,
+                                PIPE_SHADER_COMPUTE);
+                cfg.samplers = panfrost_emit_sampler_descriptors(batch,
+                                PIPE_SHADER_COMPUTE);
+        }
 
-        unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size, 128));
-        unsigned shared_size = single_size * info->grid[0] * info->grid[1] * info->grid[2] * 4;
+        unsigned magic =
+                util_logbase2_ceil(info->block[0] + 1) +
+                util_logbase2_ceil(info->block[1] + 1) +
+                util_logbase2_ceil(info->block[2] + 1);
 
-        struct mali_shared_memory shared = {
-                .shared_memory = panfrost_batch_get_shared_memory(batch, shared_size, 1)->gpu,
-                .shared_workgroup_count =
-                        util_logbase2_ceil(info->grid[0]) +
-                        util_logbase2_ceil(info->grid[1]) +
-                        util_logbase2_ceil(info->grid[2]),
-                .shared_unk1 = 0x2,
-                .shared_shift = util_logbase2(single_size) - 1
-        };
+        payload.prefix.primitive.opaque[0] = (magic) << 26; /* XXX */
 
-        payload->postfix.shared_memory =
-                panfrost_upload_transient(batch, &shared, sizeof(shared));
+        memcpy(&payload.postfix, &postfix, sizeof(postfix));
 
         /* Invoke according to the grid info */
 
-        panfrost_pack_work_groups_compute(&payload->prefix,
-                        info->grid[0], info->grid[1], info->grid[2],
-                        info->block[0], info->block[1], info->block[2], false);
-
-        /* Upload the payload */
-
-        struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(job) + sizeof(*payload));
-        memcpy(transfer.cpu, &job, sizeof(job));
-        memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload));
-
-        /* Queue the job */
-        panfrost_scoreboard_queue_compute_job(batch, transfer);
-
-        panfrost_flush_all_batches(ctx, true);
+        panfrost_pack_work_groups_compute(&invocation,
+                                          info->grid[0], info->grid[1],
+                                          info->grid[2],
+                                          info->block[0], info->block[1],
+                                          info->block[2],
+                                          false);
+        payload.prefix.invocation = invocation;
+
+        panfrost_new_job(&batch->pool, &batch->scoreboard,
+                        MALI_JOB_TYPE_COMPUTE, true, 0, &payload,
+                         sizeof(payload), false);
+        panfrost_flush_all_batches(ctx, 0);
 }
 
 static void
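
Side note: a minimal standalone sketch of the work-group size arithmetic the new
code stores into payload.prefix.primitive.opaque[0] (the XXX-marked line above).
util_logbase2_ceil is reimplemented here so the snippet builds outside of Mesa's
src/util, and the 8x8x1 block size is a hypothetical example, not anything taken
from the patch.

#include <stdint.h>
#include <stdio.h>

/* ceil(log2(n)), matching util_logbase2_ceil's behaviour: returns 0 for n <= 1 */
static unsigned logbase2_ceil(unsigned n)
{
        return (n <= 1) ? 0 : 32 - __builtin_clz(n - 1);
}

int main(void)
{
        unsigned block[3] = { 8, 8, 1 };   /* hypothetical local workgroup size */

        /* Sum of ceil(log2(block[i] + 1)), as computed for "magic" in the patch */
        unsigned magic = logbase2_ceil(block[0] + 1) +
                         logbase2_ceil(block[1] + 1) +
                         logbase2_ceil(block[2] + 1);

        /* Same shift as the XXX-marked store into primitive.opaque[0] */
        uint32_t word = (uint32_t)magic << 26;

        /* For 8x8x1: 4 + 4 + 1 = 9, so word == 0x24000000 */
        printf("magic = %u, opaque[0] = 0x%08x\n", magic, (unsigned)word);
        return 0;
}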