src/gallium/drivers/panfrost/pan_compute.c

   1 /*
   2  * Copyright (C) 2019 Collabora, Ltd.
   3  * Copyright (C) 2019 Red Hat Inc.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  *
  24  * Authors (Collabora):
  25  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
  26  *
  27  */
  28
  29 #include "pan_context.h"
  30 #include "pan_cmdstream.h"
  31 #include "panfrost-quirks.h"
  32 #include "pan_bo.h"
  33 #include "util/u_memory.h"
  34 #include "nir_serialize.h"
  35
  36 /* Compute CSOs are tracked like graphics shader CSOs, but are
  37  * considerably simpler. We do not implement multiple
  38  * variants/keying. So the CSO create function just goes ahead and
  39  * compiles the thing. */
  40
  41 static void *
  42 panfrost_create_compute_state(
  43         struct pipe_context *pctx,
  44         const struct pipe_compute_state *cso)
  45 {
  46         struct panfrost_context *ctx = pan_context(pctx);
  47
  48         struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants);
  49         so->cbase = *cso;
  50         so->is_compute = true;
  51
  52         struct panfrost_shader_state *v = calloc(1, sizeof(*v));
  53         so->variants = v;
  54
  55         so->variant_count = 1;
  56         so->active_variant = 0;
  57
  58         if (cso->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) {
  59                 struct blob_reader reader;
  60                 const struct pipe_binary_program_header *hdr = cso->prog;
  61
  62                 blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
  63                 so->cbase.prog = nir_deserialize(NULL, &midgard_nir_options, &reader);
  64                 so->cbase.ir_type = PIPE_SHADER_IR_NIR;
  65         }
  66
  67         panfrost_shader_compile(ctx, so->cbase.ir_type, so->cbase.prog,
  68                                 MESA_SHADER_COMPUTE, v, NULL);
  69
  70         return so;
  71 }
  72
  73 static void
  74 panfrost_bind_compute_state(struct pipe_context *pipe, void *cso)
  75 {
  76         struct panfrost_context *ctx = pan_context(pipe);
  77
  78         struct panfrost_shader_variants *variants =
  79                 (struct panfrost_shader_variants *) cso;
  80
  81         ctx->shader[PIPE_SHADER_COMPUTE] = variants;
  82 }
  83
  84 static void
  85 panfrost_delete_compute_state(struct pipe_context *pipe, void *cso)
  86 {
  87         free(cso);
  88 }
  89
  90 /* Launch grid is the compute equivalent of draw_vbo, so in this routine, we
  91  * construct the COMPUTE job and some of its payload.
  92  */
  93
  94 static void
  95 panfrost_launch_grid(struct pipe_context *pipe,
  96                 const struct pipe_grid_info *info)
  97 {
  98         struct panfrost_context *ctx = pan_context(pipe);
  99         struct panfrost_device *dev = pan_device(pipe->screen);
 100
 101         /* TODO: Do we want a special compute-only batch? */
 102         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 103
 104         ctx->compute_grid = info;
 105
 106         /* TODO: Stub */
 107         struct midgard_payload_vertex_tiler payload = { 0 };
 108         struct mali_invocation_packed invocation;
 109         struct mali_draw_packed postfix;
 110
 111         /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
 112          * reuse the graphics path for this by lowering to Gallium */
 113
 114         struct pipe_constant_buffer ubuf = {
 115                 .buffer = NULL,
 116                 .buffer_offset = 0,
 117                 .buffer_size = ctx->shader[PIPE_SHADER_COMPUTE]->cbase.req_input_mem,
 118                 .user_buffer = info->input
 119         };
 120
 121         if (info->input)
 122                 pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, &ubuf);
 123
 124         pan_pack(&postfix, DRAW, cfg) {
 125                 cfg.unknown_1 = (dev->quirks & IS_BIFROST) ? 0x2 : 0x6;
 126                 cfg.state = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_COMPUTE);
 127                 cfg.shared = panfrost_emit_shared_memory(batch, info);
 128                 cfg.uniform_buffers = panfrost_emit_const_buf(batch,
 129                                 PIPE_SHADER_COMPUTE, &cfg.push_uniforms);
 130                 cfg.textures = panfrost_emit_texture_descriptors(batch,
 131                                 PIPE_SHADER_COMPUTE);
 132                 cfg.samplers = panfrost_emit_sampler_descriptors(batch,
 133                                 PIPE_SHADER_COMPUTE);
 134         }
 135
 136         unsigned magic =
 137                 util_logbase2_ceil(info->block[0] + 1) +
 138                 util_logbase2_ceil(info->block[1] + 1) +
 139                 util_logbase2_ceil(info->block[2] + 1);
 140
 141         payload.prefix.primitive.opaque[0] = (magic) << 26; /* XXX */
 142
 143         memcpy(&payload.postfix, &postfix, sizeof(postfix));
 144
 145         /* Invoke according to the grid info */
 146
 147         panfrost_pack_work_groups_compute(&invocation,
 148                                           info->grid[0], info->grid[1],
 149                                           info->grid[2],
 150                                           info->block[0], info->block[1],
 151                                           info->block[2],
 152                                           false);
 153         payload.prefix.invocation = invocation;
 154
 155         panfrost_new_job(&batch->pool, &batch->scoreboard,
 156                         MALI_JOB_TYPE_COMPUTE, true, 0, &payload,
 157                          sizeof(payload), false);
 158         panfrost_flush_all_batches(ctx, 0);
 159 }
 160
 161 static void
 162 panfrost_set_compute_resources(struct pipe_context *pctx,
 163                          unsigned start, unsigned count,
 164                          struct pipe_surface **resources)
 165 {
 166         /* TODO */
 167 }
 168
 169 static void
 170 panfrost_set_global_binding(struct pipe_context *pctx,
 171                       unsigned first, unsigned count,
 172                       struct pipe_resource **resources,
 173                       uint32_t **handles)
 174 {
 175         /* TODO */
 176 }
 177
 178 static void
 179 panfrost_memory_barrier(struct pipe_context *pctx, unsigned flags)
 180 {
 181         /* TODO */
 182 }
 183
 184 void
 185 panfrost_compute_context_init(struct pipe_context *pctx)
 186 {
 187         pctx->create_compute_state = panfrost_create_compute_state;
 188         pctx->bind_compute_state = panfrost_bind_compute_state;
 189         pctx->delete_compute_state = panfrost_delete_compute_state;
 190
 191         pctx->launch_grid = panfrost_launch_grid;
 192
 193         pctx->set_compute_resources = panfrost_set_compute_resources;
 194         pctx->set_global_binding = panfrost_set_global_binding;
 195
 196         pctx->memory_barrier = panfrost_memory_barrier;
 197 }