src/gallium/drivers/panfrost/pan_compute.c

   1 /*
   2  * Copyright (C) 2019 Collabora, Ltd.
   3  * Copyright (C) 2019 Red Hat Inc.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  *
  24  * Authors (Collabora):
  25  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
  26  *
  27  */
  28
  29 #include "pan_context.h"
  30 #include "pan_bo.h"
  31 #include "util/u_memory.h"
  32 #include "nir_serialize.h"
  33
  34 /* Compute CSOs are tracked like graphics shader CSOs, but are
  35  * considerably simpler. We do not implement multiple
  36  * variants/keying. So the CSO create function just goes ahead and
  37  * compiles the thing. */
  38
  39 static void *
  40 panfrost_create_compute_state(
  41         struct pipe_context *pctx,
  42         const struct pipe_compute_state *cso)
  43 {
  44         struct panfrost_context *ctx = pan_context(pctx);
  45
  46         struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants);
  47         so->cbase = *cso;
  48         so->is_compute = true;
  49
  50         struct panfrost_shader_state *v = calloc(1, sizeof(*v));
  51         so->variants = v;
  52
  53         so->variant_count = 1;
  54         so->active_variant = 0;
  55
  56         /* calloc, instead of malloc - to zero unused fields */
  57         v->tripipe = CALLOC_STRUCT(mali_shader_meta);
  58
  59         if (cso->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) {
  60                 struct blob_reader reader;
  61                 const struct pipe_binary_program_header *hdr = cso->prog;
  62
  63                 blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
  64                 so->cbase.prog = nir_deserialize(NULL, &midgard_nir_options, &reader);
  65                 so->cbase.ir_type = PIPE_SHADER_IR_NIR;
  66         }
  67
  68         panfrost_shader_compile(ctx, v->tripipe,
  69                         so->cbase.ir_type, so->cbase.prog,
  70                         MESA_SHADER_COMPUTE, v, NULL);
  71
  72         return so;
  73 }
  74
  75 static void
  76 panfrost_bind_compute_state(struct pipe_context *pipe, void *cso)
  77 {
  78         struct panfrost_context *ctx = pan_context(pipe);
  79
  80         struct panfrost_shader_variants *variants =
  81                 (struct panfrost_shader_variants *) cso;
  82
  83         ctx->shader[PIPE_SHADER_COMPUTE] = variants;
  84 }
  85
  86 static void
  87 panfrost_delete_compute_state(struct pipe_context *pipe, void *cso)
  88 {
  89         free(cso);
  90 }
  91
  92 /* Launch grid is the compute equivalent of draw_vbo, so in this routine, we
  93  * construct the COMPUTE job and some of its payload.
  94  */
  95
  96 static void
  97 panfrost_launch_grid(struct pipe_context *pipe,
  98                 const struct pipe_grid_info *info)
  99 {
 100         struct panfrost_context *ctx = pan_context(pipe);
 101
 102         /* TODO: Do we want a special compute-only batch? */
 103         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 104
 105         ctx->compute_grid = info;
 106
 107         /* TODO: Stub */
 108         struct midgard_payload_vertex_tiler *payload = &ctx->payloads[PIPE_SHADER_COMPUTE];
 109         struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
 110         struct panfrost_shader_state *ss = &all->variants[all->active_variant];
 111
 112         /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
 113          * reuse the graphics path for this by lowering to Gallium */
 114
 115         struct pipe_constant_buffer ubuf = {
 116                 .buffer = NULL,
 117                 .buffer_offset = 0,
 118                 .buffer_size = ctx->shader[PIPE_SHADER_COMPUTE]->cbase.req_input_mem,
 119                 .user_buffer = info->input
 120         };
 121
 122         if (info->input)
 123                 pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, &ubuf);
 124
 125         panfrost_emit_for_draw(ctx, false);
 126
 127         unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size, 128));
 128         unsigned shared_size = single_size * info->grid[0] * info->grid[1] * info->grid[2] * 4;
 129
 130         struct mali_shared_memory shared = {
 131                 .shared_memory = panfrost_batch_get_shared_memory(batch, shared_size, 1)->gpu,
 132                 .shared_workgroup_count =
 133                         util_logbase2_ceil(info->grid[0]) +
 134                         util_logbase2_ceil(info->grid[1]) +
 135                         util_logbase2_ceil(info->grid[2]),
 136                 .shared_unk1 = 0x2,
 137                 .shared_shift = util_logbase2(single_size) - 1
 138         };
 139
 140         payload->postfix.shared_memory =
 141                 panfrost_upload_transient(batch, &shared, sizeof(shared));
 142
 143         /* Invoke according to the grid info */
 144
 145         panfrost_pack_work_groups_compute(&payload->prefix,
 146                         info->grid[0], info->grid[1], info->grid[2],
 147                         info->block[0], info->block[1], info->block[2], false);
 148
 149         panfrost_new_job(batch, JOB_TYPE_COMPUTE, true, 0, payload, sizeof(*payload), false);
 150         panfrost_flush_all_batches(ctx, true);
 151 }
 152
 153 static void
 154 panfrost_set_compute_resources(struct pipe_context *pctx,
 155                          unsigned start, unsigned count,
 156                          struct pipe_surface **resources)
 157 {
 158         /* TODO */
 159 }
 160
 161 static void
 162 panfrost_set_global_binding(struct pipe_context *pctx,
 163                       unsigned first, unsigned count,
 164                       struct pipe_resource **resources,
 165                       uint32_t **handles)
 166 {
 167         /* TODO */
 168 }
 169
 170 static void
 171 panfrost_memory_barrier(struct pipe_context *pctx, unsigned flags)
 172 {
 173         /* TODO */
 174 }
 175
 176 void
 177 panfrost_compute_context_init(struct pipe_context *pctx)
 178 {
 179         pctx->create_compute_state = panfrost_create_compute_state;
 180         pctx->bind_compute_state = panfrost_bind_compute_state;
 181         pctx->delete_compute_state = panfrost_delete_compute_state;
 182
 183         pctx->launch_grid = panfrost_launch_grid;
 184
 185         pctx->set_compute_resources = panfrost_set_compute_resources;
 186         pctx->set_global_binding = panfrost_set_global_binding;
 187
 188         pctx->memory_barrier = panfrost_memory_barrier;
 189 }