X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fpanfrost%2Fpan_compute.c;h=3628a38dbf388c59444f367a2952314f37340323;hb=fd41dece39c7d6110951e0fd6fc1693663e7c10d;hp=d0b2e132295c631e69c7f927a1fb0ee6d9ae5101;hpb=5b0a1a4e49b49cb61d9369d3aaacfe1d124571c8;p=mesa.git diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c index d0b2e132295..3628a38dbf3 100644 --- a/src/gallium/drivers/panfrost/pan_compute.c +++ b/src/gallium/drivers/panfrost/pan_compute.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2019 Collabora, Ltd. + * Copyright (C) 2019 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -26,7 +27,11 @@ */ #include "pan_context.h" +#include "pan_cmdstream.h" +#include "panfrost-quirks.h" +#include "pan_bo.h" #include "util/u_memory.h" +#include "nir_serialize.h" /* Compute CSOs are tracked like graphics shader CSOs, but are * considerably simpler. We do not implement multiple @@ -44,18 +49,23 @@ panfrost_create_compute_state( so->cbase = *cso; so->is_compute = true; - struct panfrost_shader_state *v = &so->variants[0]; + struct panfrost_shader_state *v = calloc(1, sizeof(*v)); + so->variants = v; so->variant_count = 1; so->active_variant = 0; - v->tripipe = malloc(sizeof(struct mali_shader_meta)); - - panfrost_shader_compile(ctx, v->tripipe, - cso->ir_type, cso->prog, - MESA_SHADER_COMPUTE, v, NULL); + if (cso->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) { + struct blob_reader reader; + const struct pipe_binary_program_header *hdr = cso->prog; + blob_reader_init(&reader, hdr->blob, hdr->num_bytes); + so->cbase.prog = nir_deserialize(NULL, &midgard_nir_options, &reader); + so->cbase.ir_type = PIPE_SHADER_IR_NIR; + } + panfrost_shader_compile(ctx, so->cbase.ir_type, so->cbase.prog, + MESA_SHADER_COMPUTE, v, NULL); return so; } @@ -86,54 +96,89 @@ panfrost_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct panfrost_context *ctx = pan_context(pipe); + struct panfrost_device *dev = pan_device(pipe->screen); - ctx->compute_grid = info; + /* TODO: Do we want a special compute-only batch? */ + struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - struct mali_job_descriptor_header job = { - .job_type = JOB_TYPE_COMPUTE, - .job_descriptor_size = 1, - .job_barrier = 1 - }; + ctx->compute_grid = info; /* TODO: Stub */ - struct midgard_payload_vertex_tiler *payload = &ctx->payloads[PIPE_SHADER_COMPUTE]; - - panfrost_emit_for_draw(ctx, false); - - /* Compute jobs have a "compute FBD". It's not a real framebuffer - * descriptor - there is no framebuffer - but it takes the place of - * one. As far as I can tell, it's actually the beginning of a - * single-render-target framebuffer descriptor with almost everything - * zeroed out. - */ - struct mali_compute_fbd compute_fbd = { - .unknown1 = { - 0, 0x1F, 0, 0, 0, 0, 0, 0 - } + struct midgard_payload_vertex_tiler payload = { 0 }; + struct mali_invocation_packed invocation; + struct mali_draw_packed postfix; + + /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so + * reuse the graphics path for this by lowering to Gallium */ + + struct pipe_constant_buffer ubuf = { + .buffer = NULL, + .buffer_offset = 0, + .buffer_size = ctx->shader[PIPE_SHADER_COMPUTE]->cbase.req_input_mem, + .user_buffer = info->input }; - payload->postfix.framebuffer = - panfrost_upload_transient(ctx, &compute_fbd, sizeof(compute_fbd)); + if (info->input) + pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, &ubuf); - /* Invoke according to the grid info */ + pan_pack(&postfix, DRAW, cfg) { + cfg.unknown_1 = (dev->quirks & IS_BIFROST) ? 0x2 : 0x6; + cfg.state = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_COMPUTE); + cfg.shared = panfrost_emit_shared_memory(batch, info); + cfg.uniform_buffers = panfrost_emit_const_buf(batch, + PIPE_SHADER_COMPUTE, &cfg.push_uniforms); + cfg.textures = panfrost_emit_texture_descriptors(batch, + PIPE_SHADER_COMPUTE); + cfg.samplers = panfrost_emit_sampler_descriptors(batch, + PIPE_SHADER_COMPUTE); + } - panfrost_pack_work_groups_compute(&payload->prefix, - info->grid[0], info->grid[1], info->grid[2], - info->block[0], info->block[1], info->block[2]); + unsigned magic = + util_logbase2_ceil(info->block[0] + 1) + + util_logbase2_ceil(info->block[1] + 1) + + util_logbase2_ceil(info->block[2] + 1); - /* Upload the payload */ + payload.prefix.primitive.opaque[0] = (magic) << 26; /* XXX */ - struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(*payload)); - memcpy(transfer.cpu, &job, sizeof(job)); - memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload)); + memcpy(&payload.postfix, &postfix, sizeof(postfix)); - /* TODO: Do we want a special compute-only batch? */ - struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx); + /* Invoke according to the grid info */ - /* Queue the job */ - panfrost_scoreboard_queue_compute_job(batch, transfer); + panfrost_pack_work_groups_compute(&invocation, + info->grid[0], info->grid[1], + info->grid[2], + info->block[0], info->block[1], + info->block[2], + false); + payload.prefix.invocation = invocation; + + panfrost_new_job(&batch->pool, &batch->scoreboard, + MALI_JOB_TYPE_COMPUTE, true, 0, &payload, + sizeof(payload), false); + panfrost_flush_all_batches(ctx, 0); +} - panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME); +static void +panfrost_set_compute_resources(struct pipe_context *pctx, + unsigned start, unsigned count, + struct pipe_surface **resources) +{ + /* TODO */ +} + +static void +panfrost_set_global_binding(struct pipe_context *pctx, + unsigned first, unsigned count, + struct pipe_resource **resources, + uint32_t **handles) +{ + /* TODO */ +} + +static void +panfrost_memory_barrier(struct pipe_context *pctx, unsigned flags) +{ + /* TODO */ } void @@ -144,6 +189,9 @@ panfrost_compute_context_init(struct pipe_context *pctx) pctx->delete_compute_state = panfrost_delete_compute_state; pctx->launch_grid = panfrost_launch_grid; -} + pctx->set_compute_resources = panfrost_set_compute_resources; + pctx->set_global_binding = panfrost_set_global_binding; + pctx->memory_barrier = panfrost_memory_barrier; +}