From: Tomeu Vizoso Date: Thu, 1 Aug 2019 14:45:50 +0000 (+0200) Subject: panfrost: Allocate shaders in their own BOs X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=950b5fc5961f81e466090b5166336510ff416740;p=mesa.git panfrost: Allocate shaders in their own BOs Instead of all shaders being stored in a single BO, have each shader in its own. This removes the need for a 16MB allocation per context, and allows us to place transient blend shaders in BOs marked as executable (before they were allocated in the transient pool, which shouldn't be executable). v2: - Store compiled blend shaders in a malloc'ed buffer, to avoid reading from GPU-accessible memory when patching (Alyssa). - Free struct panfrost_blend_shader (Alyssa). - Give the job a reference to regular shaders when emitting (Alyssa). v3: - Split out the allocation flags change (Rob). Signed-off-by: Tomeu Vizoso Reviewed-by: Alyssa Rosenzweig --- diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c index 15f77585a25..dd877056d3b 100644 --- a/src/gallium/drivers/panfrost/pan_assemble.c +++ b/src/gallium/drivers/panfrost/pan_assemble.c @@ -43,6 +43,7 @@ panfrost_shader_compile( gl_shader_stage stage, struct panfrost_shader_state *state) { + struct panfrost_screen *screen = pan_screen(ctx->base.screen); uint8_t *dst; nir_shader *s; @@ -80,7 +81,9 @@ panfrost_shader_compile( * I bet someone just thought that would be a cute pun. At least, * that's how I'd do it. */ - meta->shader = panfrost_upload(&ctx->shaders, dst, size) | program.first_tag; + state->bo = panfrost_drm_create_bo(screen, size, PAN_ALLOCATE_EXECUTE); + memcpy(state->bo->cpu, dst, size); + meta->shader = state->bo->gpu | program.first_tag; util_dynarray_fini(&program.compiled); diff --git a/src/gallium/drivers/panfrost/pan_blend.h b/src/gallium/drivers/panfrost/pan_blend.h index a881e0945f4..a29353ff001 100644 --- a/src/gallium/drivers/panfrost/pan_blend.h +++ b/src/gallium/drivers/panfrost/pan_blend.h @@ -33,8 +33,10 @@ /* An internal blend shader descriptor, from the compiler */ struct panfrost_blend_shader { - /* The compiled shader in GPU memory */ - struct panfrost_transfer shader; + struct panfrost_context *ctx; + + /* The compiled shader */ + void *buffer; /* Byte count of the shader */ unsigned size; @@ -53,8 +55,11 @@ struct panfrost_blend_shader { /* A blend shader descriptor ready for actual use */ struct panfrost_blend_shader_final { - /* The upload, possibly to transient memory */ - mali_ptr gpu; + /* The compiled shader in GPU memory, possibly patched */ + struct panfrost_bo *bo; + + /* First instruction tag (for tagging the pointer) */ + unsigned first_tag; /* Same meaning as panfrost_blend_shader */ unsigned work_count; diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c b/src/gallium/drivers/panfrost/pan_blend_cso.c index f685b25b41b..08a86980ca8 100644 --- a/src/gallium/drivers/panfrost/pan_blend_cso.c +++ b/src/gallium/drivers/panfrost/pan_blend_cso.c @@ -151,11 +151,24 @@ panfrost_bind_blend_state(struct pipe_context *pipe, ctx->dirty |= PAN_DIRTY_FS; } +static void +panfrost_delete_blend_shader(struct hash_entry *entry) +{ + struct panfrost_blend_shader *shader = (struct panfrost_blend_shader *)entry->data; + free(shader->buffer); + free(shader); +} + static void panfrost_delete_blend_state(struct pipe_context *pipe, - void *blend) + void *cso) { - /* TODO: Free shader binary? */ + struct panfrost_blend_state *blend = (struct panfrost_blend_state *) cso; + + for (unsigned c = 0; c < 4; ++c) { + struct panfrost_blend_rt *rt = &blend->rt[c]; + _mesa_hash_table_u64_clear(rt->shaders, panfrost_delete_blend_shader); + } ralloc_free(blend); } @@ -208,6 +221,9 @@ panfrost_blend_constant(float *out, float *in, unsigned mask) struct panfrost_blend_final panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti) { + struct panfrost_screen *screen = pan_screen(ctx->base.screen); + struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); + /* Grab the format, falling back gracefully if called invalidly (which * has to happen for no-color-attachment FBOs, for instance) */ struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer; @@ -241,23 +257,24 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti) struct panfrost_blend_shader *shader = panfrost_get_blend_shader(ctx, blend, fmt, rti); final.is_shader = true; final.shader.work_count = shader->work_count; + final.shader.first_tag = shader->first_tag; + + /* Upload the shader */ + final.shader.bo = panfrost_drm_create_bo(screen, shader->size, PAN_ALLOCATE_EXECUTE); + memcpy(final.shader.bo->cpu, shader->buffer, shader->size); + + /* Pass BO ownership to job */ + panfrost_job_add_bo(job, final.shader.bo); + panfrost_bo_unreference(ctx->base.screen, final.shader.bo); if (shader->patch_index) { /* We have to specialize the blend shader to use constants, so - * patch in the current constants and upload to transient - * memory */ + * patch in the current constants */ - float *patch = (float *) (shader->shader.cpu + shader->patch_index); + float *patch = (float *) (final.shader.bo->cpu + shader->patch_index); memcpy(patch, ctx->blend_color.color, sizeof(float) * 4); - - final.shader.gpu = panfrost_upload_transient( - ctx, shader->shader.cpu, shader->size); - } else { - /* No need to specialize further, use the preuploaded */ - final.shader.gpu = shader->shader.gpu; } - final.shader.gpu |= shader->first_tag; return final; } diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c b/src/gallium/drivers/panfrost/pan_blend_shaders.c index 0063290d7e7..2ee86b4e7db 100644 --- a/src/gallium/drivers/panfrost/pan_blend_shaders.c +++ b/src/gallium/drivers/panfrost/pan_blend_shaders.c @@ -133,6 +133,8 @@ panfrost_compile_blend_shader( { struct panfrost_blend_shader res; + res.ctx = ctx; + /* Build the shader */ nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, &midgard_nir_options, NULL); @@ -172,21 +174,14 @@ panfrost_compile_blend_shader( midgard_program program; midgard_compile_shader_nir(&ctx->compiler, shader, &program, true); - /* Upload the shader */ - - int size = program.compiled.size; - uint8_t *dst = program.compiled.data; - - res.shader.cpu = mem_dup(dst, size); - res.shader.gpu = panfrost_upload(&ctx->shaders, dst, size); - /* At least two work registers are needed due to an encoding quirk */ res.work_count = MAX2(program.work_register_count, 2); /* Allow us to patch later */ res.patch_index = program.blend_patch_offset; res.first_tag = program.first_tag; - res.size = size; + res.size = program.compiled.size; + res.buffer = program.compiled.data; return res; } diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 284c246677d..2fc5ac36197 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -1062,6 +1062,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) panfrost_patch_shader_state(ctx, variant, PIPE_SHADER_FRAGMENT, false); + panfrost_job_add_bo(job, variant->bo); + #define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name COPY(shader); @@ -1140,7 +1142,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) if (blend.is_shader) { ctx->fragment_shader_core.blend.shader = - blend.shader.gpu; + blend.shader.bo->gpu | blend.shader.first_tag; } else { ctx->fragment_shader_core.blend.shader = 0; } @@ -1215,7 +1217,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) assert(!(is_srgb && blend.is_shader)); if (blend.is_shader) { - rts[i].blend.shader = blend.shader.gpu; + rts[i].blend.shader = blend.shader.bo->gpu | blend.shader.first_tag; } else { rts[i].blend.equation = *blend.equation.equation; rts[i].blend.constant = blend.equation.constant; @@ -1904,6 +1906,12 @@ panfrost_delete_shader_state( DBG("Deleting TGSI shader leaks duplicated tokens\n"); } + for (unsigned i = 0; i < cso->variant_count; ++i) { + struct panfrost_shader_state *shader_state = &cso->variants[i]; + panfrost_bo_unreference(pctx->screen, shader_state->bo); + shader_state->bo = NULL; + } + free(so); } @@ -2528,7 +2536,6 @@ panfrost_destroy(struct pipe_context *pipe) util_blitter_destroy(panfrost->blitter_wallpaper); panfrost_drm_free_slab(screen, &panfrost->scratchpad); - panfrost_drm_free_slab(screen, &panfrost->shaders); panfrost_drm_free_slab(screen, &panfrost->tiler_heap); panfrost_drm_free_slab(screen, &panfrost->tiler_dummy); @@ -2673,7 +2680,6 @@ panfrost_setup_hardware(struct panfrost_context *ctx) struct panfrost_screen *screen = pan_screen(gallium->screen); panfrost_drm_allocate_slab(screen, &ctx->scratchpad, 64*4, false, 0, 0, 0); - panfrost_drm_allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0); panfrost_drm_allocate_slab(screen, &ctx->tiler_heap, 4096, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128); panfrost_drm_allocate_slab(screen, &ctx->tiler_dummy, 1, false, PAN_ALLOCATE_INVISIBLE, 0, 0); } diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index e8d2417e0cb..9a34e243b74 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -108,7 +108,6 @@ struct panfrost_context { struct pipe_framebuffer_state pipe_framebuffer; struct panfrost_memory cmdstream_persistent; - struct panfrost_memory shaders; struct panfrost_memory scratchpad; struct panfrost_memory tiler_heap; struct panfrost_memory tiler_dummy; @@ -224,6 +223,8 @@ struct panfrost_shader_state { /* Should we enable helper invocations */ bool helper_invocations; + + struct panfrost_bo *bo; }; /* A collection of varyings (the CSO) */ diff --git a/src/gallium/drivers/panfrost/pan_drm.c b/src/gallium/drivers/panfrost/pan_drm.c index 42cf1750334..a1f0bb446b5 100644 --- a/src/gallium/drivers/panfrost/pan_drm.c +++ b/src/gallium/drivers/panfrost/pan_drm.c @@ -88,6 +88,9 @@ panfrost_drm_create_bo(struct panfrost_screen *screen, size_t size, /* Kernel will fail (confusingly) with EPERM otherwise */ assert(size > 0); + /* To maximize BO cache usage, don't allocate tiny BOs */ + size = MAX2(size, 4096); + unsigned translated_flags = 0; /* TODO: translate flags to kernel flags, if the kernel supports */ @@ -285,7 +288,6 @@ panfrost_drm_submit_vs_fs_job(struct panfrost_context *ctx, bool has_draws, bool struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); /* TODO: Add here the transient pools */ - panfrost_job_add_bo(job, ctx->shaders.bo); panfrost_job_add_bo(job, ctx->scratchpad.bo); panfrost_job_add_bo(job, ctx->tiler_heap.bo); panfrost_job_add_bo(job, job->polygon_list);