From c57d5ea3bbb9c136a5b5bbf176a801885efc5a22 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 6 Feb 2018 16:43:24 +0000 Subject: [PATCH] broadcom/vc4: Add an accelerated path to turn raster R8/RG88 into tiled. Drawing a 1080p YV12 video stream generated by MMAL goes from 10.5 FPS to 36. --- src/gallium/drivers/vc4/vc4_blit.c | 200 ++++++++++++++++++++++++++ src/gallium/drivers/vc4/vc4_context.c | 7 + src/gallium/drivers/vc4/vc4_context.h | 4 + 3 files changed, 211 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c index f23f74c8f16..7543e9d4aa0 100644 --- a/src/gallium/drivers/vc4/vc4_blit.c +++ b/src/gallium/drivers/vc4/vc4_blit.c @@ -24,6 +24,7 @@ #include "util/u_format.h" #include "util/u_surface.h" #include "util/u_blitter.h" +#include "nir_builder.h" #include "vc4_context.h" static struct pipe_surface * @@ -183,9 +184,139 @@ vc4_blitter_save(struct vc4_context *vc4) vc4->fragtex.num_textures, vc4->fragtex.textures); } +static void *vc4_get_yuv_vs(struct pipe_context *pctx) +{ + struct vc4_context *vc4 = vc4_context(pctx); + struct pipe_screen *pscreen = pctx->screen; + + if (vc4->yuv_linear_blit_vs) + return vc4->yuv_linear_blit_vs; + + const struct nir_shader_compiler_options *options = + pscreen->get_compiler_options(pscreen, + PIPE_SHADER_IR_NIR, + PIPE_SHADER_VERTEX); + + nir_builder b; + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, options); + b.shader->info.name = ralloc_strdup(b.shader, "linear_blit_vs"); + + const struct glsl_type *vec4 = glsl_vec4_type(); + nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "pos"); + + nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "gl_Position"); + pos_out->data.location = VARYING_SLOT_POS; + + nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf); + + struct pipe_shader_state shader_tmpl = { + .type = PIPE_SHADER_IR_NIR, + .ir.nir = b.shader, + }; + + vc4->yuv_linear_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl); + + return vc4->yuv_linear_blit_vs; +} + +static void *vc4_get_yuv_fs(struct pipe_context *pctx, int cpp) +{ + struct vc4_context *vc4 = vc4_context(pctx); + struct pipe_screen *pscreen = pctx->screen; + struct pipe_shader_state **cached_shader; + const char *name; + + if (cpp == 1) { + cached_shader = &vc4->yuv_linear_blit_fs_8bit; + name = "linear_blit_8bit_fs"; + } else { + cached_shader = &vc4->yuv_linear_blit_fs_16bit; + name = "linear_blit_16bit_fs"; + } + + if (*cached_shader) + return *cached_shader; + + const struct nir_shader_compiler_options *options = + pscreen->get_compiler_options(pscreen, + PIPE_SHADER_IR_NIR, + PIPE_SHADER_FRAGMENT); + + nir_builder b; + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options); + b.shader->info.name = ralloc_strdup(b.shader, name); + + const struct glsl_type *vec4 = glsl_vec4_type(); + const struct glsl_type *glsl_int = glsl_int_type(); + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "f_color"); + color_out->data.location = FRAG_RESULT_COLOR; + + nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "pos"); + pos_in->data.location = VARYING_SLOT_POS; + nir_ssa_def *pos = nir_load_var(&b, pos_in); + + nir_ssa_def *one = nir_imm_int(&b, 1); + nir_ssa_def *two = nir_imm_int(&b, 2); + + nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0)); + nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1)); + + nir_variable *stride_in = nir_variable_create(b.shader, nir_var_uniform, + glsl_int, "stride"); + nir_ssa_def *stride = nir_load_var(&b, stride_in); + + nir_ssa_def *x_offset; + nir_ssa_def *y_offset; + if (cpp == 1) { + nir_ssa_def *intra_utile_x_offset = + nir_ishl(&b, nir_iand(&b, x, one), two); + nir_ssa_def *inter_utile_x_offset = + nir_ishl(&b, nir_iand(&b, x, nir_imm_int(&b, ~3)), one); + + x_offset = nir_iadd(&b, + intra_utile_x_offset, + inter_utile_x_offset); + y_offset = nir_imul(&b, + nir_iadd(&b, + nir_ishl(&b, y, one), + nir_ushr(&b, nir_iand(&b, x, two), one)), + stride); + } else { + x_offset = nir_ishl(&b, x, two); + y_offset = nir_imul(&b, y, stride); + } + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo); + load->num_components = 1; + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL); + load->src[0] = nir_src_for_ssa(one); + load->src[1] = nir_src_for_ssa(nir_iadd(&b, x_offset, y_offset)); + nir_builder_instr_insert(&b, &load->instr); + + nir_store_var(&b, color_out, + nir_unpack_unorm_4x8(&b, &load->dest.ssa), + 0xf); + + struct pipe_shader_state shader_tmpl = { + .type = PIPE_SHADER_IR_NIR, + .ir.nir = b.shader, + }; + + *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl); + + return *cached_shader; +} + static bool vc4_yuv_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) { + struct vc4_context *vc4 = vc4_context(pctx); struct vc4_resource *src = vc4_resource(info->src.resource); struct vc4_resource *dst = vc4_resource(info->dst.resource); bool ok; @@ -200,6 +331,75 @@ vc4_yuv_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) assert(dst->base.format == src->base.format); assert(dst->tiled); + /* Always 1:1 and at the origin */ + assert(info->src.box.x == 0 && info->dst.box.x == 0); + assert(info->src.box.y == 0 && info->dst.box.y == 0); + assert(info->src.box.width == info->dst.box.width); + assert(info->src.box.height == info->dst.box.height); + + if ((src->slices[info->src.level].offset & 3) || + (src->slices[info->src.level].stride & 3)) { + perf_debug("YUV-blit src texture offset/stride misaligned: 0x%08x/%d\n", + src->slices[info->src.level].offset, + src->slices[info->src.level].stride); + goto fallback; + } + + vc4_blitter_save(vc4); + + /* Create a renderable surface mapping the T-tiled shadow buffer. + */ + struct pipe_surface dst_tmpl; + util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource, + info->dst.level, info->dst.box.z); + dst_tmpl.format = PIPE_FORMAT_RGBA8888_UNORM; + struct pipe_surface *dst_surf = + pctx->create_surface(pctx, info->dst.resource, &dst_tmpl); + if (!dst_surf) { + fprintf(stderr, "Failed to create YUV dst surface\n"); + util_blitter_unset_running_flag(vc4->blitter); + return false; + } + dst_surf->width /= 2; + if (dst->cpp == 1) + dst_surf->height /= 2; + + /* Set the constant buffer. */ + uint32_t stride = src->slices[info->src.level].stride; + struct pipe_constant_buffer cb_uniforms = { + .user_buffer = &stride, + .buffer_size = sizeof(stride), + }; + pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, &cb_uniforms); + struct pipe_constant_buffer cb_src = { + .buffer = info->src.resource, + .buffer_offset = src->slices[info->src.level].offset, + .buffer_size = (src->bo->size - + src->slices[info->src.level].offset), + }; + pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_src); + + /* Unbind the textures, to make sure we don't try to recurse into the + * shadow blit. + */ + pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL); + pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL); + + util_blitter_custom_shader(vc4->blitter, dst_surf, + vc4_get_yuv_vs(pctx), + vc4_get_yuv_fs(pctx, src->cpp)); + + util_blitter_restore_textures(vc4->blitter); + util_blitter_restore_constant_buffer_state(vc4->blitter); + /* Restore cb1 (util_blitter doesn't handle this one). */ + struct pipe_constant_buffer cb_disabled = { 0 }; + pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_disabled); + + pipe_surface_reference(&dst_surf, NULL); + + return true; + +fallback: /* Do an immediate SW fallback, since the render blit path * would just recurse. */ diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index a9e7ff91f38..c1e041d1ef9 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -115,6 +115,13 @@ vc4_context_destroy(struct pipe_context *pctx) pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL); pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL); + if (vc4->yuv_linear_blit_vs) + pctx->delete_vs_state(pctx, vc4->yuv_linear_blit_vs); + if (vc4->yuv_linear_blit_fs_8bit) + pctx->delete_fs_state(pctx, vc4->yuv_linear_blit_fs_8bit); + if (vc4->yuv_linear_blit_fs_16bit) + pctx->delete_fs_state(pctx, vc4->yuv_linear_blit_fs_16bit); + vc4_program_fini(pctx); ralloc_free(vc4); diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index d92fcba42c6..16bebeec40b 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -377,6 +377,10 @@ struct vc4_context { struct u_upload_mgr *uploader; + struct pipe_shader_state *yuv_linear_blit_vs; + struct pipe_shader_state *yuv_linear_blit_fs_8bit; + struct pipe_shader_state *yuv_linear_blit_fs_16bit; + /** @{ Current pipeline state objects */ struct pipe_scissor_state scissor; struct pipe_blend_state *blend; -- 2.30.2