From: James Zhu Date: Fri, 1 Feb 2019 14:24:48 +0000 (-0500) Subject: gallium/auxiliary/vl: Add compute shader to support video compositor render X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f6ac0b5d7187ebb6839fc884e1dbfa8f1dd21eac;p=mesa.git gallium/auxiliary/vl: Add compute shader to support video compositor render Add compute shader to support video compositor render. Signed-off-by: James Zhu Acked-by: Christian König --- diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index a40917b1362..02cc5df70a7 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -350,6 +350,8 @@ VL_SOURCES := \ vl/vl_compositor.h \ vl/vl_compositor_gfx.c \ vl/vl_compositor_gfx.h \ + vl/vl_compositor_cs.c \ + vl/vl_compositor_cs.h \ vl/vl_csc.c \ vl/vl_csc.h \ vl/vl_decoder.c \ diff --git a/src/gallium/auxiliary/meson.build b/src/gallium/auxiliary/meson.build index 97063b46d28..6e5109eb97e 100644 --- a/src/gallium/auxiliary/meson.build +++ b/src/gallium/auxiliary/meson.build @@ -447,6 +447,8 @@ files_libgalliumvl = files( 'vl/vl_compositor.h', 'vl/vl_compositor_gfx.c', 'vl/vl_compositor_gfx.h', + 'vl/vl_compositor_cs.c', + 'vl/vl_compositor_cs.h', 'vl/vl_csc.c', 'vl/vl_csc.h', 'vl/vl_decoder.c', diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index c34bad045b4..5fa1b6ceb30 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -72,6 +72,7 @@ struct vl_compositor_layer struct pipe_viewport_state viewport; void *fs; + void *cs; void *samplers[3]; void *blend; diff --git a/src/gallium/auxiliary/vl/vl_compositor_cs.c b/src/gallium/auxiliary/vl/vl_compositor_cs.c new file mode 100644 index 00000000000..a3d416cc542 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_compositor_cs.c @@ -0,0 +1,408 @@ +/************************************************************************** + * + * Copyright 2019 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: James Zhu + * + **************************************************************************/ + +#include + +#include "tgsi/tgsi_text.h" +#include "vl_compositor_cs.h" + +struct cs_viewport { + float scale_x; + float scale_y; + struct u_rect area; + int translate_x; + int translate_y; +}; + +char *compute_shader_video_buffer = + "COMP\n" + "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" + "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" + + "DCL SV[0], THREAD_ID\n" + "DCL SV[1], BLOCK_ID\n" + + "DCL CONST[0..5]\n" + "DCL SVIEW[0..2], RECT, FLOAT\n" + "DCL SAMP[0..2]\n" + + "DCL IMAGE[0], 2D, WR\n" + "DCL TEMP[0..7]\n" + + "IMM[0] UINT32 { 8, 8, 1, 0}\n" + "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n" + + "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n" + + /* Drawn area check */ + "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" + "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n" + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n" + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" + + "UIF TEMP[1]\n" + /* Translate */ + "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n" + "U2F TEMP[2], TEMP[2]\n" + "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n" + + /* Scale */ + "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n" + "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n" + + /* Fetch texels */ + "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n" + "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n" + "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n" + + "MOV TEMP[4].w, IMM[1].xxxx\n" + + /* Color Space Conversion */ + "DP4 TEMP[7].x, CONST[0], TEMP[4]\n" + "DP4 TEMP[7].y, CONST[1], TEMP[4]\n" + "DP4 TEMP[7].z, CONST[2], TEMP[4]\n" + + "MOV TEMP[5].w, TEMP[4].zzzz\n" + "SLE TEMP[6].w, TEMP[5], CONST[3].xxxx\n" + "SGT TEMP[5].w, TEMP[5], CONST[3].yyyy\n" + + "MAX TEMP[7].w, TEMP[5], TEMP[6]\n" + + "STORE IMAGE[0], TEMP[0], TEMP[7], 2D\n" + "ENDIF\n" + + "END\n"; + +char *compute_shader_weave = + "COMP\n" + "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" + "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" + + "DCL SV[0], THREAD_ID\n" + "DCL SV[1], BLOCK_ID\n" + + "DCL CONST[0..5]\n" + "DCL SVIEW[0..2], RECT, FLOAT\n" + "DCL SAMP[0..2]\n" + + "DCL IMAGE[0], 2D, WR\n" + "DCL TEMP[0..9]\n" + + "IMM[0] UINT32 { 8, 8, 1, 0}\n" + "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n" + "IMM[2] UINT32 { 1, 2, 4, 0}\n" + + "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n" + + /* Drawn area check */ + "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" + "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n" + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n" + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" + + "UIF TEMP[1]\n" + "MOV TEMP[2], TEMP[0]\n" + /* Translate */ + "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n" + + /* Texture layer */ + "UMOD TEMP[2].z, TEMP[2].yyyy, IMM[2].yyyy\n" + "UMOD TEMP[3].z, TEMP[2].yyyy, IMM[2].zzzz\n" + "USHR TEMP[3].z, TEMP[3].zzzz, IMM[2].xxxx\n" + + "USHR TEMP[2].y, TEMP[2], IMM[2].xxxx\n" + "USHR TEMP[3].xy, TEMP[2], IMM[2].xxxx\n" + + "U2F TEMP[4], TEMP[2]\n" + "U2F TEMP[5], TEMP[3]\n" + + /* Scale */ + "DIV TEMP[4], TEMP[4], CONST[3].zwzw\n" + "DIV TEMP[5], TEMP[5], CONST[3].zwzw\n" + + /* Fetch texels */ + "TEX_LZ TEMP[6].x, TEMP[4], SAMP[0], RECT\n" + "TEX_LZ TEMP[6].y, TEMP[5], SAMP[1], RECT\n" + "TEX_LZ TEMP[6].z, TEMP[5], SAMP[2], RECT\n" + + "MOV TEMP[6].w, IMM[1].xxxx\n" + + /* Color Space Conversion */ + "DP4 TEMP[9].x, CONST[0], TEMP[6]\n" + "DP4 TEMP[9].y, CONST[1], TEMP[6]\n" + "DP4 TEMP[9].z, CONST[2], TEMP[6]\n" + + "MOV TEMP[7].w, TEMP[6].zzzz\n" + "SLE TEMP[8].w, TEMP[7], CONST[3].xxxx\n" + "SGT TEMP[7].w, TEMP[7], CONST[3].yyyy\n" + + "MAX TEMP[9].w, TEMP[7], TEMP[8]\n" + + "STORE IMAGE[0], TEMP[0], TEMP[9], 2D\n" + "ENDIF\n" + + "END\n"; + +char *compute_shader_rgba = + "COMP\n" + "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" + "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" + + "DCL SV[0], THREAD_ID\n" + "DCL SV[1], BLOCK_ID\n" + + "DCL CONST[0..5]\n" + "DCL SVIEW[0], RECT, FLOAT\n" + "DCL SAMP[0]\n" + + "DCL IMAGE[0], 2D, WR\n" + "DCL TEMP[0..3]\n" + + "IMM[0] UINT32 { 8, 8, 1, 0}\n" + "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n" + + "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n" + + /* Drawn area check */ + "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" + "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n" + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n" + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" + "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" + + "UIF TEMP[1]\n" + /* Translate */ + "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n" + "U2F TEMP[2], TEMP[2]\n" + + /* Scale */ + "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n" + + /* Fetch texels */ + "TEX_LZ TEMP[3], TEMP[2], SAMP[0], RECT\n" + + "STORE IMAGE[0], TEMP[0], TEMP[3], 2D\n" + "ENDIF\n" + + "END\n"; + +static void +cs_launch(struct vl_compositor *c, + void *cs) +{ + struct pipe_context *ctx = c->pipe; + + /* Bind the image */ + struct pipe_image_view image = {}; + image.resource = c->fb_state.cbufs[0]->texture; + image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE; + image.format = c->fb_state.cbufs[0]->texture->format; + + ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, &image); + + /* Bind compute shader */ + ctx->bind_compute_state(ctx, cs); + + /* Dispatch compute */ + struct pipe_grid_info info = {}; + info.block[0] = 8; + info.block[1] = 8; + info.block[2] = 1; + info.grid[0] = DIV_ROUND_UP(c->fb_state.width, info.block[0]); + info.grid[1] = DIV_ROUND_UP(c->fb_state.height, info.block[1]); + info.grid[2] = 1; + + ctx->launch_grid(ctx, &info); +} + +static inline struct u_rect +calc_drawn_area(struct vl_compositor_state *s, + struct vl_compositor_layer *layer) +{ + struct vertex2f tl, br; + struct u_rect result; + + assert(s && layer); + + tl = layer->dst.tl; + br = layer->dst.br; + + /* Scale */ + result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0]; + result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1]; + result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0]; + result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1]; + + /* Clip */ + result.x0 = MAX2(result.x0, s->scissor.minx); + result.y0 = MAX2(result.y0, s->scissor.miny); + result.x1 = MIN2(result.x1, s->scissor.maxx); + result.y1 = MIN2(result.y1, s->scissor.maxy); + return result; +} + +static bool +set_viewport(struct vl_compositor_state *s, + struct cs_viewport *drawn) +{ + struct pipe_transfer *buf_transfer; + + assert(s && drawn); + + void *ptr = pipe_buffer_map(s->pipe, s->shader_params, + PIPE_TRANSFER_READ | PIPE_TRANSFER_WRITE, + &buf_transfer); + + if (!ptr) + return false; + + float *ptr_float = (float *)ptr; + ptr_float += sizeof(vl_csc_matrix)/sizeof(float) + 2; + *ptr_float++ = drawn->scale_x; + *ptr_float++ = drawn->scale_y; + + int *ptr_int = (int *)ptr_float; + *ptr_int++ = drawn->area.x0; + *ptr_int++ = drawn->area.y0; + *ptr_int++ = drawn->area.x1; + *ptr_int++ = drawn->area.y1; + *ptr_int++ = drawn->translate_x; + *ptr_int = drawn->translate_y; + + pipe_buffer_unmap(s->pipe, buf_transfer); + + return true; +} + +static void +draw_layers(struct vl_compositor *c, + struct vl_compositor_state *s, + struct u_rect *dirty) +{ + unsigned i; + static struct cs_viewport old_drawn; + + assert(c); + + for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) { + if (s->used_layers & (1 << i)) { + struct vl_compositor_layer *layer = &s->layers[i]; + struct pipe_sampler_view **samplers = &layer->sampler_views[0]; + unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3; + struct cs_viewport drawn; + + drawn.area = calc_drawn_area(s, layer); + drawn.scale_x = layer->viewport.scale[0] / + (float)layer->sampler_views[0]->texture->width0; + drawn.scale_y = drawn.scale_x; + drawn.translate_x = (int)layer->viewport.translate[0]; + drawn.translate_y = (int)layer->viewport.translate[1]; + + if (memcmp(&drawn, &old_drawn, sizeof(struct cs_viewport))) { + set_viewport(s, &drawn); + old_drawn = drawn; + } + + c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0, + num_sampler_views, layer->samplers); + c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0, + num_sampler_views, samplers); + + cs_launch(c, layer->cs); + + if (dirty) { + struct u_rect drawn = calc_drawn_area(s, layer); + dirty->x0 = MIN2(drawn.x0, dirty->x0); + dirty->y0 = MIN2(drawn.y0, dirty->y0); + dirty->x1 = MAX2(drawn.x1, dirty->x1); + dirty->y1 = MAX2(drawn.y1, dirty->y1); + } + } + } +} + +void * +vl_compositor_cs_create_shader(struct vl_compositor *c, + const char *compute_shader_text) +{ + assert(c && compute_shader_text); + + struct tgsi_token tokens[1024]; + if (!tgsi_text_translate(compute_shader_text, tokens, ARRAY_SIZE(tokens))) { + assert(0); + return NULL; + } + + struct pipe_compute_state state = {}; + state.ir_type = PIPE_SHADER_IR_TGSI; + state.prog = tokens; + + /* create compute shader */ + return c->pipe->create_compute_state(c->pipe, &state); +} + +void +vl_compositor_cs_render(struct vl_compositor_state *s, + struct vl_compositor *c, + struct pipe_surface *dst_surface, + struct u_rect *dirty_area, + bool clear_dirty) +{ + assert(c && s); + assert(dst_surface); + + c->fb_state.width = dst_surface->width; + c->fb_state.height = dst_surface->height; + c->fb_state.cbufs[0] = dst_surface; + + if (!s->scissor_valid) { + s->scissor.minx = 0; + s->scissor.miny = 0; + s->scissor.maxx = dst_surface->width; + s->scissor.maxy = dst_surface->height; + } + + if (clear_dirty && dirty_area && + (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) { + + c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color, + 0, 0, dst_surface->width, dst_surface->height, false); + dirty_area->x0 = dirty_area->y0 = VL_COMPOSITOR_MAX_DIRTY; + dirty_area->x1 = dirty_area->y1 = VL_COMPOSITOR_MIN_DIRTY; + } + + pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, s->shader_params); + + draw_layers(c, s, dirty_area); +} diff --git a/src/gallium/auxiliary/vl/vl_compositor_cs.h b/src/gallium/auxiliary/vl/vl_compositor_cs.h new file mode 100644 index 00000000000..7a203d327ed --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_compositor_cs.h @@ -0,0 +1,56 @@ +/************************************************************************** + * + * Copyright 2019 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: James Zhu + * + **************************************************************************/ + +#ifndef vl_compositor_cs_h +#define vl_compositor_cs_h + +#include "vl_compositor.h" + +char *compute_shader_video_buffer; +char *compute_shader_weave; +char *compute_shader_rgba; + +/** + * create compute shader + */ +void * +vl_compositor_cs_create_shader(struct vl_compositor *c, + const char *compute_shader_text); + +/** + * render the layers to the frontbuffer with compute shader + */ +void +vl_compositor_cs_render(struct vl_compositor_state *s, + struct vl_compositor *c, + struct pipe_surface *dst_surface, + struct u_rect *dirty_area, + bool clear_dirty); + +#endif /* vl_compositor_cs_h */