From 7f04fe5338d0846ec9a6003033da5357d2785c8b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Mon, 25 Apr 2011 01:20:15 +0200 Subject: [PATCH] [g3dvl] merge idct stage 2 and mc ycbcr stage into a single draw --- src/gallium/auxiliary/vl/vl_idct.c | 312 +++++++++---------- src/gallium/auxiliary/vl/vl_idct.h | 52 +++- src/gallium/auxiliary/vl/vl_mc.c | 67 ++-- src/gallium/auxiliary/vl/vl_mc.h | 22 +- src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 145 ++++++--- src/gallium/auxiliary/vl/vl_mpeg12_decoder.h | 4 +- 6 files changed, 344 insertions(+), 258 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c index ca3b1cb53ab..ebb4ad4fe0c 100644 --- a/src/gallium/auxiliary/vl/vl_idct.c +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -86,8 +86,54 @@ calc_addr(struct ureg_program *shader, struct ureg_dst addr[2], ureg_MOV(shader, ureg_writemask(addr[1], TGSI_WRITEMASK_Z), tc); } +static void +increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2], + struct ureg_src saddr[2], bool right_side, bool transposed, + int pos, float size) +{ + unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; + unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; + + /* + * daddr[0..1].(start) = saddr[0..1].(start) + * daddr[0..1].(tc) = saddr[0..1].(tc) + */ + + ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]); + ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size)); + ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]); + ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size)); +} + +static void +fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler) +{ + ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler); + ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler); +} + +static void +matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2]) +{ + struct ureg_dst tmp; + + tmp = ureg_DECL_temporary(shader); + + /* + * tmp.xy = dot4(m[0][0..1], m[1][0..1]) + * dst = tmp.x + tmp.y + */ + ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0])); + ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1])); + ureg_ADD(shader, dst, + ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); + + ureg_release_temporary(shader, tmp); +} + static void * -create_vert_shader(struct vl_idct *idct, bool matrix_stage) +create_stage1_vert_shader(struct vl_idct *idct) { struct ureg_program *shader; struct ureg_src vrect, vpos; @@ -99,12 +145,12 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage) if (!shader) return NULL; - t_tex = ureg_DECL_temporary(shader); - t_start = ureg_DECL_temporary(shader); - vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + t_tex = ureg_DECL_temporary(shader); + t_start = ureg_DECL_temporary(shader); + o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); @@ -131,22 +177,17 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage) ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect); ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); - ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z), - ureg_scalar(vrect, TGSI_SWIZZLE_X), - ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets)); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); + ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z), + ureg_scalar(vrect, TGSI_SWIZZLE_X), + ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets)); ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); - if(matrix_stage) { - calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); - calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4); - } else { - calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4); - calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4); - } + calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); + calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4); ureg_release_temporary(shader, t_tex); ureg_release_temporary(shader, t_start); @@ -156,54 +197,8 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage) return ureg_create_shader_and_destroy(shader, idct->pipe); } -static void -increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2], - struct ureg_src saddr[2], bool right_side, bool transposed, - int pos, float size) -{ - unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; - unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; - - /* - * daddr[0..1].(start) = saddr[0..1].(start) - * daddr[0..1].(tc) = saddr[0..1].(tc) - */ - - ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]); - ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size)); - ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]); - ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size)); -} - -static void -fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler) -{ - ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler); - ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler); -} - -static void -matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2]) -{ - struct ureg_dst tmp; - - tmp = ureg_DECL_temporary(shader); - - /* - * tmp.xy = dot4(m[0][0..1], m[1][0..1]) - * dst = tmp.x + tmp.y - */ - ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0])); - ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1])); - ureg_ADD(shader, dst, - ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), - ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); - - ureg_release_temporary(shader, tmp); -} - static void * -create_matrix_frag_shader(struct vl_idct *idct) +create_stage1_frag_shader(struct vl_idct *idct) { struct ureg_program *shader; @@ -272,25 +267,56 @@ create_matrix_frag_shader(struct vl_idct *idct) return ureg_create_shader_and_destroy(shader, idct->pipe); } -static void * -create_transpose_frag_shader(struct vl_idct *idct) +void +vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_output, struct ureg_dst tex) { - struct ureg_program *shader; + struct ureg_src vrect, vpos; + struct ureg_src scale; + struct ureg_dst t_start; + struct ureg_dst o_l_addr[2], o_r_addr[2]; + + vrect = ureg_DECL_vs_input(shader, VS_I_RECT); + vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + t_start = ureg_DECL_temporary(shader); + + --first_output; + + o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0); + o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1); + + o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0); + o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1); + + scale = ureg_imm2f(shader, + (float)BLOCK_WIDTH / idct->buffer_width, + (float)BLOCK_HEIGHT / idct->buffer_height); + + ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z), + ureg_scalar(vrect, TGSI_SWIZZLE_X), + ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets)); + ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); + + calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4); + calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4); +} + +void +vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_input, struct ureg_dst fragment) +{ struct ureg_src l_addr[2], r_addr[2]; struct ureg_dst l[2], r[2]; - struct ureg_dst fragment; - shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); - if (!shader) - return NULL; + --first_input; - l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); - l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); + l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); + l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); - r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); - r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); + r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); + r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); l[0] = ureg_DECL_temporary(shader); l[1] = ureg_DECL_temporary(shader); @@ -300,61 +326,39 @@ create_transpose_frag_shader(struct vl_idct *idct) fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0)); fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1)); - fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); - - matrix_mul(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), l, r); + matrix_mul(shader, fragment, l, r); ureg_release_temporary(shader, l[0]); ureg_release_temporary(shader, l[1]); ureg_release_temporary(shader, r[0]); ureg_release_temporary(shader, r[1]); - - ureg_END(shader); - - return ureg_create_shader_and_destroy(shader, idct->pipe); } static bool init_shaders(struct vl_idct *idct) { - idct->matrix_vs = create_vert_shader(idct, true); - if (!idct->matrix_vs) - goto error_matrix_vs; - - idct->matrix_fs = create_matrix_frag_shader(idct); - if (!idct->matrix_fs) - goto error_matrix_fs; + idct->vs = create_stage1_vert_shader(idct); + if (!idct->vs) + goto error_vs; - idct->transpose_vs = create_vert_shader(idct, false); - if (!idct->transpose_vs) - goto error_transpose_vs; - - idct->transpose_fs = create_transpose_frag_shader(idct); - if (!idct->transpose_fs) - goto error_transpose_fs; + idct->fs = create_stage1_frag_shader(idct); + if (!idct->fs) + goto error_fs; return true; -error_transpose_fs: - idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs); - -error_transpose_vs: - idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs); - -error_matrix_fs: - idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs); +error_fs: + idct->pipe->delete_vs_state(idct->pipe, idct->vs); -error_matrix_vs: +error_vs: return false; } static void cleanup_shaders(struct vl_idct *idct) { - idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs); - idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs); - idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs); - idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs); + idct->pipe->delete_vs_state(idct->pipe, idct->vs); + idct->pipe->delete_fs_state(idct->pipe, idct->fs); } static bool @@ -447,30 +451,30 @@ init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) tex = buffer->sampler_views.individual.intermediate->texture; - buffer->fb_state[0].width = tex->width0; - buffer->fb_state[0].height = tex->height0; - buffer->fb_state[0].nr_cbufs = idct->nr_of_render_targets; + buffer->fb_state.width = tex->width0; + buffer->fb_state.height = tex->height0; + buffer->fb_state.nr_cbufs = idct->nr_of_render_targets; for(i = 0; i < idct->nr_of_render_targets; ++i) { memset(&surf_templ, 0, sizeof(surf_templ)); surf_templ.format = tex->format; surf_templ.u.tex.first_layer = i; surf_templ.u.tex.last_layer = i; surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; - buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface( + buffer->fb_state.cbufs[i] = idct->pipe->create_surface( idct->pipe, tex, &surf_templ); - if (!buffer->fb_state[0].cbufs[i]) + if (!buffer->fb_state.cbufs[i]) goto error_surfaces; } - buffer->viewport[0].scale[0] = tex->width0; - buffer->viewport[0].scale[1] = tex->height0; + buffer->viewport.scale[0] = tex->width0; + buffer->viewport.scale[1] = tex->height0; return true; error_surfaces: for(i = 0; i < idct->nr_of_render_targets; ++i) - pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL); + pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); return false; } @@ -483,7 +487,7 @@ cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) assert(idct && buffer); for(i = 0; i < idct->nr_of_render_targets; ++i) - pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL); + pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL); } @@ -607,13 +611,13 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_sampler_view *intermediate, struct pipe_surface *destination) { - unsigned i; - assert(buffer); assert(idct); assert(source); assert(destination); + memset(buffer, 0, sizeof(struct vl_idct_buffer)); + pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix); pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source); pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose); @@ -622,25 +626,12 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, if (!init_intermediate(idct, buffer)) return false; - /* init state */ - buffer->fb_state[1].width = destination->texture->width0; - buffer->fb_state[1].height = destination->texture->height0; - buffer->fb_state[1].nr_cbufs = 1; - pipe_surface_reference(&buffer->fb_state[1].cbufs[0], destination); - - buffer->viewport[1].scale[0] = destination->texture->width0; - buffer->viewport[1].scale[1] = destination->texture->height0; - - for(i = 0; i < 2; ++i) { - buffer->viewport[i].scale[2] = 1; - buffer->viewport[i].scale[3] = 1; - buffer->viewport[i].translate[0] = 0; - buffer->viewport[i].translate[1] = 0; - buffer->viewport[i].translate[2] = 0; - buffer->viewport[i].translate[3] = 0; - - buffer->fb_state[i].zsbuf = NULL; - } + buffer->viewport.scale[2] = 1; + buffer->viewport.scale[3] = 1; + buffer->viewport.translate[0] = 0; + buffer->viewport.translate[1] = 0; + buffer->viewport.translate[2] = 0; + buffer->viewport.translate[3] = 0; return true; } @@ -653,9 +644,7 @@ vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer) assert(idct && buffer); for(i = 0; i < idct->nr_of_render_targets; ++i) - pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL); - - pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL); + pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); cleanup_intermediate(idct, buffer); } @@ -666,25 +655,28 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_ assert(idct); assert(buffer); - if(num_instances > 0) { - idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); - idct->pipe->bind_blend_state(idct->pipe, idct->blend); - idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers); - - /* first stage */ - idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]); - idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[0]); - idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]); - idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs); - idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs); - util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); - - /* second stage */ - idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]); - idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[1]); - idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]); - idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs); - idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs); - util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); - } + idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); + idct->pipe->bind_blend_state(idct->pipe, idct->blend); + idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers); + + /* first stage */ + idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state); + idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport); + idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]); + idct->pipe->bind_vs_state(idct->pipe, idct->vs); + idct->pipe->bind_fs_state(idct->pipe, idct->fs); + util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); +} + +void +vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer) +{ + assert(idct); + assert(buffer); + + /* second stage */ + idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); + idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers); + idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]); } + diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h index 96933b9d889..f5a1e5d9b73 100644 --- a/src/gallium/auxiliary/vl/vl_idct.h +++ b/src/gallium/auxiliary/vl/vl_idct.h @@ -30,6 +30,8 @@ #include +#include + /* shader based inverse distinct cosinus transformation * expect usage of vl_vertex_buffers as a todo list */ @@ -46,8 +48,7 @@ struct vl_idct void *samplers[2]; - void *matrix_vs, *transpose_vs; - void *matrix_fs, *transpose_fs; + void *vs, *fs; struct pipe_sampler_view *matrix; struct pipe_sampler_view *transpose; @@ -56,8 +57,8 @@ struct vl_idct /* a set of buffers to work with */ struct vl_idct_buffer { - struct pipe_viewport_state viewport[2]; - struct pipe_framebuffer_state fb_state[2]; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state fb_state; union { @@ -71,28 +72,45 @@ struct vl_idct_buffer }; /* upload the idct matrix, which can be shared by all idct instances of a pipe */ -struct pipe_sampler_view *vl_idct_upload_matrix(struct pipe_context *pipe, float scale); +struct pipe_sampler_view * +vl_idct_upload_matrix(struct pipe_context *pipe, float scale); + +void +vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_output, struct ureg_dst tex); + +void +vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_input, struct ureg_dst fragment); /* init an idct instance */ -bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, - unsigned buffer_width, unsigned buffer_height, - unsigned nr_of_render_targets, - struct pipe_sampler_view *matrix, - struct pipe_sampler_view *transpose); +bool +vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, + unsigned buffer_width, unsigned buffer_height, + unsigned nr_of_render_targets, + struct pipe_sampler_view *matrix, + struct pipe_sampler_view *transpose); /* destroy an idct instance */ -void vl_idct_cleanup(struct vl_idct *idct); +void +vl_idct_cleanup(struct vl_idct *idct); /* init a buffer assosiated with agiven idct instance */ -bool vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, - struct pipe_sampler_view *source, - struct pipe_sampler_view *intermediate, - struct pipe_surface *destination); +bool +vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, + struct pipe_sampler_view *source, + struct pipe_sampler_view *intermediate, + struct pipe_surface *destination); /* cleanup a buffer of an idct instance */ -void vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer); +void +vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer); /* flush the buffer and start rendering, vertex buffers needs to be setup before calling this */ -void vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_verts); +void +vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_verts); + +void +vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer); #endif diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c index ecdce6b28bd..7474c58250d 100644 --- a/src/gallium/auxiliary/vl/vl_mc.c +++ b/src/gallium/auxiliary/vl/vl_mc.c @@ -37,12 +37,16 @@ #include "vl_defines.h" #include "vl_vertex_buffers.h" #include "vl_mc.h" +#include "vl_idct.h" enum VS_OUTPUT { VS_O_VPOS, VS_O_VTOP, - VS_O_VBOTTOM + VS_O_VBOTTOM, + + VS_O_FLAGS = VS_O_VTOP, + VS_O_VTEX = VS_O_VBOTTOM }; static struct ureg_dst @@ -220,13 +224,13 @@ create_ref_frag_shader(struct vl_mc *r) } static void * -create_ycbcr_vert_shader(struct vl_mc *r) +create_ycbcr_vert_shader(struct vl_mc *r, vl_mc_ycbcr_vert_shader vs_callback, void *callback_priv) { struct ureg_program *shader; struct ureg_src vrect, vpos; struct ureg_dst t_vpos, t_vtex; - struct ureg_dst o_vpos, o_vtex; + struct ureg_dst o_vpos, o_flags; struct vertex2f scale = { (float)BLOCK_WIDTH / r->buffer_width * MACROBLOCK_WIDTH / r->macroblock_size, @@ -246,11 +250,11 @@ create_ycbcr_vert_shader(struct vl_mc *r) t_vtex = ureg_DECL_temporary(shader); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); - o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); + o_flags = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS); /* * o_vtex.xy = t_vpos - * o_vtex.z = intra * 0.5 + * o_flags.z = intra * 0.5 * * if(interlaced) { * t_vtex.xy = vrect.y ? { 0, scale.y } : { -scale.y : 0 } @@ -258,14 +262,16 @@ create_ycbcr_vert_shader(struct vl_mc *r) * t_vtex.y = t_vtex.z ? t_vtex.x : t_vtex.y * o_vpos.y = t_vtex.y + t_vpos.y * - * o_vtex.w = t_vtex.z ? 0 : 1 + * o_flags.w = t_vtex.z ? 0 : 1 * } * */ - ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(t_vpos)); - ureg_MUL(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_Z), + + vs_callback(callback_priv, r, shader, VS_O_VTEX, t_vpos); + + ureg_MUL(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_Z), ureg_scalar(vpos, TGSI_SWIZZLE_Z), ureg_imm1f(shader, 0.5f)); - ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f)); + ureg_MOV(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f)); if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO ureg_IF(shader, ureg_scalar(vpos, TGSI_SWIZZLE_W), &label); @@ -286,7 +292,7 @@ create_ycbcr_vert_shader(struct vl_mc *r) ureg_ADD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_Y), ureg_src(t_vpos), ureg_src(t_vtex)); - ureg_CMP(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_W), + ureg_CMP(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)), ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 1.0f)); @@ -303,10 +309,10 @@ create_ycbcr_vert_shader(struct vl_mc *r) } static void * -create_ycbcr_frag_shader(struct vl_mc *r, float scale) +create_ycbcr_frag_shader(struct vl_mc *r, float scale, vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv) { struct ureg_program *shader; - struct ureg_src tc, sampler; + struct ureg_src flags; struct ureg_dst tmp; struct ureg_dst fragment; unsigned label; @@ -315,9 +321,7 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale) if (!shader) return NULL; - tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR); - - sampler = ureg_DECL_sampler(shader, 0); + flags = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS, TGSI_INTERPOLATE_LINEAR); fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); @@ -333,7 +337,7 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale) */ ureg_SEQ(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), - ureg_scalar(tc, TGSI_SWIZZLE_W), ureg_src(tmp)); + ureg_scalar(flags, TGSI_SWIZZLE_W), ureg_src(tmp)); ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label); @@ -342,15 +346,15 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale) ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); ureg_ELSE(shader, &label); - ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc, sampler); + fs_callback(callback_priv, r, shader, VS_O_VTEX, tmp); if (scale != 1.0f) ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_imm1f(shader, scale), - ureg_scalar(tc, TGSI_SWIZZLE_Z)); + ureg_scalar(flags, TGSI_SWIZZLE_Z)); else ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), - ureg_src(tmp), ureg_scalar(tc, TGSI_SWIZZLE_Z)); + ureg_src(tmp), ureg_scalar(flags, TGSI_SWIZZLE_Z)); ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); @@ -386,12 +390,6 @@ init_pipe_state(struct vl_mc *r) if (!r->sampler_ref) goto error_sampler_ref; - sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - r->sampler_ycbcr = r->pipe->create_sampler_state(r->pipe, &sampler); - if (!r->sampler_ycbcr) - goto error_sampler_ycbcr; - for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { memset(&blend, 0, sizeof blend); blend.independent_blend_enable = 0; @@ -442,9 +440,6 @@ error_blend: r->pipe->delete_sampler_state(r->pipe, r->sampler_ref); error_sampler_ref: - r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr); - -error_sampler_ycbcr: return false; } @@ -456,7 +451,6 @@ cleanup_pipe_state(struct vl_mc *r) assert(r); r->pipe->delete_sampler_state(r->pipe, r->sampler_ref); - r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr); for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]); r->pipe->delete_blend_state(r->pipe, r->blend_add[i]); @@ -467,7 +461,10 @@ cleanup_pipe_state(struct vl_mc *r) bool vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, unsigned buffer_width, unsigned buffer_height, - unsigned macroblock_size, float scale) + unsigned macroblock_size, float scale, + vl_mc_ycbcr_vert_shader vs_callback, + vl_mc_ycbcr_frag_shader fs_callback, + void *callback_priv) { assert(renderer); assert(pipe); @@ -486,7 +483,7 @@ vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, if (!renderer->vs_ref) goto error_vs_ref; - renderer->vs_ycbcr = create_ycbcr_vert_shader(renderer); + renderer->vs_ycbcr = create_ycbcr_vert_shader(renderer, vs_callback, callback_priv); if (!renderer->vs_ycbcr) goto error_vs_ycbcr; @@ -494,7 +491,7 @@ vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, if (!renderer->fs_ref) goto error_fs_ref; - renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale); + renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale, fs_callback, callback_priv); if (!renderer->fs_ycbcr) goto error_fs_ycbcr; @@ -614,8 +611,7 @@ vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref) } void -vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, struct pipe_sampler_view *source, - unsigned component, unsigned num_instances) +vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num_instances) { struct vl_mc *renderer; @@ -631,8 +627,5 @@ vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, struct pipe_sampler_view *source renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ycbcr); renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr); - renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &source); - renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ycbcr); - util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); } diff --git a/src/gallium/auxiliary/vl/vl_mc.h b/src/gallium/auxiliary/vl/vl_mc.h index 353afa9df62..85ec69b3ce7 100644 --- a/src/gallium/auxiliary/vl/vl_mc.h +++ b/src/gallium/auxiliary/vl/vl_mc.h @@ -31,6 +31,8 @@ #include #include +#include + #include "vl_defines.h" #include "vl_types.h" @@ -51,7 +53,7 @@ struct vl_mc void *blend_add[VL_MC_NUM_BLENDERS]; void *vs_ref, *vs_ycbcr; void *fs_ref, *fs_ycbcr; - void *sampler_ref, *sampler_ycbcr; + void *sampler_ref; }; struct vl_mc_buffer @@ -64,9 +66,22 @@ struct vl_mc_buffer struct pipe_framebuffer_state fb_state; }; +typedef void (*vl_mc_ycbcr_vert_shader)(void *priv, struct vl_mc *mc, + struct ureg_program *shader, + unsigned first_output, + struct ureg_dst tex); + +typedef void (*vl_mc_ycbcr_frag_shader)(void *priv, struct vl_mc *mc, + struct ureg_program *shader, + unsigned first_input, + struct ureg_dst dst); + bool vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe, unsigned picture_width, unsigned picture_height, - unsigned macroblock_size, float scale); + unsigned macroblock_size, float scale, + vl_mc_ycbcr_vert_shader vs_callback, + vl_mc_ycbcr_frag_shader fs_callback, + void *callback_priv); void vl_mc_cleanup(struct vl_mc *renderer); @@ -78,7 +93,6 @@ void vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface void vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref); -void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, struct pipe_sampler_view *source, - unsigned component, unsigned num_instances); +void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num_instances); #endif /* vl_mc_h */ diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 37789707a6b..74ec4b1db7b 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -145,7 +145,7 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer) { enum pipe_format formats[3]; - struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv; + struct pipe_sampler_view **idct_source_sv, **mc_source_sv; struct pipe_surface **idct_surfaces; struct vl_mpeg12_decoder *dec; @@ -164,23 +164,23 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer) if (!buffer->idct_source) goto error_source; - formats[0] = formats[1] = formats[2] = dec->idct_intermediate_format; - buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe, - dec->base.width / dec->nr_of_idct_render_targets, - dec->base.height / 4, dec->nr_of_idct_render_targets, - dec->base.chroma_format, - formats, PIPE_USAGE_STATIC); + formats[0] = formats[1] = formats[2] = dec->mc_source_format; + buffer->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe, + dec->base.width / dec->nr_of_idct_render_targets, + dec->base.height / 4, dec->nr_of_idct_render_targets, + dec->base.chroma_format, + formats, PIPE_USAGE_STATIC); - if (!buffer->idct_intermediate) - goto error_intermediate; + if (!buffer->mc_source) + goto error_mc_source; idct_source_sv = buffer->idct_source->get_sampler_view_planes(buffer->idct_source); if (!idct_source_sv) goto error_source_sv; - idct_intermediate_sv = buffer->idct_intermediate->get_sampler_view_planes(buffer->idct_intermediate); - if (!idct_intermediate_sv) - goto error_intermediate_sv; + mc_source_sv = buffer->mc_source->get_sampler_view_planes(buffer->mc_source); + if (!mc_source_sv) + goto error_mc_source_sv; idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source); if (!idct_surfaces) @@ -189,7 +189,7 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer) for (i = 0; i < 3; ++i) if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c, &buffer->idct[i], idct_source_sv[i], - idct_intermediate_sv[i], idct_surfaces[i])) + mc_source_sv[i], idct_surfaces[i])) goto error_plane; return true; @@ -199,11 +199,11 @@ error_plane: vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]); error_surfaces: -error_intermediate_sv: +error_mc_source_sv: error_source_sv: - buffer->idct_intermediate->destroy(buffer->idct_intermediate); + buffer->mc_source->destroy(buffer->mc_source); -error_intermediate: +error_mc_source: buffer->idct_source->destroy(buffer->idct_source); error_source: @@ -223,7 +223,6 @@ cleanup_idct_buffer(struct vl_mpeg12_buffer *buf) vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]); vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]); buf->idct_source->destroy(buf->idct_source); - buf->idct_intermediate->destroy(buf->idct_intermediate); } static bool @@ -237,14 +236,16 @@ init_mc_buffer(struct vl_mpeg12_buffer *buf) dec = (struct vl_mpeg12_decoder*)buf->base.decoder; assert(dec); - formats[0] = formats[1] = formats[2] =dec->mc_source_format; - buf->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe, - dec->base.width, dec->base.height, 1, - dec->base.chroma_format, - formats, PIPE_USAGE_STATIC); + if (dec->base.entrypoint > PIPE_VIDEO_ENTRYPOINT_IDCT) { + formats[0] = formats[1] = formats[2] =dec->mc_source_format; + buf->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe, + dec->base.width, dec->base.height, 1, + dec->base.chroma_format, + formats, PIPE_USAGE_STATIC); - if (!buf->mc_source) - goto error_mc_source; + if (!buf->mc_source) + goto error_mc_source; + } if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0])) goto error_mc_y; @@ -420,6 +421,7 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder) dec->pipe->bind_fs_state(dec->pipe, NULL); dec->pipe->delete_depth_stencil_alpha_state(dec->pipe, dec->dsa); + dec->pipe->delete_sampler_state(dec->pipe, dec->sampler_ycbcr); vl_mc_cleanup(&dec->mc_y); vl_mc_cleanup(&dec->mc_c); @@ -563,7 +565,14 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer, vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component); dec->pipe->set_vertex_buffers(dec->pipe, 2, vb); - vl_mc_render_ycbcr(&buf->mc[i], mc_source_sv[component], j, num_ycbcr_blocks[component]); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) + vl_idct_prepare_stage2(component == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[component]); + else { + dec->pipe->set_fragment_sampler_views(dec->pipe, 1, &mc_source_sv[component]); + dec->pipe->bind_fragment_sampler_states(dec->pipe, 1, &dec->sampler_ycbcr); + } + vl_mc_render_ycbcr(&buf->mc[i], j, num_ycbcr_blocks[component]); } } } @@ -572,6 +581,7 @@ static bool init_pipe_state(struct vl_mpeg12_decoder *dec) { struct pipe_depth_stencil_alpha_state dsa; + struct pipe_sampler_state sampler; unsigned i; assert(dec); @@ -595,6 +605,20 @@ init_pipe_state(struct vl_mpeg12_decoder *dec) dec->dsa = dec->pipe->create_depth_stencil_alpha_state(dec->pipe, &dsa); dec->pipe->bind_depth_stencil_alpha_state(dec->pipe, dec->dsa); + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + dec->sampler_ycbcr = dec->pipe->create_sampler_state(dec->pipe, &sampler); + if (!dec->sampler_ycbcr) + return false; + return true; } @@ -675,10 +699,10 @@ init_idct(struct vl_mpeg12_decoder *dec) if (dec->idct_source_format == PIPE_FORMAT_NONE) return false; - dec->idct_intermediate_format = find_first_supported_format(dec, const_idct_intermediate_formats, - num_idct_intermediate_formats, PIPE_TEXTURE_3D); + dec->mc_source_format = find_first_supported_format(dec, const_idct_intermediate_formats, + num_idct_intermediate_formats, PIPE_TEXTURE_3D); - if (dec->idct_intermediate_format == PIPE_FORMAT_NONE) + if (dec->mc_source_format == PIPE_FORMAT_NONE) return false; switch (dec->idct_source_format) { @@ -695,8 +719,8 @@ init_idct(struct vl_mpeg12_decoder *dec) return false; } - if (dec->idct_intermediate_format == PIPE_FORMAT_R16G16B16A16_FLOAT || - dec->idct_intermediate_format == PIPE_FORMAT_R32G32B32A32_FLOAT) + if (dec->mc_source_format == PIPE_FORMAT_R16G16B16A16_FLOAT || + dec->mc_source_format == PIPE_FORMAT_R32G32B32A32_FLOAT) transpose_scale = 1.0f; else transpose_scale = matrix_scale = sqrt(matrix_scale); @@ -738,6 +762,49 @@ error_matrix: return false; } +static void +mc_vert_shader_callback(void *priv, struct vl_mc *mc, + struct ureg_program *shader, + unsigned first_output, + struct ureg_dst tex) +{ + struct vl_mpeg12_decoder *dec = priv; + struct ureg_dst o_vtex; + + assert(priv && mc); + assert(shader); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) { + struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c; + vl_idct_stage2_vert_shader(idct, shader, first_output, tex); + } else { + o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output); + ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(tex)); + } +} + +static void +mc_frag_shader_callback(void *priv, struct vl_mc *mc, + struct ureg_program *shader, + unsigned first_input, + struct ureg_dst dst) +{ + struct vl_mpeg12_decoder *dec = priv; + struct ureg_src src, sampler; + + assert(priv && mc); + assert(shader); + + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) { + struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c; + vl_idct_stage2_frag_shader(idct, shader, first_input, dst); + } else { + src = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input, TGSI_INTERPOLATE_LINEAR); + sampler = ureg_DECL_sampler(shader, 0); + ureg_TEX(shader, dst, TGSI_TEXTURE_2D, src, sampler); + } +} + struct pipe_video_decoder * vl_create_mpeg12_decoder(struct pipe_video_context *context, struct pipe_context *pipe, @@ -785,12 +852,6 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context, /* TODO: Implement 422, 444 */ assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); - dec->mc_source_format = find_first_supported_format(dec, const_mc_source_formats, - num_mc_source_formats, PIPE_TEXTURE_3D); - - if (dec->mc_source_format == PIPE_FORMAT_NONE) - return NULL; - if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { dec->chroma_width = dec->base.width / 2; dec->chroma_height = dec->base.height / 2; @@ -813,6 +874,12 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context, else mc_scale = 1.0f; } else { + dec->mc_source_format = find_first_supported_format(dec, const_mc_source_formats, + num_mc_source_formats, PIPE_TEXTURE_3D); + + if (dec->mc_source_format == PIPE_FORMAT_NONE) + return NULL; + switch (dec->mc_source_format) { case PIPE_FORMAT_R16_SNORM: mc_scale = SCALE_FACTOR_SNORM; @@ -828,11 +895,13 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context, } } - if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, mc_scale)) + if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, mc_scale, + mc_vert_shader_callback, mc_frag_shader_callback, dec)) goto error_mc_y; // TODO - if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, BLOCK_HEIGHT, mc_scale)) + if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, BLOCK_HEIGHT, mc_scale, + mc_vert_shader_callback, mc_frag_shader_callback, dec)) goto error_mc_c; if (!init_pipe_state(dec)) diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h index 9d5768816fb..e483ace03b4 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h @@ -53,7 +53,6 @@ struct vl_mpeg12_decoder enum pipe_format zscan_source_format; enum pipe_format idct_source_format; - enum pipe_format idct_intermediate_format; enum pipe_format mc_source_format; struct pipe_vertex_buffer quads; @@ -62,6 +61,8 @@ struct vl_mpeg12_decoder void *ves_ycbcr; void *ves_mv; + void *sampler_ycbcr; + struct vl_zscan zscan_y, zscan_c; struct vl_idct idct_y, idct_c; struct vl_mc mc_y, mc_c; @@ -77,7 +78,6 @@ struct vl_mpeg12_buffer struct pipe_video_buffer *zscan_source; struct pipe_video_buffer *idct_source; - struct pipe_video_buffer *idct_intermediate; struct pipe_video_buffer *mc_source; struct vl_zscan_buffer zscan[VL_MAX_PLANES]; -- 2.30.2