From 508a4a056c3140dc1f90b93acd46c06c30f7094e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Sun, 14 Nov 2010 23:16:49 +0100 Subject: [PATCH] [g3dvl] add skeleton and incomplete idct --- src/gallium/auxiliary/Makefile | 3 +- src/gallium/auxiliary/vl/vl_idct.c | 505 ++++++++++++++++++ src/gallium/auxiliary/vl/vl_idct.h | 90 ++++ .../auxiliary/vl/vl_mpeg12_mc_renderer.c | 88 +-- .../auxiliary/vl/vl_mpeg12_mc_renderer.h | 6 +- 5 files changed, 616 insertions(+), 76 deletions(-) create mode 100644 src/gallium/auxiliary/vl/vl_idct.c create mode 100644 src/gallium/auxiliary/vl/vl_idct.h diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 49ff1653e0e..07b3372c914 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -147,7 +147,8 @@ C_SOURCES = \ vl/vl_bitstream_parser.c \ vl/vl_mpeg12_mc_renderer.c \ vl/vl_compositor.c \ - vl/vl_csc.c + vl/vl_csc.c \ + vl/vl_idct.c GALLIVM_SOURCES = \ gallivm/lp_bld_arit.c \ diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c new file mode 100644 index 00000000000..ce535ad3862 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -0,0 +1,505 @@ +/************************************************************************** + * + * Copyright 2010 Christian König + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* NOTE(review): the targets of the bare #include lines below are missing in
+ * this copy of the patch; they are kept as found. */
+#include "vl_idct.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "vl_types.h"
+
+/* Size, in samples, of one coefficient block. */
+#define BLOCK_WIDTH 8
+#define BLOCK_HEIGHT 8
+
+/* Factors applied to the fetched texels / to the result by the fragment
+ * shaders below. */
+#define SCALE_FACTOR_16_TO_12 (32768.0f / 2048.0f)
+#define SCALE_FACTOR_9_TO_16 (256.0f / 32768.0f)
+
+/* Layout of the vertex shader constant buffer (see vs_const_buf). */
+struct vertex_shader_consts
+{
+   struct vertex4f norm;
+};
+
+/* Vertex shader input slots; they match the two vertex elements set up in
+ * init_buffers(). */
+enum VS_INPUT
+{
+   VS_I_RECT,
+   VS_I_VPOS,
+
+   NUM_VS_INPUTS
+};
+
+/* Semantic indices shared between the vertex shader outputs and the
+ * fragment shader inputs. */
+enum VS_OUTPUT
+{
+   VS_O_VPOS,
+   VS_O_BLOCK,
+   VS_O_TEX,
+   VS_O_START,
+   VS_O_STEP
+};
+
+
+/* 8x8 coefficient matrix; row 0 is 1/sqrt(8), the remaining rows hold
+ * 0.5 * cos() terms.  Not referenced by the code in this file yet. */
+const float const_matrix[8][8] = {
+   {  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.353553f,  0.3535530f },
+   {  0.4903930f,  0.4157350f,  0.2777850f,  0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f },
+   {  0.4619400f,  0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f,  0.191342f,  0.4619400f },
+   {  0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f,  0.2777850f,  0.4903930f,  0.097545f, -0.4157350f },
+   {  0.3535530f, -0.3535530f, -0.3535530f,  0.3535540f,  0.3535530f, -0.3535540f, -0.353553f,  0.3535530f },
+   {  0.2777850f, -0.4903930f,  0.0975452f,  0.4157350f, -0.4157350f, -0.0975451f,  0.490393f, -0.2777850f },
+   {  0.1913420f, -0.4619400f,  0.4619400f, -0.1913420f, -0.1913410f,  0.4619400f, -0.461940f,  0.1913420f },
+   {  0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,  0.4903930f, -0.4157350f,  0.277786f, -0.0975458f }
+};
+
+/* Transpose of const_matrix.  Not referenced by the code in this file yet. */
+const float const_transpose[8][8] = {
+   {  0.3535530f,  0.4903930f,  0.4619400f,  0.4157350f,  0.3535530f,  0.2777850f,  0.191342f,  0.0975451f },
+   {  0.3535530f,  0.4157350f,  0.1913420f, -0.0975452f, -0.3535530f, -0.4903930f, -0.461940f, -0.2777850f },
+   {  0.3535530f,  0.2777850f, -0.1913420f, -0.4903930f, -0.3535530f,  0.0975452f,  0.461940f,  0.4157350f },
+   {  0.3535530f,  0.0975451f, -0.4619400f, -0.2777850f,  0.3535540f,  0.4157350f, -0.191342f, -0.4903930f },
+   {  0.3535530f, -0.0975452f, -0.4619400f,  0.2777850f,  0.3535530f, -0.4157350f, -0.191341f,  0.4903930f },
+   {  0.3535530f, -0.2777850f, -0.1913420f,  0.4903930f, -0.3535540f, -0.0975451f,  0.461940f, -0.4157350f },
+   {  0.3535530f, -0.4157350f,  0.1913420f,  0.0975450f, -0.3535530f,  0.4903930f, -0.461940f,  0.2777860f },
+   {  0.3535530f, -0.4903930f,  0.4619400f, -0.4157350f,  0.3535530f, -0.2777850f,  0.191342f, -0.0975458f }
+};
+
+/**
+ * Build the vertex shader shared by both passes.
+ *
+ * Inputs are the per-vertex rect corner (VS_I_RECT) and block position
+ * (VS_I_VPOS); constant 0 is the normalisation vector.
+ *
+ * Returns the driver shader handle, or NULL if the ureg program could not
+ * be created.
+ */
+static void *
+create_vert_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+   struct ureg_src norm, bs;
+   struct ureg_src vrect, vpos;
+   struct ureg_dst scale, t_vpos;
+   struct ureg_dst o_vpos, o_block, o_tex, o_start, o_step;
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   norm = ureg_DECL_constant(shader, 0);
+   bs = ureg_imm2f(shader, BLOCK_WIDTH, BLOCK_HEIGHT);
+
+   scale = ureg_DECL_temporary(shader);
+   t_vpos = ureg_DECL_temporary(shader);
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   /*
+    * Only o_vpos is the position.  The fragment shaders declare VS_O_TEX and
+    * VS_O_START as TGSI_SEMANTIC_GENERIC inputs, so the matching outputs
+    * must be GENERIC as well, otherwise the varyings are never linked.
+    */
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+   o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
+   o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
+   o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
+   o_step = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP);
+
+   /*
+    * scale = norm * bs;
+    *
+    * t_vpos = (vpos + vrect) * scale
+    * o_vpos.xy = t_vpos
+    * o_vpos.zw = vpos
+    *
+    * o_block = vrect
+    * o_tex = t_vpos
+    * o_start = vpos * scale
+    * o_step = norm
+    *
+    */
+   ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, bs);
+
+   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
+   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), ureg_src(scale));
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
+
+   ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
+   ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, ureg_src(scale));
+   ureg_MOV(shader, ureg_writemask(o_step, TGSI_WRITEMASK_XY), norm);
+
+   ureg_release_temporary(shader, t_vpos);
+   ureg_release_temporary(shader, scale);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+/**
+ * Emit the texel fetches and dot products shared by both fragment shaders.
+ *
+ * For each of the two passes four texels are fetched from sampler[0] and
+ * four from sampler[1]; their x components are packed into m[i][0] and
+ * m[i][1].  After every fetch t_tc[0].x advances by step[0] and t_tc[1].y
+ * by step[1].  Each packed vector is multiplied by scale[0] / scale[1] when
+ * that factor is not 1.0, and the sum of the two DP4 results is written to
+ * dst.x.
+ *
+ * NOTE(review): the coordinates are not reset between the two passes and the
+ * original comment leaves the contents of m[][] open ("?"); the commit
+ * subject calls the IDCT incomplete, so this is left as found.
+ */
+static void
+matrix_mul(struct ureg_program *shader, struct ureg_dst dst,
+           struct ureg_src tc[2], struct ureg_src sampler[2],
+           struct ureg_src start[2], struct ureg_src step[2],
+           float scale[2])
+{
+   struct ureg_dst t_tc[2], m[2][2], tmp[2];
+   unsigned i, j;
+
+   for(i = 0; i < 2; ++i) {
+      t_tc[i] = ureg_DECL_temporary(shader);
+      for(j = 0; j < 2; ++j)
+         m[i][j] = ureg_DECL_temporary(shader);
+      tmp[i] = ureg_DECL_temporary(shader);
+   }
+
+   /*
+    * m[0..1][0] = ?
+    * tmp[0..1] = dot4(m[0..1][0], m[0..1][1])
+    * fragment = tmp[0] + tmp[1]
+    */
+   ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), start[0]);
+   ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_Y), tc[0]);
+
+   ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), tc[1]);
+   ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), start[1]);
+
+   for(i = 0; i < 2; ++i) {
+      for(j = 0; j < 4; ++j) {
+         /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
+         ureg_TEX(shader, tmp[0], TGSI_TEXTURE_2D, ureg_src(t_tc[0]), sampler[0]);
+         ureg_MOV(shader, ureg_writemask(m[i][0], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp[0]), TGSI_SWIZZLE_X));
+
+         ureg_TEX(shader, tmp[1], TGSI_TEXTURE_2D, ureg_src(t_tc[1]), sampler[1]);
+         ureg_MOV(shader, ureg_writemask(m[i][1], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp[1]), TGSI_SWIZZLE_X));
+
+         ureg_ADD(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), ureg_src(t_tc[0]), step[0]);
+         ureg_ADD(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), ureg_src(t_tc[1]), step[1]);
+      }
+
+      if(scale[0] != 1.0f)
+         ureg_MUL(shader, m[i][0], ureg_src(m[i][0]), ureg_scalar(ureg_imm1f(shader, scale[0]), TGSI_SWIZZLE_X));
+
+      if(scale[1] != 1.0f)
+         ureg_MUL(shader, m[i][1], ureg_src(m[i][1]), ureg_scalar(ureg_imm1f(shader, scale[1]), TGSI_SWIZZLE_X));
+   }
+
+   ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(m[0][0]), ureg_src(m[0][1]));
+   ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(m[1][0]), ureg_src(m[1][1]));
+   ureg_ADD(shader, ureg_writemask(dst, TGSI_WRITEMASK_X), ureg_src(tmp[0]), ureg_src(tmp[1]));
+
+   for(i = 0; i < 2; ++i) {
+      ureg_release_temporary(shader, t_tc[i]);
+      for(j = 0; j < 2; ++j)
+         ureg_release_temporary(shader, m[i][j]);
+      ureg_release_temporary(shader, tmp[i]);
+   }
+}
+
+/**
+ * Build the fragment shader for the transpose pass: sampler 0 is walked from
+ * 0 in steps of 1 / BLOCK_HEIGHT, sampler 1 from the interpolated start/step
+ * varyings, and sampler 1 texels are scaled by SCALE_FACTOR_16_TO_12.
+ *
+ * Returns the driver shader handle, or NULL if the ureg program could not
+ * be created.
+ */
+static void *
+create_transpose_frag_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+   struct ureg_src tc[2], sampler[2];
+   struct ureg_src start[2], step[2];
+   struct ureg_dst fragment;
+   float scale[2];
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
+   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
+
+   start[0] = ureg_imm1f(shader, 0.0f);
+   start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
+
+   step[0] = ureg_imm1f(shader, 1.0f / BLOCK_HEIGHT);
+   step[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
+
+   sampler[0] = ureg_DECL_sampler(shader, 0);
+   sampler[1] = ureg_DECL_sampler(shader, 1);
+
+   scale[0] = 1.0f;
+   scale[1] = SCALE_FACTOR_16_TO_12;
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   matrix_mul(shader, fragment, tc, sampler, start, step, scale);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+/**
+ * Build the fragment shader for the matrix pass: the roles of the two
+ * samplers are swapped relative to the transpose pass, no per-texel scaling
+ * is applied, and the result is multiplied by SCALE_FACTOR_9_TO_16.
+ *
+ * Returns the driver shader handle, or NULL if the ureg program could not
+ * be created.
+ */
+static void *
+create_matrix_frag_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+   struct ureg_src tc[2], sampler[2];
+   struct ureg_src start[2], step[2];
+   struct ureg_dst tmp, fragment;
+   float scale[2];
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   tmp = ureg_DECL_temporary(shader);
+
+   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
+   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
+
+   start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
+   start[1] = ureg_imm1f(shader, 0.0f);
+
+   step[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
+   step[1] = ureg_imm1f(shader, 1.0f / BLOCK_WIDTH);
+
+   sampler[0] = ureg_DECL_sampler(shader, 0);
+   sampler[1] = ureg_DECL_sampler(shader, 1);
+
+   scale[0] = 1.0f;
+   scale[1] = 1.0f;
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   matrix_mul(shader, tmp, tc, sampler, start, step, scale);
+   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_9_TO_16), TGSI_SWIZZLE_X));
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+/**
+ * Map the whole destination resource for writing (previous contents are
+ * discarded) and remember the transfer and the mapped pointer in idct.
+ *
+ * NOTE(review): the results of get_transfer / transfer_map are not checked.
+ */
+static void
+xfer_buffers_map(struct vl_idct *idct)
+{
+   struct pipe_box rect =
+   {
+      0, 0, 0,
+      idct->destination->width0,
+      idct->destination->height0,
+      1
+   };
+
+   idct->tex_transfer = idct->pipe->get_transfer
+   (
+      idct->pipe, idct->destination,
+      u_subresource(0, 0),
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &rect
+   );
+
+   idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
+}
+
+/** Undo xfer_buffers_map(): unmap and destroy the current transfer. */
+static void
+xfer_buffers_unmap(struct vl_idct *idct)
+{
+   idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
+   idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
+}
+
+/**
+ * Create the vertex shader and the two fragment shaders.
+ *
+ * The creation calls must not live inside assert(): with NDEBUG defined the
+ * whole expression is compiled out and no shader would ever be created.
+ *
+ * Returns true on success.  On failure every shader created so far is
+ * deleted again and false is returned.
+ */
+static bool
+init_shaders(struct vl_idct *idct)
+{
+   assert(idct);
+
+   idct->vs = create_vert_shader(idct);
+   if (!idct->vs)
+      return false;
+
+   idct->transpose_fs = create_transpose_frag_shader(idct);
+   if (!idct->transpose_fs) {
+      idct->pipe->delete_vs_state(idct->pipe, idct->vs);
+      return false;
+   }
+
+   idct->matrix_fs = create_matrix_frag_shader(idct);
+   if (!idct->matrix_fs) {
+      idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
+      idct->pipe->delete_vs_state(idct->pipe, idct->vs);
+      return false;
+   }
+
+   return true;
+}
+
+/** Delete the three shaders created by init_shaders(). */
+static void
+cleanup_shaders(struct vl_idct *idct)
+{
+   assert(idct);
+
+   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
+   idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
+   idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
+}
+
+/**
+ * Create the textures, sampler views, vertex buffers, vertex element state
+ * and constant buffer.  idct->destination must already be set.
+ *
+ * Returns false (after releasing the textures) if any texture could not be
+ * created, true otherwise.
+ *
+ * NOTE(review): the sampler views, vertex buffers, vertex element state and
+ * constant buffer are still not checked for creation failure.
+ */
+static bool
+init_buffers(struct vl_idct *idct)
+{
+   struct pipe_resource template;
+   struct pipe_sampler_view sampler_view;
+   struct pipe_vertex_element vertex_elems[2];
+
+   /* Number of 8x8 blocks needed to cover the destination. */
+   const unsigned max_blocks =
+      align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
+      align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
+      idct->destination->depth0;
+
+   unsigned i;
+
+   /* 8x8 float textures for the matrix and its transpose. */
+   memset(&template, 0, sizeof(struct pipe_resource));
+   template.target = PIPE_TEXTURE_2D;
+   template.format = PIPE_FORMAT_R32_FLOAT;
+   template.last_level = 0;
+   template.width0 = 8;
+   template.height0 = 8;
+   template.depth0 = 1;
+   template.usage = PIPE_USAGE_IMMUTABLE;
+   template.bind = PIPE_BIND_SAMPLER_VIEW;
+   template.flags = 0;
+
+   idct->textures.individual.matrix = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+   idct->textures.individual.transpose = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+
+   /* Source texture: same format and size as the destination. */
+   template.format = idct->destination->format;
+   template.width0 = idct->destination->width0;
+   template.height0 = idct->destination->height0;
+   template.depth0 = idct->destination->depth0;
+   template.usage = PIPE_USAGE_DYNAMIC;
+   idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+
+   /* Intermediate texture: destination sized, float. */
+   template.format = PIPE_FORMAT_R32_FLOAT;
+   template.usage = PIPE_USAGE_STATIC;
+   idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+
+   /* The loop below dereferences every texture, so bail out before it if a
+    * creation failed; unreferencing a NULL slot is a no-op. */
+   if (!idct->textures.individual.matrix ||
+       !idct->textures.individual.transpose ||
+       !idct->textures.individual.source ||
+       !idct->textures.individual.intermediate) {
+      for (i = 0; i < 4; ++i)
+         pipe_resource_reference(&idct->textures.all[i], NULL);
+      return false;
+   }
+
+   for (i = 0; i < 4; ++i) {
+      u_sampler_view_default_template(&sampler_view, idct->textures.all[i], idct->textures.all[i]->format);
+      idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
+   }
+
+   /* Four vertex2f entries per block in each vertex buffer. */
+   idct->quad.stride = sizeof(struct vertex2f);
+   idct->quad.max_index = 4 * max_blocks - 1;
+   idct->quad.buffer_offset = 0;
+   idct->quad.buffer = pipe_buffer_create
+   (
+      idct->pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      sizeof(struct vertex2f) * 4 * max_blocks
+   );
+
+   idct->pos.stride = sizeof(struct vertex2f);
+   idct->pos.max_index = 4 * max_blocks - 1;
+   idct->pos.buffer_offset = 0;
+   idct->pos.buffer = pipe_buffer_create
+   (
+      idct->pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      sizeof(struct vertex2f) * 4 * max_blocks
+   );
+
+   /* Rect element */
+   vertex_elems[0].src_offset = 0;
+   vertex_elems[0].instance_divisor = 0;
+   vertex_elems[0].vertex_buffer_index = 0;
+   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Pos element */
+   vertex_elems[1].src_offset = 0;
+   vertex_elems[1].instance_divisor = 0;
+   vertex_elems[1].vertex_buffer_index = 1;
+   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);
+
+   idct->vs_const_buf = pipe_buffer_create
+   (
+      idct->pipe->screen,
+      PIPE_BIND_CONSTANT_BUFFER,
+      sizeof(struct vertex_shader_consts)
+   );
+
+   return true;
+}
+
+/** Release everything created by init_buffers(). */
+static void
+cleanup_buffers(struct vl_idct *idct)
+{
+   unsigned i;
+
+   assert(idct);
+
+   pipe_resource_reference(&idct->vs_const_buf, NULL);
+
+   for (i = 0; i < 4; ++i) {
+      pipe_sampler_view_reference(&idct->sampler_views.all[i], NULL);
+      pipe_resource_reference(&idct->textures.all[i], NULL);
+   }
+
+   idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
+   pipe_resource_reference(&idct->quad.buffer, NULL);
+   pipe_resource_reference(&idct->pos.buffer, NULL);
+}
+
+/**
+ * Initialise an IDCT instance that receives its blocks in dst.
+ *
+ * Takes a reference on dst, creates shaders and buffers and maps dst for
+ * writing.  Returns true on success; on failure everything acquired so far,
+ * including the reference on dst, is released and false is returned.
+ *
+ * NOTE(review): idct->destination is passed to pipe_resource_reference()
+ * before being written here, so the caller presumably hands in a zeroed
+ * structure -- confirm.
+ */
+bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst)
+{
+   assert(idct && pipe && dst);
+
+   idct->pipe = pipe;
+
+   idct->viewport.scale[0] = dst->width0;
+   idct->viewport.scale[1] = dst->height0;
+   idct->viewport.scale[2] = 1;
+   idct->viewport.scale[3] = 1;
+   idct->viewport.translate[0] = 0;
+   idct->viewport.translate[1] = 0;
+   idct->viewport.translate[2] = 0;
+   idct->viewport.translate[3] = 0;
+
+   idct->fb_state.width = dst->width0;
+   idct->fb_state.height = dst->height0;
+   idct->fb_state.nr_cbufs = 1;
+   idct->fb_state.zsbuf = NULL;
+
+   pipe_resource_reference(&idct->destination, dst);
+
+   if(!init_shaders(idct)) {
+      /* don't leak the reference taken above */
+      pipe_resource_reference(&idct->destination, NULL);
+      return false;
+   }
+
+   if(!init_buffers(idct)) {
+      cleanup_shaders(idct);
+      pipe_resource_reference(&idct->destination, NULL);
+      return false;
+   }
+
+   xfer_buffers_map(idct);
+
+   return true;
+}
+
+/**
+ * Tear down an instance set up by vl_idct_init().
+ *
+ * The destination is mapped from init onwards (flush re-maps it), so the
+ * outstanding transfer has to be unmapped and destroyed here, and the
+ * reference taken on the destination has to be dropped.
+ */
+void vl_idct_cleanup(struct vl_idct *idct)
+{
+   xfer_buffers_unmap(idct);
+
+   cleanup_shaders(idct);
+   cleanup_buffers(idct);
+
+   pipe_resource_reference(&idct->destination, NULL);
+}
+
+/**
+ * Copy one BLOCK_WIDTH x BLOCK_HEIGHT block of shorts into the mapped
+ * destination.  x and y are in units of blocks.
+ */
+void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
+{
+   unsigned tex_pitch;
+   short *texels;
+   unsigned i;
+
+   assert(idct);
+   assert(block);
+
+   /* row pitch of the mapping in texels rather than bytes */
+   tex_pitch = idct->tex_transfer->stride / util_format_get_blocksize(idct->tex_transfer->resource->format);
+   texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
+
+   for (i = 0; i < BLOCK_HEIGHT; ++i)
+      memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
+}
+
+/**
+ * Unmap the destination so the blocks written so far become visible, then
+ * map it again for the next batch.  The actual IDCT passes are still TODO.
+ */
+void vl_idct_flush(struct vl_idct *idct)
+{
+   xfer_buffers_unmap(idct);
+   // TODO
+   xfer_buffers_map(idct);
+}
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
new file mode 100644
index 00000000000..02a3250399f
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -0,0 +1,90 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_idct_h
+#define vl_idct_h
+
+#include
+
+struct vl_idct /* state of one IDCT instance; set up by vl_idct_init() */
+{
+   struct pipe_context *pipe; /* context used for every state/resource call */
+
+   struct pipe_viewport_state viewport; /* scaled to the destination size in vl_idct_init() */
+   struct pipe_resource *vs_const_buf; /* constant buffer sized for the vertex shader constants */
+   struct pipe_framebuffer_state fb_state; /* destination-sized, one colour buffer, no zsbuf */
+
+   struct pipe_resource *destination; /* referenced resource that vl_idct_add_block() writes into */
+
+   void *vertex_elems_state; /* two R32G32_FLOAT elements: rect (buffer 0) and pos (buffer 1) */
+
+   union
+   {
+      void *all[4];
+      struct {
+         void *matrix, *transpose;
+         void *source, *intermediate;
+      } individual;
+   } samplers; /* NOTE(review): not assigned anywhere in vl_idct.c of this patch */
+
+   union
+   {
+      struct pipe_sampler_view *all[4];
+      struct {
+         struct pipe_sampler_view *matrix, *transpose;
+         struct pipe_sampler_view *source, *intermediate;
+      } individual;
+   } sampler_views; /* default views, one per entry of textures */
+
+   void *vs; /* vertex shader shared by both passes */
+   void *transpose_fs, *matrix_fs; /* fragment shaders of the two passes */
+
+   union
+   {
+      struct pipe_resource *all[4];
+      struct {
+         struct pipe_resource *matrix, *transpose;
+         struct pipe_resource *source, *intermediate;
+      } individual;
+   } textures; /* matrix/transpose are 8x8 R32_FLOAT; source/intermediate are destination sized */
+
+   struct pipe_vertex_buffer quad; /* vertex2f buffer, four vertices per block */
+   struct pipe_vertex_buffer pos; /* vertex2f buffer, four vertices per block */
+
+   struct pipe_transfer *tex_transfer; /* current write transfer on destination */
+   short *texels; /* pointer returned by mapping tex_transfer */
+};
+
+bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst); /* set up idct for dst and map dst for writing; false on failure */
+
+void vl_idct_cleanup(struct vl_idct *idct); /* release the shaders and buffers created by vl_idct_init() */
+
+void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block); /* copy one 8x8 block of shorts; x and y count blocks */
+
+void vl_idct_flush(struct vl_idct *idct); /* unmap and re-map the destination; the IDCT passes are still TODO */
+
+#endif
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 5c0404b4b07..7bc7ba91f94 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -572,47 +572,6 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
-static void
-xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
-{
-   unsigned i;
-
-   assert(r);
-
-   for (i = 0; i < 3; ++i) {
-      struct pipe_box rect =
-      {
-         0, 0, 0,
-         r->textures.all[i]->width0,
-         r->textures.all[i]->height0,
-         1
-      };
-
-
r->tex_transfer[i] = r->pipe->get_transfer - ( - r->pipe, r->textures.all[i], - u_subresource(0, 0), - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &rect - ); - - r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]); - } -} - -static void -xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r) -{ - unsigned i; - - assert(r); - - for (i = 0; i < 3; ++i) { - r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]); - r->pipe->transfer_destroy(r->pipe, r->tex_transfer[i]); - } -} - static bool init_pipe_state(struct vl_mpeg12_mc_renderer *r) { @@ -1209,7 +1168,10 @@ flush(struct vl_mpeg12_mc_renderer *r) assert(r); assert(r->num_macroblocks == r->macroblocks_per_batch); - xfer_buffers_unmap(r); + vl_idct_flush(&r->idct_y); + vl_idct_flush(&r->idct_cr); + vl_idct_flush(&r->idct_cb); + gen_macroblock_stream(r, num_macroblocks); if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) { @@ -1322,7 +1284,6 @@ flush(struct vl_mpeg12_mc_renderer *r) r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence); r->num_macroblocks = 0; - xfer_buffers_map(r); } static void @@ -1352,40 +1313,20 @@ update_render_target(struct vl_mpeg12_mc_renderer *r) r->pipe->set_viewport_state(r->pipe, &r->viewport); } -static void -grab_coded_block(short *src, short *dst, unsigned dst_pitch) -{ - unsigned y; - - assert(src); - assert(dst); - - for (y = 0; y < BLOCK_HEIGHT; ++y) - memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2); -} - static void grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks) { - unsigned tex_pitch; - short *texels; unsigned tb = 0, sb = 0; - unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT; unsigned x, y; assert(r); assert(blocks); - tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->resource->format); - texels = r->texels[0] + mbpy * tex_pitch + mbpx; - for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x, ++tb) { if 
((cbp >> (5 - tb)) & 1) { - grab_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, - texels + y * tex_pitch * BLOCK_HEIGHT + - x * BLOCK_WIDTH, tex_pitch); + vl_idct_add_block(&r->idct_y, mbx * 2 + x, mby * 2 + y, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT); ++sb; } } @@ -1394,15 +1335,12 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, /* TODO: Implement 422, 444 */ assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); - mbpx /= 2; - mbpy /= 2; - for (tb = 0; tb < 2; ++tb) { - tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->resource->format); - texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx; - if ((cbp >> (1 - tb)) & 1) { - grab_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch); + if(tb == 0) + vl_idct_add_block(&r->idct_cb, mbx, mby, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT); + else + vl_idct_add_block(&r->idct_cr, mbx, mby, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT); ++sb; } } @@ -1499,7 +1437,9 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, renderer->future = NULL; renderer->num_macroblocks = 0; - xfer_buffers_map(renderer); + vl_idct_init(&renderer->idct_y, pipe, renderer->textures.individual.y); + vl_idct_init(&renderer->idct_cr, pipe, renderer->textures.individual.cr); + vl_idct_init(&renderer->idct_cb, pipe, renderer->textures.individual.cb); return true; } @@ -1509,7 +1449,9 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer) { assert(renderer); - xfer_buffers_unmap(renderer); + vl_idct_cleanup(&renderer->idct_y); + vl_idct_cleanup(&renderer->idct_cr); + vl_idct_cleanup(&renderer->idct_cb); util_delete_keymap(renderer->texview_map, renderer->pipe); cleanup_pipe_state(renderer); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h index 50c38f9ff10..ed48b5b6b45 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h +++ 
b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h @@ -32,6 +32,7 @@ #include #include #include "vl_types.h" +#include "vl_idct.h" struct pipe_context; struct pipe_macroblock; @@ -57,6 +58,9 @@ struct vl_mpeg12_mc_renderer struct pipe_viewport_state viewport; struct pipe_resource *vs_const_buf; struct pipe_framebuffer_state fb_state; + + struct vl_idct idct_y, idct_cb, idct_cr; + union { void *all[3]; @@ -94,8 +98,6 @@ struct vl_mpeg12_mc_renderer struct pipe_fence_handle **fence; unsigned num_macroblocks; struct pipe_mpeg12_macroblock *macroblock_buf; - struct pipe_transfer *tex_transfer[3]; - short *texels[3]; struct keymap *texview_map; }; -- 2.30.2