From f547472bfa0a797adacc2a7688b4c1ba65381a80 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 27 Sep 2009 19:49:06 -0400 Subject: [PATCH] g3dvl: pipe_video_context interface, softpipe impl, auxiliary libs --- configs/default | 2 +- configure.ac | 2 +- src/gallium/SConscript | 1 + src/gallium/auxiliary/vl/Makefile | 12 + src/gallium/auxiliary/vl/SConscript | 12 + .../auxiliary/vl/vl_bitstream_parser.c | 144 ++ .../auxiliary/vl/vl_bitstream_parser.h | 36 + src/gallium/auxiliary/vl/vl_compositor.c | 590 ++++++ src/gallium/auxiliary/vl/vl_compositor.h | 47 + .../auxiliary/vl/vl_mpeg12_mc_renderer.c | 1662 +++++++++++++++++ .../auxiliary/vl/vl_mpeg12_mc_renderer.h | 93 + src/gallium/auxiliary/vl/vl_shader_build.c | 215 +++ src/gallium/auxiliary/vl/vl_shader_build.h | 61 + src/gallium/drivers/softpipe/Makefile | 3 +- src/gallium/drivers/softpipe/SConscript | 3 +- src/gallium/drivers/softpipe/sp_texture.c | 56 + src/gallium/drivers/softpipe/sp_texture.h | 16 + .../drivers/softpipe/sp_video_context.c | 273 +++ .../drivers/softpipe/sp_video_context.h | 30 + src/gallium/include/pipe/p_defines.h | 24 + src/gallium/include/pipe/p_format.h | 18 + src/gallium/include/pipe/p_screen.h | 16 +- src/gallium/include/pipe/p_video_context.h | 92 + src/gallium/include/pipe/p_video_state.h | 158 ++ 24 files changed, 3561 insertions(+), 5 deletions(-) create mode 100644 src/gallium/auxiliary/vl/Makefile create mode 100644 src/gallium/auxiliary/vl/SConscript create mode 100644 src/gallium/auxiliary/vl/vl_bitstream_parser.c create mode 100644 src/gallium/auxiliary/vl/vl_bitstream_parser.h create mode 100644 src/gallium/auxiliary/vl/vl_compositor.c create mode 100644 src/gallium/auxiliary/vl/vl_compositor.h create mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c create mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h create mode 100644 src/gallium/auxiliary/vl/vl_shader_build.c create mode 100644 src/gallium/auxiliary/vl/vl_shader_build.h create mode 100644 src/gallium/drivers/softpipe/sp_video_context.c create mode 100644 src/gallium/drivers/softpipe/sp_video_context.h create mode 100644 src/gallium/include/pipe/p_video_context.h create mode 100644 src/gallium/include/pipe/p_video_state.h diff --git a/configs/default b/configs/default index cb3ca1046f4..f1e2aa3ef5f 100644 --- a/configs/default +++ b/configs/default @@ -94,7 +94,7 @@ EGL_DRIVERS_DIRS = demo # Gallium directories and GALLIUM_DIRS = auxiliary drivers state_trackers -GALLIUM_AUXILIARY_DIRS = rbug draw translate cso_cache pipebuffer tgsi sct rtasm util indices +GALLIUM_AUXILIARY_DIRS = rbug draw translate cso_cache pipebuffer tgsi sct rtasm util indices vl GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a) GALLIUM_DRIVERS_DIRS = softpipe i915simple failover trace identity GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVERS_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) diff --git a/configure.ac b/configure.ac index 2881bb6bc25..143fd31a022 100644 --- a/configure.ac +++ b/configure.ac @@ -417,7 +417,7 @@ WINDOW_SYSTEM="" GALLIUM_DIRS="auxiliary drivers state_trackers" GALLIUM_WINSYS_DIRS="" GALLIUM_WINSYS_DRM_DIRS="" -GALLIUM_AUXILIARY_DIRS="rbug draw translate cso_cache pipebuffer tgsi sct rtasm util indices" +GALLIUM_AUXILIARY_DIRS="rbug draw translate cso_cache pipebuffer tgsi sct rtasm util indices vl" GALLIUM_DRIVERS_DIRS="softpipe failover trace identity" GALLIUM_STATE_TRACKERS_DIRS="" diff --git a/src/gallium/SConscript b/src/gallium/SConscript index 89c69d7205e..8be84cddbe7 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -23,6 +23,7 @@ SConscript([ 'auxiliary/pipebuffer/SConscript', 'auxiliary/indices/SConscript', 'auxiliary/rbug/SConscript', + 'auxiliary/vl/SConscript', ]) for driver in env['drivers']: diff --git a/src/gallium/auxiliary/vl/Makefile b/src/gallium/auxiliary/vl/Makefile new file mode 100644 index 00000000000..71bfb937ad7 --- /dev/null +++ b/src/gallium/auxiliary/vl/Makefile @@ -0,0 +1,12 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = vl + +C_SOURCES = \ + vl_bitstream_parser.c \ + vl_mpeg12_mc_renderer.c \ + vl_compositor.c \ + vl_shader_build.c + +include ../../Makefile.template diff --git a/src/gallium/auxiliary/vl/SConscript b/src/gallium/auxiliary/vl/SConscript new file mode 100644 index 00000000000..eb50940c359 --- /dev/null +++ b/src/gallium/auxiliary/vl/SConscript @@ -0,0 +1,12 @@ +Import('*') + +vl = env.ConvenienceLibrary( + target = 'vl', + source = [ + 'vl_bitstream_parser.c', + 'vl_mpeg12_mc_renderer.c', + 'vl_compositor.c', + 'vl_shader_build.c', + ]) + +auxiliaries.insert(0, vl) diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.c b/src/gallium/auxiliary/vl/vl_bitstream_parser.c new file mode 100644 index 00000000000..356faa13481 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_bitstream_parser.c @@ -0,0 +1,144 @@ +#include "vl_bitstream_parser.h" +#include +#include +#include + +static unsigned +grab_bits(unsigned cursor, unsigned how_many_bits, unsigned bitstream_elt) +{ + unsigned excess_bits = sizeof(unsigned) * CHAR_BIT - how_many_bits - cursor; + + assert(cursor < sizeof(unsigned) * CHAR_BIT); + assert(how_many_bits > 0 && how_many_bits <= sizeof(unsigned) * CHAR_BIT); + assert(cursor + how_many_bits <= sizeof(unsigned) * CHAR_BIT); + + return (bitstream_elt << excess_bits) >> (excess_bits + cursor); +} + +static unsigned +show_bits(unsigned cursor, unsigned how_many_bits, const unsigned *bitstream) +{ + unsigned cur_int = cursor / (sizeof(unsigned) * CHAR_BIT); + unsigned cur_bit = cursor % (sizeof(unsigned) * CHAR_BIT); + + assert(bitstream); + + if (cur_bit + how_many_bits > sizeof(unsigned) * CHAR_BIT) + { + return grab_bits(cur_bit, sizeof(unsigned) * CHAR_BIT - cur_bit, + bitstream[cur_int]) | + grab_bits(0, cur_bit + how_many_bits - sizeof(unsigned) * CHAR_BIT, + bitstream[cur_int + 1]) << (sizeof(unsigned) * CHAR_BIT - cur_bit); + } + else + return grab_bits(cur_bit, how_many_bits, bitstream[cur_int]); +} + +bool vl_bitstream_parser_init(struct vl_bitstream_parser *parser, + unsigned num_bitstreams, + const void **bitstreams, + const unsigned *sizes) +{ + assert(parser); + assert(num_bitstreams); + assert(bitstreams); + assert(sizes); + + parser->num_bitstreams = num_bitstreams; + parser->bitstreams = (const unsigned**)bitstreams; + parser->sizes = sizes; + parser->cur_bitstream = 0; + parser->cursor = 0; + + return true; +} + +void vl_bitstream_parser_cleanup(struct vl_bitstream_parser *parser) +{ + assert(parser); +} + +unsigned +vl_bitstream_parser_get_bits(struct vl_bitstream_parser *parser, + unsigned how_many_bits) +{ + unsigned bits; + + assert(parser); + + bits = vl_bitstream_parser_show_bits(parser, how_many_bits); + + vl_bitstream_parser_forward(parser, how_many_bits); + + return bits; +} + +unsigned +vl_bitstream_parser_show_bits(struct vl_bitstream_parser *parser, + unsigned how_many_bits) +{ + unsigned bits = 0; + unsigned shift = 0; + unsigned cursor; + unsigned cur_bitstream; + + assert(parser); + + cursor = parser->cursor; + cur_bitstream = parser->cur_bitstream; + + while (1) + { + unsigned bits_left = parser->sizes[cur_bitstream] * CHAR_BIT - cursor; + unsigned bits_to_show = how_many_bits > bits_left ? bits_left : how_many_bits; + + bits |= show_bits(cursor, bits_to_show, + parser->bitstreams[cur_bitstream]) << shift; + + if (how_many_bits > bits_to_show) + { + how_many_bits -= bits_to_show; + cursor = 0; + ++cur_bitstream; + shift += bits_to_show; + } + else + break; + } + + return bits; +} + +void vl_bitstream_parser_forward(struct vl_bitstream_parser *parser, + unsigned how_many_bits) +{ + assert(parser); + assert(how_many_bits); + + parser->cursor += how_many_bits; + + while (parser->cursor > parser->sizes[parser->cur_bitstream] * CHAR_BIT) + { + parser->cursor -= parser->sizes[parser->cur_bitstream++] * CHAR_BIT; + assert(parser->cur_bitstream < parser->num_bitstreams); + } +} + +void vl_bitstream_parser_rewind(struct vl_bitstream_parser *parser, + unsigned how_many_bits) +{ + signed c; + + assert(parser); + assert(how_many_bits); + + c = parser->cursor - how_many_bits; + + while (c < 0) + { + c += parser->sizes[parser->cur_bitstream--] * CHAR_BIT; + assert(parser->cur_bitstream < parser->num_bitstreams); + } + + parser->cursor = (unsigned)c; +} diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.h b/src/gallium/auxiliary/vl/vl_bitstream_parser.h new file mode 100644 index 00000000000..46bebf470ff --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_bitstream_parser.h @@ -0,0 +1,36 @@ +#ifndef vl_bitstream_parser_h +#define vl_bitstream_parser_h + +#include + +struct vl_bitstream_parser +{ + unsigned num_bitstreams; + const unsigned **bitstreams; + const unsigned *sizes; + unsigned cur_bitstream; + unsigned cursor; +}; + +bool vl_bitstream_parser_init(struct vl_bitstream_parser *parser, + unsigned num_bitstreams, + const void **bitstreams, + const unsigned *sizes); + +void vl_bitstream_parser_cleanup(struct vl_bitstream_parser *parser); + +unsigned +vl_bitstream_parser_get_bits(struct vl_bitstream_parser *parser, + unsigned how_many_bits); + +unsigned +vl_bitstream_parser_show_bits(struct vl_bitstream_parser *parser, + unsigned how_many_bits); + +void vl_bitstream_parser_forward(struct vl_bitstream_parser *parser, + unsigned how_many_bits); + +void vl_bitstream_parser_rewind(struct vl_bitstream_parser *parser, + unsigned how_many_bits); + +#endif /* vl_bitstream_parser_h */ diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c new file mode 100644 index 00000000000..0894421c0b8 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -0,0 +1,590 @@ +#include "vl_compositor.h" +#include +#include +#include +#include +#include +#include +#include "vl_shader_build.h" + +struct vertex2f +{ + float x, y; +}; + +struct vertex4f +{ + float x, y, z, w; +}; + +struct vertex_shader_consts +{ + struct vertex4f dst_scale; + struct vertex4f dst_trans; + struct vertex4f src_scale; + struct vertex4f src_trans; +}; + +struct fragment_shader_consts +{ + struct vertex4f bias; + float matrix[16]; +}; + +/* + * Represents 2 triangles in a strip in normalized coords. + * Used to render the surface onto the frame buffer. + */ +static const struct vertex2f surface_verts[4] = +{ + {0.0f, 0.0f}, + {0.0f, 1.0f}, + {1.0f, 0.0f}, + {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above. We can use the position values directly. + * TODO: Duplicate these in the shader, no need to create a buffer. + */ +static const struct vertex2f *surface_texcoords = surface_verts; + +/* + * Identity color conversion constants, for debugging + */ +static const struct fragment_shader_consts identity = +{ + { + 0.0f, 0.0f, 0.0f, 0.0f + }, + { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const struct fragment_shader_consts bt_601 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 1.371f, 0.0f, + 1.0f, -0.336f, -0.698f, 0.0f, + 1.0f, 1.732f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +static const struct fragment_shader_consts bt_601_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.596f, 0.0f, + 1.164f, -0.391f, -0.813f, 0.0f, + 1.164f, 2.018f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const struct fragment_shader_consts bt_709 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 1.540f, 0.0f, + 1.0f, -0.183f, -0.459f, 0.0f, + 1.0f, 1.816f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +const struct fragment_shader_consts bt_709_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.793f, 0.0f, + 1.164f, -0.213f, -0.534f, 0.0f, + 1.164f, 2.115f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +static void +create_vert_shader(struct vl_compositor *c) +{ + const unsigned max_tokens = 50; + + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(c); + + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (unsigned i = 0; i < 2; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale vertex pos rect to destination size + * decl c1 ; Translation vector to move vertex pos rect into position + * decl c2 ; Scaling vector to scale texcoord rect to source size + * decl c3 ; Translation vector to move texcoord rect into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (unsigned i = 0; i < 2; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * mad o0, i0, c0, c1 ; Scale and translate unit output rect to destination size and pos + * mad o1, i1, c2, c3 ; Scale and translate unit texcoord rect to source size and pos + */ + for (unsigned i = 0; i < 2; ++i) + { + inst = vl_inst4(TGSI_OPCODE_MAD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + vs.tokens = tokens; + c->vertex_shader = c->pipe->create_vs_state(c->pipe, &vs); + FREE(tokens); +} + +static void +create_frag_shader(struct vl_compositor *c) +{ + const unsigned max_tokens = 50; + + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(c); + + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* decl i0 ; Texcoords for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl c0 ; Bias vector for CSC + * decl c1-c4 ; CSC matrix c1-c4 + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl s0 ; Sampler for tex containing picture to display */ + decl = vl_decl_samplers(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* tex2d t0, i0, s0 ; Read src pixel */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c2 + * dp4 o0.z, t0, c3 + */ + for (unsigned i = 0; i < 3; ++i) + { + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + fs.tokens = tokens; + c->fragment_shader = c->pipe->create_fs_state(c->pipe, &fs); + FREE(tokens); +} + +static bool +init_pipe_state(struct vl_compositor *c) +{ + struct pipe_sampler_state sampler; + + assert(c); + + c->fb_state.nr_cbufs = 1; + c->fb_state.zsbuf = NULL; + + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.lod_bias = ;*/ + /*sampler.min_lod = ;*/ + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler); + + return true; +} + +static void cleanup_pipe_state(struct vl_compositor *c) +{ + assert(c); + + c->pipe->delete_sampler_state(c->pipe, c->sampler); +} + +static bool +init_shaders(struct vl_compositor *c) +{ + assert(c); + + create_vert_shader(c); + create_frag_shader(c); + + return true; +} + +static void cleanup_shaders(struct vl_compositor *c) +{ + assert(c); + + c->pipe->delete_vs_state(c->pipe, c->vertex_shader); + c->pipe->delete_fs_state(c->pipe, c->fragment_shader); +} + +static bool +init_buffers(struct vl_compositor *c) +{ + assert(c); + + /* + * Create our vertex buffer and vertex buffer element + * VB contains 4 vertices that render a quad covering the entire window + * to display a rendered surface + * Quad is rendered as a tri strip + */ + c->vertex_bufs[0].stride = sizeof(struct vertex2f); + c->vertex_bufs[0].max_index = 3; + c->vertex_bufs[0].buffer_offset = 0; + c->vertex_bufs[0].buffer = pipe_buffer_create + ( + c->pipe->screen, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vertex2f) * 4 + ); + + memcpy + ( + pipe_buffer_map(c->pipe->screen, c->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + surface_verts, + sizeof(struct vertex2f) * 4 + ); + + pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer); + + c->vertex_elems[0].src_offset = 0; + c->vertex_elems[0].vertex_buffer_index = 0; + c->vertex_elems[0].nr_components = 2; + c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + * Create our texcoord buffer and texcoord buffer element + * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices + */ + c->vertex_bufs[1].stride = sizeof(struct vertex2f); + c->vertex_bufs[1].max_index = 3; + c->vertex_bufs[1].buffer_offset = 0; + c->vertex_bufs[1].buffer = pipe_buffer_create + ( + c->pipe->screen, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vertex2f) * 4 + ); + + memcpy + ( + pipe_buffer_map(c->pipe->screen, c->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + surface_texcoords, + sizeof(struct vertex2f) * 4 + ); + + pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer); + + c->vertex_elems[1].src_offset = 0; + c->vertex_elems[1].vertex_buffer_index = 1; + c->vertex_elems[1].nr_components = 2; + c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + * Create our vertex shader's constant buffer + * Const buffer contains scaling and translation vectors + */ + c->vs_const_buf.buffer = pipe_buffer_create + ( + c->pipe->screen, + 1, + PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, + sizeof(struct vertex_shader_consts) + ); + + /* + * Create our fragment shader's constant buffer + * Const buffer contains the color conversion matrix and bias vectors + */ + c->fs_const_buf.buffer = pipe_buffer_create + ( + c->pipe->screen, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + sizeof(struct fragment_shader_consts) + ); + + /* + * TODO: Refactor this into a seperate function, + * allow changing the CSC matrix at runtime to switch between regular & full versions + */ + memcpy + ( + pipe_buffer_map(c->pipe->screen, c->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &bt_601_full, + sizeof(struct fragment_shader_consts) + ); + + pipe_buffer_unmap(c->pipe->screen, c->fs_const_buf.buffer); + + return true; +} + +static void +cleanup_buffers(struct vl_compositor *c) +{ + assert(c); + + for (unsigned i = 0; i < 2; ++i) + pipe_buffer_reference(&c->vertex_bufs[i].buffer, NULL); + + pipe_buffer_reference(&c->vs_const_buf.buffer, NULL); + pipe_buffer_reference(&c->fs_const_buf.buffer, NULL); +} + +bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe) +{ + assert(compositor); + + memset(compositor, 0, sizeof(struct vl_compositor)); + + compositor->pipe = pipe; + + if (!init_pipe_state(compositor)) + return false; + if (!init_shaders(compositor)) + { + cleanup_pipe_state(compositor); + return false; + } + if (!init_buffers(compositor)) + { + cleanup_shaders(compositor); + cleanup_pipe_state(compositor); + return false; + } + + return true; +} + +void vl_compositor_cleanup(struct vl_compositor *compositor) +{ + assert(compositor); + + cleanup_buffers(compositor); + cleanup_shaders(compositor); + cleanup_pipe_state(compositor); +} + +void vl_compositor_render(struct vl_compositor *compositor, + /*struct pipe_texture *backround, + struct pipe_video_rect *backround_area,*/ + struct pipe_texture *src_surface, + enum pipe_mpeg12_picture_type picture_type, + /*unsigned num_past_surfaces, + struct pipe_texture *past_surfaces, + unsigned num_future_surfaces, + struct pipe_texture *future_surfaces,*/ + struct pipe_video_rect *src_area, + struct pipe_texture *dst_surface, + struct pipe_video_rect *dst_area, + /*unsigned num_layers, + struct pipe_texture *layers, + struct pipe_video_rect *layer_src_areas, + struct pipe_video_rect *layer_dst_areas*/ + struct pipe_fence_handle **fence) +{ + struct vertex_shader_consts *vs_consts; + + assert(compositor); + assert(src_surface); + assert(src_area); + assert(dst_surface); + assert(dst_area); + assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME); + + compositor->fb_state.width = dst_surface->width[0]; + compositor->fb_state.height = dst_surface->height[0]; + compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface + ( + compositor->pipe->screen, + dst_surface, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + + compositor->viewport.scale[0] = compositor->fb_state.width; + compositor->viewport.scale[1] = compositor->fb_state.height; + compositor->viewport.scale[2] = 1; + compositor->viewport.scale[3] = 1; + compositor->viewport.translate[0] = 0; + compositor->viewport.translate[1] = 0; + compositor->viewport.translate[2] = 0; + compositor->viewport.translate[3] = 0; + + compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state); + compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport); + compositor->pipe->bind_sampler_states(compositor->pipe, 1, &compositor->sampler); + compositor->pipe->set_sampler_textures(compositor->pipe, 1, &src_surface); + compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader); + compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader); + compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs); + compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems); + compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, &compositor->vs_const_buf); + compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, &compositor->fs_const_buf); + + vs_consts = pipe_buffer_map + ( + compositor->pipe->screen, + compositor->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD + ); + + vs_consts->dst_scale.x = dst_area->w / (float)compositor->fb_state.cbufs[0]->width; + vs_consts->dst_scale.y = dst_area->h / (float)compositor->fb_state.cbufs[0]->height; + vs_consts->dst_scale.z = 1; + vs_consts->dst_scale.w = 1; + vs_consts->dst_trans.x = dst_area->x / (float)compositor->fb_state.cbufs[0]->width; + vs_consts->dst_trans.y = dst_area->y / (float)compositor->fb_state.cbufs[0]->height; + vs_consts->dst_trans.z = 0; + vs_consts->dst_trans.w = 0; + + vs_consts->src_scale.x = src_area->w / (float)src_surface->width[0]; + vs_consts->src_scale.y = src_area->h / (float)src_surface->height[0]; + vs_consts->src_scale.z = 1; + vs_consts->src_scale.w = 1; + vs_consts->src_trans.x = src_area->x / (float)src_surface->width[0]; + vs_consts->src_trans.y = src_area->y / (float)src_surface->height[0]; + vs_consts->src_trans.z = 0; + vs_consts->src_trans.w = 0; + + pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf.buffer); + + compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); + compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence); + + pipe_surface_reference(&compositor->fb_state.cbufs[0], NULL); +} diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h new file mode 100644 index 00000000000..2af41e1981f --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -0,0 +1,47 @@ +#ifndef vl_compositor_h +#define vl_compositor_h + +#include +#include +#include + +struct pipe_context; +struct pipe_texture; + +struct vl_compositor +{ + struct pipe_context *pipe; + + struct pipe_framebuffer_state fb_state; + void *sampler; + void *vertex_shader; + void *fragment_shader; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_bufs[2]; + struct pipe_vertex_element vertex_elems[2]; + struct pipe_constant_buffer vs_const_buf, fs_const_buf; +}; + +bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe); + +void vl_compositor_cleanup(struct vl_compositor *compositor); + +void vl_compositor_render(struct vl_compositor *compositor, + /*struct pipe_texture *backround, + struct pipe_video_rect *backround_area,*/ + struct pipe_texture *src_surface, + enum pipe_mpeg12_picture_type picture_type, + /*unsigned num_past_surfaces, + struct pipe_texture *past_surfaces, + unsigned num_future_surfaces, + struct pipe_texture *future_surfaces,*/ + struct pipe_video_rect *src_area, + struct pipe_texture *dst_surface, + struct pipe_video_rect *dst_area, + /*unsigned num_layers, + struct pipe_texture *layers, + struct pipe_video_rect *layer_src_areas, + struct pipe_video_rect *layer_dst_areas,*/ + struct pipe_fence_handle **fence); + +#endif /* vl_compositor_h */ diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c new file mode 100644 index 00000000000..7e73c5ced9f --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -0,0 +1,1662 @@ +#include "vl_mpeg12_mc_renderer.h" +#include +#include +#include +#include +#include +#include +#include +#include "vl_shader_build.h" + +#define DEFAULT_BUF_ALIGNMENT 1 +#define MACROBLOCK_WIDTH 16 +#define MACROBLOCK_HEIGHT 16 +#define BLOCK_WIDTH 8 +#define BLOCK_HEIGHT 8 +#define ZERO_BLOCK_NIL -1.0f +#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f) + +struct vertex2f +{ + float x, y; +}; + +struct vertex4f +{ + float x, y, z, w; +}; + +struct vertex_shader_consts +{ + struct vertex4f denorm; +}; + +struct fragment_shader_consts +{ + struct vertex4f multiplier; + struct vertex4f div; +}; + +/* + * Muliplier renormalizes block samples from 16 bits to 12 bits. + * Divider is used when calculating Y % 2 for choosing top or bottom + * field for P or B macroblocks. + * TODO: Use immediates. + */ +static const struct fragment_shader_consts fs_consts = { + {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, + {0.5f, 2.0f, 0.0f, 0.0f} +}; + +struct vert_stream_0 +{ + struct vertex2f pos; + struct vertex2f luma_tc; + struct vertex2f cb_tc; + struct vertex2f cr_tc; +}; + +enum MACROBLOCK_TYPE +{ + MACROBLOCK_TYPE_INTRA, + MACROBLOCK_TYPE_FWD_FRAME_PRED, + MACROBLOCK_TYPE_FWD_FIELD_PRED, + MACROBLOCK_TYPE_BKWD_FRAME_PRED, + MACROBLOCK_TYPE_BKWD_FIELD_PRED, + MACROBLOCK_TYPE_BI_FRAME_PRED, + MACROBLOCK_TYPE_BI_FIELD_PRED, + + NUM_MACROBLOCK_TYPES +}; + +static void +create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r) +{ + const unsigned max_tokens = 50; + + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(r); + + tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + */ + for (unsigned i = 0; i < 4; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + */ + for (unsigned i = 0; i < 4; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (unsigned i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + vs.tokens = tokens; + r->i_vs = r->pipe->create_vs_state(r->pipe, &vs); + free(tokens); +} + +static void +create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r) +{ + const unsigned max_tokens = 100; + + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(r); + + tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + */ + for (unsigned i = 0; i < 3; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + */ + for (unsigned i = 0; i < 3; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (unsigned i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + fs.tokens = tokens; + r->i_fs = r->pipe->create_fs_state(r->pipe, &fs); + free(tokens); +} + +static void +create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) +{ + const unsigned max_tokens = 100; + + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(r); + + tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + * decl i4 ; Ref surface top field texcoords + * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream) + */ + for (unsigned i = 0; i < 6; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; Ref macroblock texcoords + */ + for (unsigned i = 0; i < 5; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (unsigned i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + vs.tokens = tokens; + r->p_vs[0] = r->pipe->create_vs_state(r->pipe, &vs); + free(tokens); +} + +static void +create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) +{ + assert(false); +} + +static void +create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) +{ + const unsigned max_tokens = 100; + + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(r); + + tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; Ref macroblock texcoords + */ + for (unsigned i = 0; i < 4; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (unsigned i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (unsigned i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* tex2d t1, i3, s3 ; Read texel from ref macroblock */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + fs.tokens = tokens; + r->p_fs[0] = r->pipe->create_fs_state(r->pipe, &fs); + free(tokens); +} + +static void +create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) +{ + assert(false); +} + +static void +create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) +{ + const unsigned max_tokens = 100; + + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(r); + + tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + * decl i4 ; First ref macroblock top field texcoords + * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream) + * decl i6 ; Second ref macroblock top field texcoords + * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream) + */ + for (unsigned i = 0; i < 8; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; First ref macroblock texcoords + * decl o5 ; Second ref macroblock texcoords + */ + for (unsigned i = 0; i < 6; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (unsigned i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords + * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords + */ + for (unsigned i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + vs.tokens = tokens; + r->b_vs[0] = r->pipe->create_vs_state(r->pipe, &vs); + free(tokens); +} + +static void +create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) +{ + assert(false); +} + +static void +create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) +{ + const unsigned max_tokens = 100; + + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(r); + + tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; First ref macroblock texcoords + * decl i4 ; Second ref macroblock texcoords + */ + for (unsigned i = 0; i < 5; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t2 */ + decl = vl_decl_temps(0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for first ref surface texture + * decl s4 ; Sampler for second ref surface texture + */ + for (unsigned i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (unsigned i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i3, s3 ; Read texel from first ref macroblock + * tex2d t2, i4, s4 ; Read texel from second ref macroblock + */ + for (unsigned i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + fs.tokens = tokens; + r->b_fs[0] = r->pipe->create_fs_state(r->pipe, &fs); + free(tokens); +} + +static void +create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) +{ + assert(false); +} + +static void +xfer_buffers_map(struct vl_mpeg12_mc_renderer *r) +{ + assert(r); + + for (unsigned i = 0; i < 3; ++i) + { + r->tex_transfer[i] = r->pipe->screen->get_tex_transfer + ( + r->pipe->screen, r->textures.all[i], + 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, + r->textures.all[i]->width[0], r->textures.all[i]->height[0] + ); + + r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]); + } +} + +static void +xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r) +{ + assert(r); + + for (unsigned i = 0; i < 3; ++i) + { + r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]); + r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]); + } +} + +static bool +init_pipe_state(struct vl_mpeg12_mc_renderer *r) +{ + struct pipe_sampler_state sampler; + unsigned filters[5]; + + assert(r); + + r->viewport.scale[0] = r->pot_buffers ? + util_next_power_of_two(r->picture_width) : r->picture_width; + r->viewport.scale[1] = r->pot_buffers ? + util_next_power_of_two(r->picture_height) : r->picture_height; + r->viewport.scale[2] = 1; + r->viewport.scale[3] = 1; + r->viewport.translate[0] = 0; + r->viewport.translate[1] = 0; + r->viewport.translate[2] = 0; + r->viewport.translate[3] = 0; + + r->fb_state.width = r->pot_buffers ? + util_next_power_of_two(r->picture_width) : r->picture_width; + r->fb_state.height = r->pot_buffers ? + util_next_power_of_two(r->picture_height) : r->picture_height; + r->fb_state.nr_cbufs = 1; + r->fb_state.zsbuf = NULL; + + /* Luma filter */ + filters[0] = PIPE_TEX_FILTER_NEAREST; + /* Chroma filters */ + if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 || + r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) + { + filters[1] = PIPE_TEX_FILTER_NEAREST; + filters[2] = PIPE_TEX_FILTER_NEAREST; + } + else + { + filters[1] = PIPE_TEX_FILTER_LINEAR; + filters[2] = PIPE_TEX_FILTER_LINEAR; + } + /* Fwd, bkwd ref filters */ + filters[3] = PIPE_TEX_FILTER_LINEAR; + filters[4] = PIPE_TEX_FILTER_LINEAR; + + for (unsigned i = 0; i < 5; ++i) + { + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = filters[i]; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = filters[i]; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ; */ + /*sampler.shadow_ambient = ; */ + /*sampler.lod_bias = ; */ + sampler.min_lod = 0; + /*sampler.max_lod = ; */ + /*sampler.border_color[i] = ; */ + /*sampler.max_anisotropy = ; */ + r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler); + } + + return true; +} + +static void +cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r) +{ + assert(r); + + for (unsigned i = 0; i < 5; ++i) + r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]); +} + +static bool +init_shaders(struct vl_mpeg12_mc_renderer *r) +{ + assert(r); + + create_intra_vert_shader(r); + create_intra_frag_shader(r); + create_frame_pred_vert_shader(r); + create_frame_pred_frag_shader(r); + create_frame_bi_pred_vert_shader(r); + create_frame_bi_pred_frag_shader(r); + + return true; +} + +static void +cleanup_shaders(struct vl_mpeg12_mc_renderer *r) +{ + assert(r); + + r->pipe->delete_vs_state(r->pipe, r->i_vs); + r->pipe->delete_fs_state(r->pipe, r->i_fs); + r->pipe->delete_vs_state(r->pipe, r->p_vs[0]); + r->pipe->delete_fs_state(r->pipe, r->p_fs[0]); + r->pipe->delete_vs_state(r->pipe, r->b_vs[0]); + r->pipe->delete_fs_state(r->pipe, r->b_fs[0]); +} + +static bool +init_buffers(struct vl_mpeg12_mc_renderer *r) +{ + struct pipe_texture template; + + const unsigned mbw = + align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH; + const unsigned mbh = + align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT; + + assert(r); + + r->macroblocks_per_batch = + mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1); + r->num_macroblocks = 0; + r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock)); + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */ + template.format = PIPE_FORMAT_R16_SNORM; + template.last_level = 0; + template.width[0] = r->pot_buffers ? + util_next_power_of_two(r->picture_width) : r->picture_width; + template.height[0] = r->pot_buffers ? + util_next_power_of_two(r->picture_height) : r->picture_height; + template.depth[0] = 1; + pf_get_block(template.format, &template.block); + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC; + + r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template); + + if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) + { + template.width[0] = r->pot_buffers ? + util_next_power_of_two(r->picture_width / 2) : + r->picture_width / 2; + template.height[0] = r->pot_buffers ? + util_next_power_of_two(r->picture_height / 2) : + r->picture_height / 2; + } + else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) + template.height[0] = r->pot_buffers ? + util_next_power_of_two(r->picture_height / 2) : + r->picture_height / 2; + + r->textures.individual.cb = + r->pipe->screen->texture_create(r->pipe->screen, &template); + r->textures.individual.cr = + r->pipe->screen->texture_create(r->pipe->screen, &template); + + r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4; + r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1; + r->vertex_bufs.individual.ycbcr.buffer_offset = 0; + r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create + ( + r->pipe->screen, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD, + sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch + ); + + for (unsigned i = 1; i < 3; ++i) + { + r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2; + r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1; + r->vertex_bufs.all[i].buffer_offset = 0; + r->vertex_bufs.all[i].buffer = pipe_buffer_create + ( + r->pipe->screen, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD, + sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch + ); + } + + /* Position element */ + r->vertex_elems[0].src_offset = 0; + r->vertex_elems[0].vertex_buffer_index = 0; + r->vertex_elems[0].nr_components = 2; + r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Luma, texcoord element */ + r->vertex_elems[1].src_offset = sizeof(struct vertex2f); + r->vertex_elems[1].vertex_buffer_index = 0; + r->vertex_elems[1].nr_components = 2; + r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Chroma Cr texcoord element */ + r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2; + r->vertex_elems[2].vertex_buffer_index = 0; + r->vertex_elems[2].nr_components = 2; + r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Chroma Cb texcoord element */ + r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3; + r->vertex_elems[3].vertex_buffer_index = 0; + r->vertex_elems[3].nr_components = 2; + r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* First ref surface top field texcoord element */ + r->vertex_elems[4].src_offset = 0; + r->vertex_elems[4].vertex_buffer_index = 1; + r->vertex_elems[4].nr_components = 2; + r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* First ref surface bottom field texcoord element */ + r->vertex_elems[5].src_offset = sizeof(struct vertex2f); + r->vertex_elems[5].vertex_buffer_index = 1; + r->vertex_elems[5].nr_components = 2; + r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Second ref surface top field texcoord element */ + r->vertex_elems[6].src_offset = 0; + r->vertex_elems[6].vertex_buffer_index = 2; + r->vertex_elems[6].nr_components = 2; + r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Second ref surface bottom field texcoord element */ + r->vertex_elems[7].src_offset = sizeof(struct vertex2f); + r->vertex_elems[7].vertex_buffer_index = 2; + r->vertex_elems[7].nr_components = 2; + r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT; + + r->vs_const_buf.buffer = pipe_buffer_create + ( + r->pipe->screen, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, + sizeof(struct vertex_shader_consts) + ); + + r->fs_const_buf.buffer = pipe_buffer_create + ( + r->pipe->screen, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_CONSTANT, sizeof(struct fragment_shader_consts) + ); + + memcpy + ( + pipe_buffer_map(r->pipe->screen, r->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &fs_consts, sizeof(struct fragment_shader_consts) + ); + + pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf.buffer); + + return true; +} + +static void +cleanup_buffers(struct vl_mpeg12_mc_renderer *r) +{ + assert(r); + + pipe_buffer_reference(&r->vs_const_buf.buffer, NULL); + pipe_buffer_reference(&r->fs_const_buf.buffer, NULL); + + for (unsigned i = 0; i < 3; ++i) + pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL); + + for (unsigned i = 0; i < 3; ++i) + pipe_texture_reference(&r->textures.all[i], NULL); + + FREE(r->macroblock_buf); +} + +static enum MACROBLOCK_TYPE +get_macroblock_type(struct pipe_mpeg12_macroblock *mb) +{ + assert(mb); + + switch (mb->mb_type) + { + case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA: + return MACROBLOCK_TYPE_INTRA; + case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: + return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? + MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED; + case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD: + return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? + MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED; + case PIPE_MPEG12_MACROBLOCK_TYPE_BI: + return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? + MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED; + default: + assert(0); + } + + /* Unreachable */ + return -1; +} + +/* XXX: One of these days this will have to be killed with fire */ +#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb) \ + do { \ + (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \ + (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \ + (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \ + (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \ + \ + if (!use_zb || (cbp) & (lm)) \ + { \ + (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + } \ + else \ + { \ + (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \ + (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \ + (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \ + (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \ + (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \ + (vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \ + } \ + \ + if (!use_zb || (cbp) & (cbm)) \ + { \ + (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + } \ + else \ + { \ + (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \ + (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \ + (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \ + (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \ + (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \ + (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \ + } \ + \ + if (!use_zb || (cbp) & (crm)) \ + { \ + (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + } \ + else \ + { \ + (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \ + (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \ + (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \ + (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \ + (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \ + (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \ + } \ + } while (0) + +static void +gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r, + struct pipe_mpeg12_macroblock *mb, unsigned pos, + struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb) +{ + struct vertex2f mo_vec[2]; + + assert(r); + assert(mb); + assert(ycbcr_vb); + assert(pos < r->macroblocks_per_batch); + + switch (mb->mb_type) + { + case PIPE_MPEG12_MACROBLOCK_TYPE_BI: + { + struct vertex2f *vb; + + assert(ref_vb && ref_vb[1]); + + vb = ref_vb[1] + pos * 2 * 24; + + mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x; + mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y; + + if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) + { + for (unsigned i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + } + } + else + { + mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x; + mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y; + + for (unsigned i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + vb[i + 1].x = mo_vec[1].x; + vb[i + 1].y = mo_vec[1].y; + } + } + + /* fall-through */ + } + case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: + case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD: + { + struct vertex2f *vb; + + assert(ref_vb && ref_vb[0]); + + vb = ref_vb[0] + pos * 2 * 24; + + if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) + { + mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x; + mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y; + + if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) + { + mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x; + mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y; + } + } + else + { + mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x; + mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y; + + if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) + { + mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x; + mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y; + } + } + + if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) + { + for (unsigned i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + } + } + else + { + for (unsigned i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + vb[i + 1].x = mo_vec[1].x; + vb[i + 1].y = mo_vec[1].y; + } + } + + /* fall-through */ + } + case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA: + { + const struct vertex2f unit = + { + r->surface_tex_inv_size.x * MACROBLOCK_WIDTH, + r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT + }; + const struct vertex2f half = + { + r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2), + r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2) + }; + const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE; + + struct vert_stream_0 *vb = ycbcr_vb + pos * 24; + + SET_BLOCK(vb, mb->cbp, mb->mbx, mb->mby, + unit.x, unit.y, 0, 0, half.x, half.y, + 32, 2, 1, use_zb, r->zero_block); + + SET_BLOCK(vb + 6, mb->cbp, mb->mbx, mb->mby, + unit.x, unit.y, half.x, 0, half.x, half.y, + 16, 2, 1, use_zb, r->zero_block); + + SET_BLOCK(vb + 12, mb->cbp, mb->mbx, mb->mby, + unit.x, unit.y, 0, half.y, half.x, half.y, + 8, 2, 1, use_zb, r->zero_block); + + SET_BLOCK(vb + 18, mb->cbp, mb->mbx, mb->mby, + unit.x, unit.y, half.x, half.y, half.x, half.y, + 4, 2, 1, use_zb, r->zero_block); + + break; + } + default: + assert(0); + } +} + +static void +gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r, + unsigned *num_macroblocks) +{ + unsigned offset[NUM_MACROBLOCK_TYPES]; + struct vert_stream_0 *ycbcr_vb; + struct vertex2f *ref_vb[2]; + + assert(r); + assert(num_macroblocks); + + for (unsigned i = 0; i < r->num_macroblocks; ++i) + { + enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]); + ++num_macroblocks[mb_type]; + } + + offset[0] = 0; + + for (unsigned i = 1; i < NUM_MACROBLOCK_TYPES; ++i) + offset[i] = offset[i - 1] + num_macroblocks[i - 1]; + + ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map + ( + r->pipe->screen, + r->vertex_bufs.individual.ycbcr.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD + ); + + for (unsigned i = 0; i < 2; ++i) + ref_vb[i] = (struct vertex2f *)pipe_buffer_map + ( + r->pipe->screen, + r->vertex_bufs.individual.ref[i].buffer, + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD + ); + + for (unsigned i = 0; i < r->num_macroblocks; ++i) + { + enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]); + + gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type], + ycbcr_vb, ref_vb); + + ++offset[mb_type]; + } + + pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer); + for (unsigned i = 0; i < 2; ++i) + pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer); +} + +static void +flush(struct vl_mpeg12_mc_renderer *r) +{ + unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 }; + unsigned vb_start = 0; + struct vertex_shader_consts *vs_consts; + + assert(r); + assert(r->num_macroblocks == r->macroblocks_per_batch); + + gen_macroblock_stream(r, num_macroblocks); + + r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface + ( + r->pipe->screen, r->surface, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE + ); + + r->pipe->set_framebuffer_state(r->pipe, &r->fb_state); + r->pipe->set_viewport_state(r->pipe, &r->viewport); + + vs_consts = pipe_buffer_map + ( + r->pipe->screen, r->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD + ); + + vs_consts->denorm.x = r->surface->width[0]; + vs_consts->denorm.y = r->surface->height[0]; + + pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer); + + r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0, + &r->vs_const_buf); + r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0, + &r->fs_const_buf); + + if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) + { + r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems); + r->pipe->set_sampler_textures(r->pipe, 3, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 3, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->i_vs); + r->pipe->bind_fs_state(r->pipe, r->i_fs); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24; + } + + if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) + { + r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->textures.individual.ref[0] = r->past; + r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->p_vs[0]); + r->pipe->bind_fs_state(r->pipe, r->p_fs[0]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24; + } + + if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) + { + r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->textures.individual.ref[0] = r->past; + r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->p_vs[1]); + r->pipe->bind_fs_state(r->pipe, r->p_fs[1]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24; + } + + if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) + { + r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->textures.individual.ref[0] = r->future; + r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->p_vs[0]); + r->pipe->bind_fs_state(r->pipe, r->p_fs[0]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24; + } + + if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) + { + r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->textures.individual.ref[0] = r->future; + r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->p_vs[1]); + r->pipe->bind_fs_state(r->pipe, r->p_fs[1]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24; + } + + if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) + { + r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); + r->textures.individual.ref[0] = r->past; + r->textures.individual.ref[1] = r->future; + r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->b_vs[0]); + r->pipe->bind_fs_state(r->pipe, r->b_fs[0]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24; + } + + if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) + { + r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); + r->textures.individual.ref[0] = r->past; + r->textures.individual.ref[1] = r->future; + r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->b_vs[1]); + r->pipe->bind_fs_state(r->pipe, r->b_fs[1]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24; + } + + r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence); + pipe_surface_reference(&r->fb_state.cbufs[0], NULL); + + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) + for (unsigned i = 0; i < 3; ++i) + r->zero_block[i].x = ZERO_BLOCK_NIL; + + r->num_macroblocks = 0; +} + +static void +grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch) +{ + assert(src); + assert(dst); + + for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) + memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2); +} + +static void +grab_field_coded_block(short *src, short *dst, unsigned dst_pitch) +{ + assert(src); + assert(dst); + + for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) + memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2); +} + +static void +fill_zero_block(short *dst, unsigned dst_pitch) +{ + assert(dst); + + for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) + memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2); +} + +static void +grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, + enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks) +{ + unsigned tex_pitch; + short *texels; + unsigned tb = 0, sb = 0; + unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT; + + assert(r); + assert(blocks); + + tex_pitch = r->tex_transfer[0]->stride / r->tex_transfer[0]->block.size; + texels = r->texels[0] + mbpy * tex_pitch + mbpx; + + for (unsigned y = 0; y < 2; ++y) + { + for (unsigned x = 0; x < 2; ++x, ++tb) + { + if ((cbp >> (5 - tb)) & 1) + { + if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) + { + grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, + texels + y * tex_pitch * BLOCK_WIDTH + + x * BLOCK_WIDTH, tex_pitch); + } + else + { + grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, + texels + y * tex_pitch + x * BLOCK_WIDTH, + tex_pitch); + } + + ++sb; + } + else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) + { + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL || + ZERO_BLOCK_IS_NIL(r->zero_block[0])) + { + fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch); + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) + { + r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x; + r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y; + } + } + } + } + } + + /* TODO: Implement 422, 444 */ + assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); + + mbpx /= 2; + mbpy /= 2; + + for (tb = 0; tb < 2; ++tb) + { + tex_pitch = r->tex_transfer[tb + 1]->stride / r->tex_transfer[tb + 1]->block.size; + texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx; + + if ((cbp >> (1 - tb)) & 1) + { + grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch); + ++sb; + } + else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) + { + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL || + ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) + { + fill_zero_block(texels, tex_pitch); + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) + { + r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x; + r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y; + } + } + } + } +} + +static void +grab_macroblock(struct vl_mpeg12_mc_renderer *r, + struct pipe_mpeg12_macroblock *mb) +{ + assert(r); + assert(mb); + assert(r->num_macroblocks < r->macroblocks_per_batch); + + memcpy(&r->macroblock_buf[r->num_macroblocks], mb, + sizeof(struct pipe_mpeg12_macroblock)); + + grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks); + + ++r->num_macroblocks; +} + +bool +vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, + struct pipe_context *pipe, + unsigned picture_width, + unsigned picture_height, + enum pipe_video_chroma_format chroma_format, + enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode, + enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling, + bool pot_buffers) +{ + assert(renderer); + assert(pipe); + /* TODO: Implement other policies */ + assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE); + /* TODO: Implement this */ + /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */ + assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE); + /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */ + assert(pot_buffers); + + memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer)); + + renderer->pipe = pipe; + renderer->picture_width = picture_width; + renderer->picture_height = picture_height; + renderer->chroma_format = chroma_format; + renderer->bufmode = bufmode; + renderer->eb_handling = eb_handling; + renderer->pot_buffers = pot_buffers; + + if (!init_pipe_state(renderer)) + return false; + if (!init_shaders(renderer)) + { + cleanup_pipe_state(renderer); + return false; + } + if (!init_buffers(renderer)) + { + cleanup_shaders(renderer); + cleanup_pipe_state(renderer); + return false; + } + + renderer->surface = NULL; + renderer->past = NULL; + renderer->future = NULL; + for (unsigned i = 0; i < 3; ++i) + renderer->zero_block[i].x = ZERO_BLOCK_NIL; + renderer->num_macroblocks = 0; + + xfer_buffers_map(renderer); + + return true; +} + +void +vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer) +{ + assert(renderer); + + xfer_buffers_unmap(renderer); + + cleanup_pipe_state(renderer); + cleanup_shaders(renderer); + cleanup_buffers(renderer); +} + +void +vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer + *renderer, + struct pipe_texture *surface, + struct pipe_texture *past, + struct pipe_texture *future, + unsigned num_macroblocks, + struct pipe_mpeg12_macroblock + *mpeg12_macroblocks, + struct pipe_fence_handle **fence) +{ + bool new_surface = false; + + assert(renderer); + assert(surface); + assert(num_macroblocks); + assert(mpeg12_macroblocks); + + if (renderer->surface) + { + if (surface != renderer->surface) + { + if (renderer->num_macroblocks > 0) + { + xfer_buffers_unmap(renderer); + flush(renderer); + } + + new_surface = true; + } + + /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */ + assert(surface != renderer->surface || renderer->past == past); + assert(surface != renderer->surface || renderer->future == future); + } + else + new_surface = true; + + if (new_surface) + { + renderer->surface = surface; + renderer->past = past; + renderer->future = future; + renderer->fence = fence; + renderer->surface_tex_inv_size.x = 1.0f / surface->width[0]; + renderer->surface_tex_inv_size.y = 1.0f / surface->height[0]; + } + + while (num_macroblocks) + { + unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks; + unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch); + + for (unsigned i = 0; i < num_to_submit; ++i) + { + assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12); + grab_macroblock(renderer, &mpeg12_macroblocks[i]); + } + + num_macroblocks -= num_to_submit; + + if (renderer->num_macroblocks == renderer->macroblocks_per_batch) + { + xfer_buffers_unmap(renderer); + flush(renderer); + xfer_buffers_map(renderer); + /* Next time we get this surface it may have new ref frames */ + renderer->surface = NULL; + } + } +} diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h new file mode 100644 index 00000000000..dfe0f7a24b8 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h @@ -0,0 +1,93 @@ +#ifndef vl_mpeg12_mc_renderer_h +#define vl_mpeg12_mc_renderer_h + +#include +#include +#include + +struct pipe_context; +struct pipe_video_surface; +struct pipe_macroblock; + +/* A slice is video-width (rounded up to a multiple of macroblock width) x macroblock height */ +enum VL_MPEG12_MC_RENDERER_BUFFER_MODE +{ + VL_MPEG12_MC_RENDERER_BUFFER_SLICE, /* Saves memory at the cost of smaller batches */ + VL_MPEG12_MC_RENDERER_BUFFER_PICTURE /* Larger batches, more memory */ +}; + +enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK +{ + VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL, /* Waste of memory bandwidth */ + VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE, /* Can only do point-filtering when interpolating subsampled chroma channels */ + VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE /* Needs conditional texel fetch! */ +}; + +struct vl_mpeg12_mc_renderer +{ + struct pipe_context *pipe; + unsigned picture_width; + unsigned picture_height; + enum pipe_video_chroma_format chroma_format; + enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode; + enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling; + bool pot_buffers; + unsigned macroblocks_per_batch; + + struct pipe_viewport_state viewport; + struct pipe_constant_buffer vs_const_buf; + struct pipe_constant_buffer fs_const_buf; + struct pipe_framebuffer_state fb_state; + struct pipe_vertex_element vertex_elems[8]; + + union + { + void *all[5]; + struct { void *y, *cb, *cr, *ref[2]; } individual; + } samplers; + + void *i_vs, *p_vs[2], *b_vs[2]; + void *i_fs, *p_fs[2], *b_fs[2]; + + union + { + struct pipe_texture *all[5]; + struct { struct pipe_texture *y, *cb, *cr, *ref[2]; } individual; + } textures; + + union + { + struct pipe_vertex_buffer all[3]; + struct { struct pipe_vertex_buffer ycbcr, ref[2]; } individual; + } vertex_bufs; + + struct pipe_texture *surface, *past, *future; + struct pipe_fence_handle **fence; + unsigned num_macroblocks; + struct pipe_mpeg12_macroblock *macroblock_buf; + struct pipe_transfer *tex_transfer[3]; + short *texels[3]; + struct { float x, y; } surface_tex_inv_size; + struct { float x, y; } zero_block[3]; +}; + +bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, + struct pipe_context *pipe, + unsigned picture_width, + unsigned picture_height, + enum pipe_video_chroma_format chroma_format, + enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode, + enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling, + bool pot_buffers); + +void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer); + +void vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer, + struct pipe_texture *surface, + struct pipe_texture *past, + struct pipe_texture *future, + unsigned num_macroblocks, + struct pipe_mpeg12_macroblock *mpeg12_macroblocks, + struct pipe_fence_handle **fence); + +#endif /* vl_mpeg12_mc_renderer_h */ diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c new file mode 100644 index 00000000000..5a4a5ab72cc --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_shader_build.c @@ -0,0 +1,215 @@ +#include "vl_shader_build.h" +#include +#include +#include + +struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_interpolated_input +( + unsigned int name, + unsigned int index, + unsigned int first, + unsigned int last, + int interpolation +) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + assert + ( + interpolation == TGSI_INTERPOLATE_CONSTANT || + interpolation == TGSI_INTERPOLATE_LINEAR || + interpolation == TGSI_INTERPOLATE_PERSPECTIVE + ); + + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.Declaration.Interpolate = interpolation;; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_instruction vl_inst2 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src_file, + unsigned int src_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = src_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src_index; + + return inst; +} + +struct tgsi_full_instruction vl_inst3 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + + return inst; +} + +struct tgsi_full_instruction vl_tex +( + int tex, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = tex; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + + return inst; +} + +struct tgsi_full_instruction vl_inst4 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + inst.FullSrcRegisters[2].SrcRegister.File = src3_file; + inst.FullSrcRegisters[2].SrcRegister.Index = src3_index; + + return inst; +} + +struct tgsi_full_instruction vl_end(void) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + + return inst; +} diff --git a/src/gallium/auxiliary/vl/vl_shader_build.h b/src/gallium/auxiliary/vl/vl_shader_build.h new file mode 100644 index 00000000000..c6c60b55526 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_shader_build.h @@ -0,0 +1,61 @@ +#ifndef vl_shader_build_h +#define vl_shader_build_h + +#include + +struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_interpolated_input +( + unsigned int name, + unsigned int index, + unsigned int first, + unsigned int last, + int interpolation +); +struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last); +struct tgsi_full_instruction vl_inst2 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src_file, + unsigned int src_index +); +struct tgsi_full_instruction vl_inst3 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_tex +( + int tex, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_inst4 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +); +struct tgsi_full_instruction vl_end(void); + +#endif diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 6ab37537628..bcb887a0b26 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -31,6 +31,7 @@ C_SOURCES = \ sp_tex_sample.c \ sp_tex_tile_cache.c \ sp_tile_cache.c \ - sp_surface.c + sp_surface.c \ + sp_video_context.c include ../../Makefile.template diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index 950c3d9955b..aac9edf44e6 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -33,6 +33,7 @@ softpipe = env.ConvenienceLibrary( 'sp_tex_tile_cache.c', 'sp_texture.c', 'sp_tile_cache.c', + 'sp_video_context.c', ]) -Export('softpipe') \ No newline at end of file +Export('softpipe') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 49b51afda7e..45289380d08 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -381,6 +381,59 @@ softpipe_transfer_unmap(struct pipe_screen *screen, } } +static struct pipe_video_surface* +softpipe_video_surface_create(struct pipe_screen *screen, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + struct softpipe_video_surface *sp_vsfc; + struct pipe_texture template; + + assert(screen); + assert(width && height); + + sp_vsfc = CALLOC_STRUCT(softpipe_video_surface); + if (!sp_vsfc) + return NULL; + + pipe_reference_init(&sp_vsfc->base.reference, 1); + sp_vsfc->base.screen = screen; + sp_vsfc->base.chroma_format = chroma_format; + /*sp_vsfc->base.surface_format = PIPE_VIDEO_SURFACE_FORMAT_VUYA;*/ + sp_vsfc->base.width = width; + sp_vsfc->base.height = height; + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_X8R8G8B8_UNORM; + template.last_level = 0; + /* vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true */ + template.width[0] = util_next_power_of_two(width); + template.height[0] = util_next_power_of_two(height); + template.depth[0] = 1; + pf_get_block(template.format, &template.block); + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; + + sp_vsfc->tex = screen->texture_create(screen, &template); + if (!sp_vsfc->tex) + { + FREE(sp_vsfc); + return NULL; + } + + return &sp_vsfc->base; +} + + +static void +softpipe_video_surface_destroy(struct pipe_video_surface *vsfc) +{ + struct softpipe_video_surface *sp_vsfc = softpipe_video_surface(vsfc); + + pipe_texture_reference(&sp_vsfc->tex, NULL); + FREE(sp_vsfc); +} + void softpipe_init_screen_texture_funcs(struct pipe_screen *screen) @@ -396,6 +449,9 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen) screen->tex_transfer_destroy = softpipe_tex_transfer_destroy; screen->transfer_map = softpipe_transfer_map; screen->transfer_unmap = softpipe_transfer_unmap; + + screen->video_surface_create = softpipe_video_surface_create; + screen->video_surface_destroy = softpipe_video_surface_destroy; } diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 2537ab6a40d..2ef64e1e7c3 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -30,6 +30,7 @@ #include "pipe/p_state.h" +#include "pipe/p_video_state.h" struct pipe_context; @@ -62,6 +63,15 @@ struct softpipe_transfer unsigned long offset; }; +struct softpipe_video_surface +{ + struct pipe_video_surface base; + + /* The data is held here: + */ + struct pipe_texture *tex; +}; + /** cast wrappers */ static INLINE struct softpipe_texture * @@ -76,6 +86,12 @@ softpipe_transfer(struct pipe_transfer *pt) return (struct softpipe_transfer *) pt; } +static INLINE struct softpipe_video_surface * +softpipe_video_surface(struct pipe_video_surface *pvs) +{ + return (struct softpipe_video_surface *) pvs; +} + extern void softpipe_init_screen_texture_funcs(struct pipe_screen *screen); diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c new file mode 100644 index 00000000000..1b47bbede26 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_video_context.c @@ -0,0 +1,273 @@ +#include "sp_video_context.h" +#include +#include +#include "softpipe/sp_winsys.h" +#include "softpipe/sp_texture.h" + +static void +sp_mpeg12_destroy(struct pipe_video_context *vpipe) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + + /* Asserted in softpipe_delete_fs_state() for some reason */ + ctx->pipe->bind_vs_state(ctx->pipe, NULL); + ctx->pipe->bind_fs_state(ctx->pipe, NULL); + + ctx->pipe->delete_blend_state(ctx->pipe, ctx->blend); + ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast); + ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa); + + pipe_video_surface_reference(&ctx->decode_target, NULL); + vl_compositor_cleanup(&ctx->compositor); + vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); + ctx->pipe->destroy(ctx->pipe); + + FREE(ctx); +} + +static void +sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe, + struct pipe_video_surface *past, + struct pipe_video_surface *future, + unsigned num_macroblocks, + struct pipe_macroblock *macroblocks, + struct pipe_fence_handle **fence) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + struct pipe_mpeg12_macroblock *mpeg12_macroblocks = (struct pipe_mpeg12_macroblock*)macroblocks; + + assert(vpipe); + assert(num_macroblocks); + assert(macroblocks); + assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12); + assert(ctx->decode_target); + + vl_mpeg12_mc_renderer_render_macroblocks(&ctx->mc_renderer, + softpipe_video_surface(ctx->decode_target)->tex, + past ? softpipe_video_surface(past)->tex : NULL, + future ? softpipe_video_surface(future)->tex : NULL, + num_macroblocks, mpeg12_macroblocks, fence); +} + +static void +sp_mpeg12_clear_surface(struct pipe_video_context *vpipe, + unsigned x, unsigned y, + unsigned width, unsigned height, + unsigned value, + struct pipe_surface *surface) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + assert(surface); + + ctx->pipe->surface_fill(ctx->pipe, surface, x, y, width, height, value); +} + +static void +sp_mpeg12_render_picture(struct pipe_video_context *vpipe, + /*struct pipe_surface *backround, + struct pipe_video_rect *backround_area,*/ + struct pipe_video_surface *src_surface, + enum pipe_mpeg12_picture_type picture_type, + /*unsigned num_past_surfaces, + struct pipe_video_surface *past_surfaces, + unsigned num_future_surfaces, + struct pipe_video_surface *future_surfaces,*/ + struct pipe_video_rect *src_area, + struct pipe_surface *dst_surface, + struct pipe_video_rect *dst_area, + /*unsigned num_layers, + struct pipe_surface *layers, + struct pipe_video_rect *layer_src_areas, + struct pipe_video_rect *layer_dst_areas*/ + struct pipe_fence_handle **fence) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + assert(src_surface); + assert(src_area); + assert(dst_surface); + assert(dst_area); + + vl_compositor_render(&ctx->compositor, softpipe_video_surface(src_surface)->tex, + picture_type, src_area, dst_surface->texture, dst_area, fence); +} + +static void +sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe, + struct pipe_video_surface *dt) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + assert(dt); + + pipe_video_surface_reference(&ctx->decode_target, dt); +} + +static bool +init_pipe_state(struct sp_mpeg12_context *ctx) +{ + struct pipe_rasterizer_state rast; + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + + assert(ctx); + + rast.flatshade = 1; + rast.flatshade_first = 0; + rast.light_twoside = 0; + rast.front_winding = PIPE_WINDING_CCW; + rast.cull_mode = PIPE_WINDING_CW; + rast.fill_cw = PIPE_POLYGON_MODE_FILL; + rast.fill_ccw = PIPE_POLYGON_MODE_FILL; + rast.offset_cw = 0; + rast.offset_ccw = 0; + rast.scissor = 0; + rast.poly_smooth = 0; + rast.poly_stipple_enable = 0; + rast.point_sprite = 0; + rast.point_size_per_vertex = 0; + rast.multisample = 0; + rast.line_smooth = 0; + rast.line_stipple_enable = 0; + rast.line_stipple_factor = 0; + rast.line_stipple_pattern = 0; + rast.line_last_pixel = 0; + rast.bypass_vs_clip_and_viewport = 0; + rast.line_width = 1; + rast.point_smooth = 0; + rast.point_size = 1; + rast.offset_units = 1; + rast.offset_scale = 1; + /*rast.sprite_coord_mode[i] = ;*/ + ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast); + ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast); + + blend.blend_enable = 0; + blend.rgb_func = PIPE_BLEND_ADD; + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_func = PIPE_BLEND_ADD; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + /* Needed to allow color writes to FB, even if blending disabled */ + blend.colormask = PIPE_MASK_RGBA; + blend.dither = 0; + ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend); + ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend); + + dsa.depth.enabled = 0; + dsa.depth.writemask = 0; + dsa.depth.func = PIPE_FUNC_ALWAYS; + dsa.depth.occlusion_count = 0; + for (unsigned i = 0; i < 2; ++i) + { + dsa.stencil[i].enabled = 0; + dsa.stencil[i].func = PIPE_FUNC_ALWAYS; + dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].ref_value = 0; + dsa.stencil[i].valuemask = 0; + dsa.stencil[i].writemask = 0; + } + dsa.alpha.enabled = 0; + dsa.alpha.func = PIPE_FUNC_ALWAYS; + dsa.alpha.ref_value = 0; + ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa); + ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa); + + return true; +} + +static struct pipe_video_context * +sp_mpeg12_create(struct pipe_screen *screen, enum pipe_video_profile profile, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + struct sp_mpeg12_context *ctx; + + assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12); + + ctx = CALLOC_STRUCT(sp_mpeg12_context); + + if (!ctx) + return NULL; + + ctx->base.profile = profile; + ctx->base.chroma_format = chroma_format; + ctx->base.width = width; + ctx->base.height = height; + + ctx->base.screen = screen; + ctx->base.destroy = sp_mpeg12_destroy; + ctx->base.decode_macroblocks = sp_mpeg12_decode_macroblocks; + ctx->base.clear_surface = sp_mpeg12_clear_surface; + ctx->base.render_picture = sp_mpeg12_render_picture; + ctx->base.set_decode_target = sp_mpeg12_set_decode_target; + + ctx->pipe = softpipe_create(screen); + if (!ctx->pipe) + { + FREE(ctx); + return NULL; + } + + /* TODO: Use slice buffering for softpipe when implemented, no advantage to buffering an entire picture */ + if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe, + width, height, chroma_format, + VL_MPEG12_MC_RENDERER_BUFFER_PICTURE, + /* TODO: Use XFER_NONE when implemented */ + VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE, + true)) + { + ctx->pipe->destroy(ctx->pipe); + FREE(ctx); + return NULL; + } + + if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) + { + vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); + ctx->pipe->destroy(ctx->pipe); + FREE(ctx); + return NULL; + } + + if (!init_pipe_state(ctx)) + { + vl_compositor_cleanup(&ctx->compositor); + vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); + ctx->pipe->destroy(ctx->pipe); + FREE(ctx); + return NULL; + } + + return &ctx->base; +} + +struct pipe_video_context * +sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + assert(screen); + assert(width && height); + + switch (u_reduce_video_profile(profile)) + { + case PIPE_VIDEO_CODEC_MPEG12: + return sp_mpeg12_create(screen, profile, + chroma_format, + width, height); + default: + return NULL; + } +} diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h new file mode 100644 index 00000000000..a70ce9f476a --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_video_context.h @@ -0,0 +1,30 @@ +#ifndef SP_VIDEO_CONTEXT_H +#define SP_VIDEO_CONTEXT_H + +#include +#include +#include + +struct pipe_screen; +struct pipe_context; +struct pipe_video_surface; + +struct sp_mpeg12_context +{ + struct pipe_video_context base; + struct pipe_context *pipe; + struct pipe_video_surface *decode_target; + struct vl_mpeg12_mc_renderer mc_renderer; + struct vl_compositor compositor; + + void *rast; + void *dsa; + void *blend; +}; + +struct pipe_video_context * +sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height); + +#endif /* SP_VIDEO_CONTEXT_H */ diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index f252d6df00d..1980831dd9d 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -315,6 +315,30 @@ enum pipe_transfer_usage { #define PIPE_REFERENCED_FOR_READ (1 << 0) #define PIPE_REFERENCED_FOR_WRITE (1 << 1) + +enum pipe_video_codec +{ + PIPE_VIDEO_CODEC_MPEG12, /**< MPEG1, MPEG2 */ + PIPE_VIDEO_CODEC_MPEG4, /**< DIVX, XVID */ + PIPE_VIDEO_CODEC_VC1, /**< WMV */ + PIPE_VIDEO_CODEC_MPEG4_AVC /**< H.264 */ +}; + +enum pipe_video_profile +{ + PIPE_VIDEO_PROFILE_MPEG1, + PIPE_VIDEO_PROFILE_MPEG2_SIMPLE, + PIPE_VIDEO_PROFILE_MPEG2_MAIN, + PIPE_VIDEO_PROFILE_MPEG4_SIMPLE, + PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE, + PIPE_VIDEO_PROFILE_VC1_SIMPLE, + PIPE_VIDEO_PROFILE_VC1_MAIN, + PIPE_VIDEO_PROFILE_VC1_ADVANCED, + PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE, + PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN, + PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH +}; + #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index c4469d4a9e9..af230809201 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -613,6 +613,24 @@ pf_has_alpha( enum pipe_format format ) } } +enum pipe_video_chroma_format +{ + PIPE_VIDEO_CHROMA_FORMAT_420, + PIPE_VIDEO_CHROMA_FORMAT_422, + PIPE_VIDEO_CHROMA_FORMAT_444 +}; + +#if 0 +enum pipe_video_surface_format +{ + PIPE_VIDEO_SURFACE_FORMAT_NV12, /**< Planar; Y plane, UV plane */ + PIPE_VIDEO_SURFACE_FORMAT_YV12, /**< Planar; Y plane, U plane, V plane */ + PIPE_VIDEO_SURFACE_FORMAT_YUYV, /**< Interleaved; Y,U,Y,V,Y,U,Y,V */ + PIPE_VIDEO_SURFACE_FORMAT_UYVY, /**< Interleaved; U,Y,V,Y,U,Y,V,Y */ + PIPE_VIDEO_SURFACE_FORMAT_VUYA /**< Packed; A31-24|Y23-16|U15-8|V7-0 */ +}; +#endif + #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 3f30c52a169..f0a4de5df33 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -53,7 +53,10 @@ extern "C" { struct pipe_fence_handle; struct pipe_winsys; struct pipe_buffer; - +struct pipe_texture; +struct pipe_surface; +struct pipe_video_surface; +struct pipe_transfer; /** @@ -252,6 +255,17 @@ struct pipe_screen { void (*buffer_destroy)( struct pipe_buffer *buf ); + /** + * Create a video surface suitable for use as a decoding target by the + * driver's pipe_video_context. + */ + struct pipe_video_surface* + (*video_surface_create)( struct pipe_screen *screen, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height ); + + void (*video_surface_destroy)( struct pipe_video_surface *vsfc ); + /** * Do any special operations to ensure frontbuffer contents are diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h new file mode 100644 index 00000000000..937705ac505 --- /dev/null +++ b/src/gallium/include/pipe/p_video_context.h @@ -0,0 +1,92 @@ +#ifndef PIPE_VIDEO_CONTEXT_H +#define PIPE_VIDEO_CONTEXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +struct pipe_screen; +struct pipe_buffer; +struct pipe_surface; +struct pipe_video_surface; +struct pipe_macroblock; +struct pipe_picture_desc; +struct pipe_fence_handle; + +/** + * Gallium video rendering context + */ +struct pipe_video_context +{ + struct pipe_screen *screen; + enum pipe_video_profile profile; + enum pipe_video_chroma_format chroma_format; + unsigned width; + unsigned height; + + void *priv; /**< context private data (for DRI for example) */ + + void (*destroy)(struct pipe_video_context *vpipe); + + /** + * Picture decoding and displaying + */ + /*@{*/ + void (*decode_bitstream)(struct pipe_video_context *vpipe, + unsigned num_bufs, + struct pipe_buffer **bitstream_buf); + + void (*decode_macroblocks)(struct pipe_video_context *vpipe, + struct pipe_video_surface *past, + struct pipe_video_surface *future, + unsigned num_macroblocks, + struct pipe_macroblock *macroblocks, + struct pipe_fence_handle **fence); + + void (*clear_surface)(struct pipe_video_context *vpipe, + unsigned x, unsigned y, + unsigned width, unsigned height, + unsigned value, + struct pipe_surface *surface); + + void (*render_picture)(struct pipe_video_context *vpipe, + /*struct pipe_surface *backround, + struct pipe_video_rect *backround_area,*/ + struct pipe_video_surface *src_surface, + enum pipe_mpeg12_picture_type picture_type, + /*unsigned num_past_surfaces, + struct pipe_video_surface *past_surfaces, + unsigned num_future_surfaces, + struct pipe_video_surface *future_surfaces,*/ + struct pipe_video_rect *src_area, + struct pipe_surface *dst_surface, + struct pipe_video_rect *dst_area, + /*unsigned num_layers, + struct pipe_texture *layers, + struct pipe_video_rect *layer_src_areas, + struct pipe_video_rect *layer_dst_areas,*/ + struct pipe_fence_handle **fence); + /*@}*/ + + /** + * Parameter-like states (or properties) + */ + /*@{*/ + void (*set_picture_desc)(struct pipe_video_context *vpipe, + const struct pipe_picture_desc *desc); + + void (*set_decode_target)(struct pipe_video_context *vpipe, + struct pipe_video_surface *dt); + + /* TODO: Interface for CSC matrix, scaling modes, post-processing, etc. */ + /*@}*/ +}; + + +#ifdef __cplusplus +} +#endif + +#endif /* PIPE_VIDEO_CONTEXT_H */ diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h new file mode 100644 index 00000000000..a0128fbd48f --- /dev/null +++ b/src/gallium/include/pipe/p_video_state.h @@ -0,0 +1,158 @@ +#ifndef PIPE_VIDEO_STATE_H +#define PIPE_VIDEO_STATE_H + +/* u_reduce_video_profile() needs these */ +#include +#include + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct pipe_video_surface +{ + struct pipe_reference reference; + struct pipe_screen *screen; + enum pipe_video_chroma_format chroma_format; + /*enum pipe_video_surface_format surface_format;*/ + unsigned width; + unsigned height; +}; + +static INLINE void +pipe_video_surface_reference(struct pipe_video_surface **ptr, struct pipe_video_surface *surf) +{ + struct pipe_video_surface *old_surf = *ptr; + + if (pipe_reference((struct pipe_reference **)ptr, &surf->reference)) + old_surf->screen->video_surface_destroy(old_surf); +} + +struct pipe_video_rect +{ + unsigned x, y, w, h; +}; + +static INLINE enum pipe_video_codec +u_reduce_video_profile(enum pipe_video_profile profile) +{ + switch (profile) + { + case PIPE_VIDEO_PROFILE_MPEG1: + case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE: + case PIPE_VIDEO_PROFILE_MPEG2_MAIN: + return PIPE_VIDEO_CODEC_MPEG12; + + case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE: + case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE: + return PIPE_VIDEO_CODEC_MPEG4; + + case PIPE_VIDEO_PROFILE_VC1_SIMPLE: + case PIPE_VIDEO_PROFILE_VC1_MAIN: + case PIPE_VIDEO_PROFILE_VC1_ADVANCED: + return PIPE_VIDEO_CODEC_VC1; + + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + return PIPE_VIDEO_CODEC_MPEG4_AVC; + + default: + assert(false); + } + + return -1; +} + +enum pipe_mpeg12_picture_type +{ + PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP, + PIPE_MPEG12_PICTURE_TYPE_FIELD_BOTTOM, + PIPE_MPEG12_PICTURE_TYPE_FRAME +}; + +enum pipe_mpeg12_macroblock_type +{ + PIPE_MPEG12_MACROBLOCK_TYPE_INTRA, + PIPE_MPEG12_MACROBLOCK_TYPE_FWD, + PIPE_MPEG12_MACROBLOCK_TYPE_BKWD, + PIPE_MPEG12_MACROBLOCK_TYPE_BI, + + PIPE_MPEG12_MACROBLOCK_NUM_TYPES +}; + +enum pipe_mpeg12_motion_type +{ + PIPE_MPEG12_MOTION_TYPE_FIELD, + PIPE_MPEG12_MOTION_TYPE_FRAME, + PIPE_MPEG12_MOTION_TYPE_DUALPRIME, + PIPE_MPEG12_MOTION_TYPE_16x8 +}; + +enum pipe_mpeg12_dct_type +{ + PIPE_MPEG12_DCT_TYPE_FIELD, + PIPE_MPEG12_DCT_TYPE_FRAME +}; + +struct pipe_macroblock +{ + enum pipe_video_codec codec; +}; + +struct pipe_mpeg12_macroblock +{ + struct pipe_macroblock base; + + unsigned mbx; + unsigned mby; + enum pipe_mpeg12_macroblock_type mb_type; + enum pipe_mpeg12_motion_type mo_type; + enum pipe_mpeg12_dct_type dct_type; + signed pmv[2][2][2]; + unsigned cbp; + void *blocks; +}; + +#if 0 +struct pipe_picture_desc +{ + enum pipe_video_format format; +}; + +struct pipe_mpeg12_picture_desc +{ + struct pipe_picture_desc base; + + /* TODO: Use bitfields where possible? */ + struct pipe_surface *forward_reference; + struct pipe_surface *backward_reference; + unsigned picture_coding_type; + unsigned fcode; + unsigned intra_dc_precision; + unsigned picture_structure; + unsigned top_field_first; + unsigned frame_pred_frame_dct; + unsigned concealment_motion_vectors; + unsigned q_scale_type; + unsigned intra_vlc_format; + unsigned alternate_scan; + unsigned full_pel_forward_vector; + unsigned full_pel_backward_vector; + struct pipe_buffer *intra_quantizer_matrix; + struct pipe_buffer *non_intra_quantizer_matrix; + struct pipe_buffer *chroma_intra_quantizer_matrix; + struct pipe_buffer *chroma_non_intra_quantizer_matrix; +}; +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* PIPE_VIDEO_STATE_H */ -- 2.30.2