From 6235141fd2c7af21c2b41ca66f06abc3cb0bbc24 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 6 Jul 2008 22:04:29 -0400 Subject: [PATCH] g3dvl: IDCT part 1. Very basic IDCT support is in, performed CPU-side for now. --- src/gallium/state_trackers/g3dvl/vl_context.c | 241 +++++++++++++++++- src/gallium/state_trackers/g3dvl/vl_context.h | 10 + src/gallium/state_trackers/g3dvl/vl_surface.c | 136 +++++++++- src/libXvMC/surface.c | 2 + 4 files changed, 381 insertions(+), 8 deletions(-) diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 58971bd7c79..1668ad1651b 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -11,22 +11,262 @@ #include #include "vl_shader_build.h" #include "vl_data.h" +#include "vl_defs.h" #include "vl_util.h" +static int vlCreateVertexShaderFrameIDCT(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move pos in to pos out + * mov o1, i1 ; Move texcoord in to texcoord out */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + //context->states.idct.frame_vs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFrameIDCT(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* decl i0 ; Texcoords for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl s0 ; Sampler for tex containing picture to display */ + decl = vl_decl_samplers(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* tex2d t0, i0, s0 ; Read src pixel */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c2 + * dp4 o0.z, t0, c3 + * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + //context->states.idct.frame_fs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + static int vlInitIDCT(struct VL_CONTEXT *context) { + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + struct pipe_texture template; + unsigned int i; + assert(context); + pipe = context->pipe; + + context->states.idct.viewport.scale[0] = VL_BLOCK_WIDTH; + context->states.idct.viewport.scale[1] = VL_BLOCK_HEIGHT; + context->states.idct.viewport.scale[2] = 1; + context->states.idct.viewport.scale[3] = 1; + context->states.idct.viewport.translate[0] = 0; + context->states.idct.viewport.translate[1] = 0; + context->states.idct.viewport.translate[2] = 0; + context->states.idct.viewport.translate[3] = 0; + + context->states.idct.render_target.width = VL_BLOCK_WIDTH; + context->states.idct.render_target.height = VL_BLOCK_HEIGHT; + context->states.idct.render_target.num_cbufs = 1; + context->states.idct.render_target.zsbuf = NULL; + + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + sampler.min_lod = 0; + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + context->states.idct.sampler = pipe->create_sampler_state(pipe, &sampler); + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_A8L8_UNORM; + template.last_level = 0; + template.width[0] = 8; + template.height[0] = 8; + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + + context->states.idct.texture = pipe->screen->texture_create(pipe->screen, &template); + + template.format = PIPE_FORMAT_A8R8G8B8_UNORM; + template.width[0] = 16; + template.height[0] = 1; + context->states.idct.basis = pipe->screen->texture_create(pipe->screen, &template); + + for (i = 0; i < 2; ++i) + { + context->states.idct.vertex_bufs[i] = &context->states.csc.vertex_bufs[i]; + context->states.idct.vertex_buf_elems[i] = &context->states.csc.vertex_buf_elems[i]; + /* + context->states.idct.vertex_bufs[i].pitch = sizeof(struct VL_VERTEX2F); + context->states.idct.vertex_bufs[i].max_index = 3; + context->states.idct.vertex_bufs[i].buffer_offset = 0; + context->states.idct.vertex_bufs[i].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_VERTEX2F) * 4 + ); + + context->states.idct.vertex_buf_elems[i].src_offset = 0; + context->states.idct.vertex_buf_elems[i].vertex_buffer_index = i; + context->states.idct.vertex_buf_elems[i].nr_components = 2; + context->states.idct.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; + */ + } + + vlCreateVertexShaderFrameIDCT(context); + vlCreateFragmentShaderFrameIDCT(context); return 0; } static int vlDestroyIDCT(struct VL_CONTEXT *context) { + //unsigned int i; + assert(context); + context->pipe->delete_sampler_state(context->pipe, context->states.idct.sampler); + + //for (i = 0; i < 2; ++i) + //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vertex_bufs[i].buffer); + + pipe_texture_release(&context->states.idct.texture); + pipe_texture_release(&context->states.idct.basis); + + //context->pipe->delete_vs_state(context->pipe, context->states.idct.frame_vs); + //context->pipe->delete_fs_state(context->pipe, context->states.idct.frame_fs); + //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vs_const_buf.buffer); + //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.fs_const_buf.buffer); return 0; } @@ -1271,7 +1511,6 @@ int vlCreateDataBufsMC(struct VL_CONTEXT *context) context->states.mc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Create our texcoord buffers and texcoord buffer elements */ - /* TODO: Should be able to use 1 texcoord buf for chroma textures, 1 buf for ref surfaces */ for (i = 1; i < 3; ++i) { context->states.mc.vertex_bufs[i].pitch = sizeof(struct VL_TEXCOORD2F); diff --git a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h index 9ebda21a1cc..bff318854aa 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.h +++ b/src/gallium/state_trackers/g3dvl/vl_context.h @@ -26,6 +26,16 @@ struct VL_CONTEXT struct { + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state render_target; + struct pipe_sampler_state *sampler; + struct pipe_texture *texture; + struct pipe_texture *basis; + struct pipe_shader_state *frame_vs; + struct pipe_shader_state *frame_fs; + struct pipe_vertex_buffer *vertex_bufs[2]; + struct pipe_vertex_element *vertex_buf_elems[2]; + //struct pipe_constant_buffer vs_const_buf, fs_const_buf; } idct; struct diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 13f7301f07b..145ea32892a 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -9,6 +9,59 @@ #include "vl_defs.h" #include "vl_util.h" +static int vlTransformBlock(short *src, short *dst, short bias) +{ + static const float basis[8][8] = + { + {0.3536, 0.4904, 0.4619, 0.4157, 0.3536, 0.2778, 0.1913, 0.0975}, + {0.3536, 0.4157, 0.1913, -0.0975, -0.3536, -0.4904, -0.4619, -0.2778}, + {0.3536, 0.2778, -0.1913, -0.4904, -0.3536, 0.0975, 0.4619, 0.4157}, + {0.3536, 0.0975, -0.4619, -0.2778, 0.3536, 0.4157, -0.1913, -0.4904}, + {0.3536, -0.0975, -0.4619, 0.2778, 0.3536, -0.4157, -0.1913, 0.4904}, + {0.3536, -0.2778, -0.1913, 0.4904, -0.3536, -0.0975, 0.4619, -0.4157}, + {0.3536, -0.4157, 0.1913, 0.0975, -0.3536, 0.4904, -0.4619, 0.2778}, + {0.3536, -0.4904, 0.4619, -0.4157, 0.3536, -0.2778, 0.1913, -0.0975} + }; + + unsigned int x, y; + short tmp[64]; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + tmp[y * VL_BLOCK_WIDTH + x] = (short) + ( + src[y * VL_BLOCK_WIDTH + 0] * basis[x][0] + + src[y * VL_BLOCK_WIDTH + 1] * basis[x][1] + + src[y * VL_BLOCK_WIDTH + 2] * basis[x][2] + + src[y * VL_BLOCK_WIDTH + 3] * basis[x][3] + + src[y * VL_BLOCK_WIDTH + 4] * basis[x][4] + + src[y * VL_BLOCK_WIDTH + 5] * basis[x][5] + + src[y * VL_BLOCK_WIDTH + 6] * basis[x][6] + + src[y * VL_BLOCK_WIDTH + 7] * basis[x][7] + ); + + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + { + dst[y * VL_BLOCK_WIDTH + x] = bias + (short) + ( + tmp[0 * VL_BLOCK_WIDTH + x] * basis[y][0] + + tmp[1 * VL_BLOCK_WIDTH + x] * basis[y][1] + + tmp[2 * VL_BLOCK_WIDTH + x] * basis[y][2] + + tmp[3 * VL_BLOCK_WIDTH + x] * basis[y][3] + + tmp[4 * VL_BLOCK_WIDTH + x] * basis[y][4] + + tmp[5 * VL_BLOCK_WIDTH + x] * basis[y][5] + + tmp[6 * VL_BLOCK_WIDTH + x] * basis[y][6] + + tmp[7 * VL_BLOCK_WIDTH + x] * basis[y][7] + ); + if (dst[y * VL_BLOCK_WIDTH + x] > 255) + dst[y * VL_BLOCK_WIDTH + x] = 255; + else if (bias > 0 && dst[y * VL_BLOCK_WIDTH + x] < 0) + dst[y * VL_BLOCK_WIDTH + x] = 0; + } + return 0; +} + static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) { unsigned int y; @@ -102,6 +155,9 @@ static int vlGrabBlocks unsigned int tex_pitch; unsigned int tb, sb = 0; + const int do_idct = 1; + short temp_block[64]; + assert(context); assert(blocks); @@ -121,6 +177,17 @@ static int vlGrabBlocks { if (dct_type == VL_DCT_FRAME_CODED) if (sample_type == VL_FULL_SAMPLE) + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); + vlGrabFrameCodedFullBlock + ( + temp_block, + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch + ); + } + else vlGrabFrameCodedFullBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, @@ -128,6 +195,17 @@ static int vlGrabBlocks tex_pitch ); else + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + vlGrabFrameCodedDiffBlock + ( + temp_block, + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch + ); + } + else vlGrabFrameCodedDiffBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, @@ -136,6 +214,17 @@ static int vlGrabBlocks ); else if (sample_type == VL_FULL_SAMPLE) + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); + vlGrabFieldCodedFullBlock + ( + temp_block, + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch + ); + } + else vlGrabFieldCodedFullBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, @@ -143,6 +232,17 @@ static int vlGrabBlocks tex_pitch ); else + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + vlGrabFieldCodedDiffBlock + ( + temp_block, + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch + ); + } + else vlGrabFieldCodedDiffBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, @@ -173,6 +273,17 @@ static int vlGrabBlocks if ((coded_block_pattern >> (1 - tb)) & 1) { if (sample_type == VL_FULL_SAMPLE) + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); + vlGrabFrameCodedFullBlock + ( + temp_block, + texels, + tex_pitch + ); + } + else vlGrabFrameCodedFullBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, @@ -180,6 +291,17 @@ static int vlGrabBlocks tex_pitch ); else + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + vlGrabFrameCodedDiffBlock + ( + temp_block, + texels, + tex_pitch + ); + } + else vlGrabFrameCodedDiffBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, @@ -266,6 +388,8 @@ int vlRenderIMacroBlock if (picture_type != VL_FRAME_PICTURE) return 0; + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks); + pipe = surface->context->pipe; vs_consts = pipe->winsys->buffer_map @@ -298,8 +422,6 @@ int vlRenderIMacroBlock pipe->bind_vs_state(pipe, surface->context->states.mc.i_vs); pipe->bind_fs_state(pipe, surface->context->states.mc.i_fs); - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); return 0; @@ -335,6 +457,8 @@ int vlRenderPMacroBlock if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) return 0; + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); + pipe = surface->context->pipe; vs_consts = pipe->winsys->buffer_map @@ -390,8 +514,6 @@ int vlRenderPMacroBlock pipe->set_sampler_textures(pipe, 4, surface->context->states.mc.textures); pipe->bind_sampler_states(pipe, 4, (void**)surface->context->states.mc.samplers); - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); return 0; @@ -428,6 +550,8 @@ int vlRenderBMacroBlock if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) return 0; + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); + pipe = surface->context->pipe; vs_consts = pipe->winsys->buffer_map @@ -492,8 +616,6 @@ int vlRenderBMacroBlock pipe->set_sampler_textures(pipe, 5, surface->context->states.mc.textures); pipe->bind_sampler_states(pipe, 5, (void**)surface->context->states.mc.samplers); - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); return 0; @@ -589,7 +711,7 @@ int vlPutSurface pipe->set_sampler_textures(pipe, 1, &surface->texture); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); - /* XXX: Need to take destx, desty into consideration */ + /* TODO: Need to take destx, desty into consideration */ pipe->winsys->flush_frontbuffer ( pipe->winsys, diff --git a/src/libXvMC/surface.c b/src/libXvMC/surface.c index 5656895650e..a550114655c 100644 --- a/src/libXvMC/surface.c +++ b/src/libXvMC/surface.c @@ -146,6 +146,8 @@ Status XvMCRenderSurface assert(flags == 0 || flags == XVMC_SECOND_FIELD); + /* TODO: Batch macroblocks by type (I,P,B) */ + for (i = first_macroblock; i < first_macroblock + num_macroblocks; ++i) if (macroblocks->macro_blocks[i].macroblock_type & XVMC_MB_TYPE_INTRA) vlRenderIMacroBlock -- 2.30.2