From 6235141fd2c7af21c2b41ca66f06abc3cb0bbc24 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sun, 6 Jul 2008 22:04:29 -0400
Subject: [PATCH] g3dvl: IDCT part 1.

Very basic IDCT support is in, performed CPU-side for now.
---
 src/gallium/state_trackers/g3dvl/vl_context.c | 241 +++++++++++++++++-
 src/gallium/state_trackers/g3dvl/vl_context.h |  10 +
 src/gallium/state_trackers/g3dvl/vl_surface.c | 136 +++++++++-
 src/libXvMC/surface.c                         |   2 +
 4 files changed, 381 insertions(+), 8 deletions(-)

diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c
index 58971bd7c79..1668ad1651b 100644
--- a/src/gallium/state_trackers/g3dvl/vl_context.c
+++ b/src/gallium/state_trackers/g3dvl/vl_context.c
@@ -11,22 +11,262 @@
 #include <tgsi/util/tgsi_build.h>
 #include "vl_shader_build.h"
 #include "vl_data.h"
+#include "vl_defs.h"
 #include "vl_util.h"
 
+static int vlCreateVertexShaderFrameIDCT(struct VL_CONTEXT *context)
+{
+	/* Builds the TGSI token stream of a pass-through vertex shader: position and
+	 * texcoord inputs are moved to the matching outputs.
+	 * Returns 0 on success, 1 if the token buffer could not be allocated. */
+	const unsigned int		max_tokens = 50;
+	struct pipe_context		*pipe;
+	struct pipe_shader_state	vs;
+	struct tgsi_token		*tokens;
+	struct tgsi_header		*header;
+	struct tgsi_full_declaration	decl;
+	struct tgsi_full_instruction	inst;
+	unsigned int			ti;
+	unsigned int			i;
+	
+	assert(context);
+	pipe = context->pipe;
+	tokens = malloc(max_tokens * sizeof *tokens);
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor: this is the vertex stage, so it must not be tagged as a fragment program */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+	ti = 3;
+	
+	/*
+	 * decl i0		; Vertex pos
+	 * decl i1		; Vertex texcoords
+	 */
+	for (i = 0; i < 2; i++)
+	{
+		decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+	
+	/*
+	 * decl o0		; Vertex pos
+	 * decl o1		; Vertex texcoords
+	 */
+	for (i = 0; i < 2; i++)
+	{
+		decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+	
+	/*
+	 * mov o0, i0		; Move pos in to pos out
+	 * mov o1, i1		; Move texcoord in to texcoord out */
+	for (i = 0; i < 2; ++i)
+	{
+		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+	
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	/* State creation is still disabled below, so the tokens are built and then freed */
+	vs.tokens = tokens;
+	//context->states.idct.frame_vs = pipe->create_vs_state(pipe, &vs);
+	free(tokens);
+	
+	return 0;
+}
+
+static int vlCreateFragmentShaderFrameIDCT(struct VL_CONTEXT *context)
+{
+	/* Builds the TGSI token stream of the frame IDCT fragment shader: samples s0,
+	 * subtracts constant c0, then writes a dp4 against c1..c4 to each output channel.
+	 * Returns 0 on success, 1 if the token buffer could not be allocated. */
+	const unsigned int		max_tokens = 50;
+	struct pipe_context		*pipe;
+	struct pipe_shader_state	fs;
+	struct tgsi_token		*tokens;
+	struct tgsi_header		*header;
+	struct tgsi_full_declaration	decl;
+	struct tgsi_full_instruction	inst;
+	unsigned int			ti;
+	unsigned int			i;
+	
+	assert(context);
+	pipe = context->pipe;
+	tokens = malloc(max_tokens * sizeof *tokens);
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+	ti = 3;
+
+	/* decl i0		; Texcoords for s0 */
+	decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/* decl o0		; Fragment color */
+	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	
+	/* decl s0		; Sampler for tex containing picture to display */
+	decl = vl_decl_samplers(0, 0);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	
+	/* tex2d t0, i0, s0	; Read src pixel */
+	inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	
+	/* sub t0, t0, c0	; Subtract bias vector from pixel */
+	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	
+	/*
+	 * dp4 o0.x, t0, c1	; Multiply pixel by the color conversion matrix
+	 * dp4 o0.y, t0, c2
+	 * dp4 o0.z, t0, c3
+	 * dp4 o0.w, t0, c4	; XXX: Don't need 4th coefficient
+	 */
+	for (i = 0; i < 4; ++i)
+	{
+		inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1);
+		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	/* State creation is still disabled below, so the tokens are built and then freed */
+	fs.tokens = tokens;
+	//context->states.idct.frame_fs = pipe->create_fs_state(pipe, &fs);
+	free(tokens);
+	
+	return 0;
+}
+
 static int vlInitIDCT(struct VL_CONTEXT *context)
 {
+	struct pipe_context		*pipe;
+	struct pipe_sampler_state	sampler;
+	struct pipe_texture		template;
+	unsigned int			i;
+	/* Sets up the IDCT viewport, render target, sampler, textures and vertex buffer pointers, then runs the shader builders */
 	assert(context);
 	
+	pipe = context->pipe;
+	
+	context->states.idct.viewport.scale[0] = VL_BLOCK_WIDTH;
+	context->states.idct.viewport.scale[1] = VL_BLOCK_HEIGHT;
+	context->states.idct.viewport.scale[2] = 1;
+	context->states.idct.viewport.scale[3] = 1;
+	context->states.idct.viewport.translate[0] = 0;
+	context->states.idct.viewport.translate[1] = 0;
+	context->states.idct.viewport.translate[2] = 0;
+	context->states.idct.viewport.translate[3] = 0;
+	
+	context->states.idct.render_target.width = VL_BLOCK_WIDTH;
+	context->states.idct.render_target.height = VL_BLOCK_HEIGHT;
+	context->states.idct.render_target.num_cbufs = 1;
+	context->states.idct.render_target.zsbuf = NULL;
+	
+	/* Zero the whole struct first: only some of its fields are assigned below,
+	 * and the remaining ones must not reach the driver holding stack garbage. */
+	memset(&sampler, 0, sizeof(struct pipe_sampler_state));
+	sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+	sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+	sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+	sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+	sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+	sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+	sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+	sampler.compare_func = PIPE_FUNC_ALWAYS;
+	sampler.normalized_coords = 1;
+	sampler.min_lod = 0;
+	/*
+	 * Left at zero by the memset: prefilter, shadow_ambient, lod_bias, max_lod, border_color[], max_anisotropy
+	 */
+	context->states.idct.sampler = pipe->create_sampler_state(pipe, &sampler);
+	
+	memset(&template, 0, sizeof(struct pipe_texture));
+	template.target = PIPE_TEXTURE_2D;
+	template.format = PIPE_FORMAT_A8L8_UNORM;
+	template.last_level = 0;
+	template.width[0] = 8;
+	template.height[0] = 8;
+	template.depth[0] = 1;
+	template.compressed = 0;
+	pf_get_block(template.format, &template.block);
+	
+	context->states.idct.texture = pipe->screen->texture_create(pipe->screen, &template);
+	
+	template.format = PIPE_FORMAT_A8R8G8B8_UNORM;
+	template.width[0] = 16;
+	template.height[0] = 1;
 	
+	context->states.idct.basis = pipe->screen->texture_create(pipe->screen, &template);
+	
+	for (i = 0; i < 2; ++i)
+	{
+		context->states.idct.vertex_bufs[i] = &context->states.csc.vertex_bufs[i];
+		context->states.idct.vertex_buf_elems[i] = &context->states.csc.vertex_buf_elems[i];
+		/* Disabled: dedicated IDCT buffers; the CSC buffers/elements are pointed at above instead
+		context->states.idct.vertex_bufs[i].pitch = sizeof(struct VL_VERTEX2F);
+		context->states.idct.vertex_bufs[i].max_index = 3;
+		context->states.idct.vertex_bufs[i].buffer_offset = 0;
+		context->states.idct.vertex_bufs[i].buffer = pipe->winsys->buffer_create
+		(
+			pipe->winsys,
+			1,
+			PIPE_BUFFER_USAGE_VERTEX,
+			sizeof(struct VL_VERTEX2F) * 4
+		);
+	
+		context->states.idct.vertex_buf_elems[i].src_offset = 0;
+		context->states.idct.vertex_buf_elems[i].vertex_buffer_index = i;
+		context->states.idct.vertex_buf_elems[i].nr_components = 2;
+		context->states.idct.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT;
+		*/
+	}
+	
+	vlCreateVertexShaderFrameIDCT(context);
+	vlCreateFragmentShaderFrameIDCT(context);
 	
 	return 0;
 }
 
 static int vlDestroyIDCT(struct VL_CONTEXT *context)
 {
+	//unsigned int i;
+	/* Releases the sampler state and the two textures that vlInitIDCT() created; always returns 0 */
 	assert(context);
 	
+	context->pipe->delete_sampler_state(context->pipe, context->states.idct.sampler);
+	/* Disabled: idct.vertex_bufs[] only point at the CSC vertex buffers (assigned in vlInitIDCT) */
+	//for (i = 0; i < 2; ++i)
+		//context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vertex_bufs[i].buffer);
+	
+	pipe_texture_release(&context->states.idct.texture);
+	pipe_texture_release(&context->states.idct.basis);
+	/* Disabled to match: the create_*_state calls in the IDCT shader builders are commented out too */
+	//context->pipe->delete_vs_state(context->pipe, context->states.idct.frame_vs);
+	//context->pipe->delete_fs_state(context->pipe, context->states.idct.frame_fs);
 	
+	//context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vs_const_buf.buffer);
+	//context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.fs_const_buf.buffer);
 	
 	return 0;
 }
@@ -1271,7 +1511,6 @@ int vlCreateDataBufsMC(struct VL_CONTEXT *context)
 	context->states.mc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
 	
 	/* Create our texcoord buffers and texcoord buffer elements */
-	/* TODO: Should be able to use 1 texcoord buf for chroma textures, 1 buf for ref surfaces */
 	for (i = 1; i < 3; ++i)
 	{
 		context->states.mc.vertex_bufs[i].pitch = sizeof(struct VL_TEXCOORD2F);
diff --git a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h
index 9ebda21a1cc..bff318854aa 100644
--- a/src/gallium/state_trackers/g3dvl/vl_context.h
+++ b/src/gallium/state_trackers/g3dvl/vl_context.h
@@ -26,6 +26,16 @@ struct VL_CONTEXT
 		
 		struct
 		{
+			struct pipe_viewport_state		viewport;
+			struct pipe_framebuffer_state		render_target;
+			struct pipe_sampler_state		*sampler;
+			struct pipe_texture			*texture;
+			struct pipe_texture			*basis;
+			struct pipe_shader_state		*frame_vs;
+			struct pipe_shader_state		*frame_fs;
+			struct pipe_vertex_buffer 		*vertex_bufs[2];
+			struct pipe_vertex_element		*vertex_buf_elems[2];
+			//struct pipe_constant_buffer		vs_const_buf, fs_const_buf;
 		} idct;
 		
 		struct
diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c
index 13f7301f07b..145ea32892a 100644
--- a/src/gallium/state_trackers/g3dvl/vl_surface.c
+++ b/src/gallium/state_trackers/g3dvl/vl_surface.c
@@ -9,6 +9,59 @@
 #include "vl_defs.h"
 #include "vl_util.h"
 
+static int vlTransformBlock(short *src, short *dst, short bias)
+{
+	/* CPU-side transform of one block: every row of src is multiplied by the basis table into tmp, then every
+	 * column of tmp; bias is added and the result clamped (to 255 always, to 0 only if bias > 0). Returns 0. */
+	static const float basis[8][8] =
+	{
+		{ 0.3536,  0.4904,  0.4619,  0.4157,  0.3536,  0.2778,  0.1913,  0.0975 },
+		{ 0.3536,  0.4157,  0.1913, -0.0975, -0.3536, -0.4904, -0.4619, -0.2778 },
+		{ 0.3536,  0.2778, -0.1913, -0.4904, -0.3536,  0.0975,  0.4619,  0.4157 },
+		{ 0.3536,  0.0975, -0.4619, -0.2778,  0.3536,  0.4157, -0.1913, -0.4904 },
+		{ 0.3536, -0.0975, -0.4619,  0.2778,  0.3536, -0.4157, -0.1913,  0.4904 },
+		{ 0.3536, -0.2778, -0.1913,  0.4904, -0.3536, -0.0975,  0.4619, -0.4157 },
+		{ 0.3536, -0.4157,  0.1913,  0.0975, -0.3536,  0.4904, -0.4619,  0.2778 },
+		{ 0.3536, -0.4904,  0.4619, -0.4157,  0.3536, -0.2778,  0.1913, -0.0975 }
+	};
+	unsigned int	col, row;
+	short		tmp[64];
+
+	/* Pass 1: rows of src -> tmp (intermediate values are truncated to short) */
+	for (row = 0; row < VL_BLOCK_HEIGHT; ++row)
+	{
+		const short *in = src + row * VL_BLOCK_WIDTH;
+		for (col = 0; col < VL_BLOCK_WIDTH; ++col)
+		{
+			const float *b = basis[col];
+			tmp[row * VL_BLOCK_WIDTH + col] = (short)
+			(
+				in[0] * b[0] + in[1] * b[1] + in[2] * b[2] + in[3] * b[3] +
+				in[4] * b[4] + in[5] * b[5] + in[6] * b[6] + in[7] * b[7]
+			);
+		}
+	}
+
+	/* Pass 2: columns of tmp -> dst, with bias and clamping applied per sample */
+	for (col = 0; col < VL_BLOCK_WIDTH; ++col)
+		for (row = 0; row < VL_BLOCK_HEIGHT; ++row)
+		{
+			const short *in = tmp + col;
+			const float *b = basis[row];
+			short v = bias + (short)
+			(
+				in[0 * VL_BLOCK_WIDTH] * b[0] + in[1 * VL_BLOCK_WIDTH] * b[1] + in[2 * VL_BLOCK_WIDTH] * b[2] + in[3 * VL_BLOCK_WIDTH] * b[3] +
+				in[4 * VL_BLOCK_WIDTH] * b[4] + in[5 * VL_BLOCK_WIDTH] * b[5] + in[6 * VL_BLOCK_WIDTH] * b[6] + in[7 * VL_BLOCK_WIDTH] * b[7]
+			);
+			if (v > 255)
+				v = 255;
+			else if (bias > 0 && v < 0)
+				v = 0;
+			dst[row * VL_BLOCK_WIDTH + col] = v;
+		}
+	return 0;
+}
+
 static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch)
 {
 	unsigned int y;
@@ -102,6 +155,9 @@ static int vlGrabBlocks
 	unsigned int		tex_pitch;
 	unsigned int		tb, sb = 0;
 	
+	const int		do_idct = 1;
+	short			temp_block[64];
+	
 	assert(context);
 	assert(blocks);
 	
@@ -121,6 +177,17 @@ static int vlGrabBlocks
 		{
 			if (dct_type == VL_DCT_FRAME_CODED)
 				if (sample_type == VL_FULL_SAMPLE)
+					if (do_idct)
+					{
+						vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
+						vlGrabFrameCodedFullBlock
+						(
+							temp_block,
+							texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
+							tex_pitch
+						);
+					}
+					else
 					vlGrabFrameCodedFullBlock
 					(
 						blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
@@ -128,6 +195,17 @@ static int vlGrabBlocks
 						tex_pitch
 					);
 				else
+					if (do_idct)
+					{
+						vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
+						vlGrabFrameCodedDiffBlock
+						(
+							temp_block,
+							texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
+							tex_pitch
+						);
+					}
+					else
 					vlGrabFrameCodedDiffBlock
 					(
 						blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
@@ -136,6 +214,17 @@ static int vlGrabBlocks
 					);
 			else
 				if (sample_type == VL_FULL_SAMPLE)
+					if (do_idct)
+					{
+						vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
+						vlGrabFieldCodedFullBlock
+						(
+							temp_block,
+							texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
+							tex_pitch
+						);
+					}
+					else
 					vlGrabFieldCodedFullBlock
 					(
 						blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
@@ -143,6 +232,17 @@ static int vlGrabBlocks
 						tex_pitch
 					);
 				else
+					if (do_idct)
+					{
+						vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
+						vlGrabFieldCodedDiffBlock
+						(
+							temp_block,
+							texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
+							tex_pitch
+						);
+					}
+					else
 					vlGrabFieldCodedDiffBlock
 					(
 						blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
@@ -173,6 +273,17 @@ static int vlGrabBlocks
 		if ((coded_block_pattern >> (1 - tb)) & 1)
 		{			
 			if (sample_type == VL_FULL_SAMPLE)
+				if (do_idct)
+				{
+					vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
+					vlGrabFrameCodedFullBlock
+					(
+						temp_block,
+						texels,
+						tex_pitch
+					);
+				}
+				else
 				vlGrabFrameCodedFullBlock
 				(
 					blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
@@ -180,6 +291,17 @@ static int vlGrabBlocks
 					tex_pitch
 				);
 			else
+				if (do_idct)
+				{
+					vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
+					vlGrabFrameCodedDiffBlock
+					(
+						temp_block,
+						texels,
+						tex_pitch
+					);
+				}
+				else
 				vlGrabFrameCodedDiffBlock
 				(
 					blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
@@ -266,6 +388,8 @@ int vlRenderIMacroBlock
 	if (picture_type != VL_FRAME_PICTURE)
 		return 0;
 	
+	vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks);
+	
 	pipe = surface->context->pipe;
 	
 	vs_consts = pipe->winsys->buffer_map
@@ -298,8 +422,6 @@ int vlRenderIMacroBlock
 	pipe->bind_vs_state(pipe, surface->context->states.mc.i_vs);
 	pipe->bind_fs_state(pipe, surface->context->states.mc.i_fs);
 	
-	vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks);
-	
 	pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
 	
 	return 0;
@@ -335,6 +457,8 @@ int vlRenderPMacroBlock
 	if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC)
 		return 0;
 	
+	vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
+	
 	pipe = surface->context->pipe;
 	
 	vs_consts = pipe->winsys->buffer_map
@@ -390,8 +514,6 @@ int vlRenderPMacroBlock
 	pipe->set_sampler_textures(pipe, 4, surface->context->states.mc.textures);
 	pipe->bind_sampler_states(pipe, 4, (void**)surface->context->states.mc.samplers);
 	
-	vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
-	
 	pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
 	
 	return 0;
@@ -428,6 +550,8 @@ int vlRenderBMacroBlock
 	if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC)
 		return 0;
 	
+	vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
+	
 	pipe = surface->context->pipe;
 	
 	vs_consts = pipe->winsys->buffer_map
@@ -492,8 +616,6 @@ int vlRenderBMacroBlock
 	pipe->set_sampler_textures(pipe, 5, surface->context->states.mc.textures);
 	pipe->bind_sampler_states(pipe, 5, (void**)surface->context->states.mc.samplers);
 	
-	vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks);
-	
 	pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24);
 	
 	return 0;
@@ -589,7 +711,7 @@ int vlPutSurface
 	pipe->set_sampler_textures(pipe, 1, &surface->texture);
 	pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
 	pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
-	/* XXX: Need to take destx, desty into consideration */
+	/* TODO: Need to take destx, desty into consideration */
 	pipe->winsys->flush_frontbuffer
 	(
 		pipe->winsys,
diff --git a/src/libXvMC/surface.c b/src/libXvMC/surface.c
index 5656895650e..a550114655c 100644
--- a/src/libXvMC/surface.c
+++ b/src/libXvMC/surface.c
@@ -146,6 +146,8 @@ Status XvMCRenderSurface
 	
 	assert(flags == 0 || flags == XVMC_SECOND_FIELD);
 	
+	/* TODO: Batch macroblocks by type (I,P,B) */
+	
 	for (i = first_macroblock; i < first_macroblock + num_macroblocks; ++i)
 		if (macroblocks->macro_blocks[i].macroblock_type & XVMC_MB_TYPE_INTRA)
 			vlRenderIMacroBlock
-- 
2.30.2