From 1d27b4bc0f291ec955e59b1ead943100d8a15505 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Wed, 30 Jul 2008 16:47:10 -0400 Subject: [PATCH] g3dvl: Use R16_SNORM instead of A8L8_UNORM for block rendering. --- src/gallium/state_trackers/g3dvl/vl_context.c | 248 ++++++++---------- src/gallium/state_trackers/g3dvl/vl_data.c | 3 +- src/gallium/state_trackers/g3dvl/vl_surface.c | 178 +++---------- 3 files changed, 153 insertions(+), 276 deletions(-) diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 638900b3f47..542ba996a7e 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -365,7 +365,7 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) { - const unsigned int max_tokens = 50; + const unsigned int max_tokens = 100; struct pipe_context *pipe; struct pipe_shader_state fs; @@ -402,11 +402,19 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* decl o0 ; Fragment color */ decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* * decl s0 ; Sampler for luma texture * decl s1 ; Sampler for chroma Cb texture @@ -419,16 +427,30 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) } /* - * tex2d o0.x, i0, s0 ; Read texel from luma texture into .x channel - * tex2d o0.y, i1, s1 ; Read texel from chroma Cb texture into .y channel - * tex2d o0.z, i1, s2 ; Read texel from chroma Cr texture into .z channel - */ + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* end */ inst = vl_end(); @@ -701,10 +723,7 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* - * decl c0 ; Multiplier to shift 9th bit of differential into place - * decl c1 ; Bias to get differential back to a signed value - */ + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -729,38 +748,29 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) } /* - * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - * mov t1.x, t0.w ; Move 9th bit from .w channel to .x - * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - * mov t1.y, t0.w ; Move 9th bit from .w channel to .y - * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } - /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t0, t0, c1 ; Subtract bias to get back signed values */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ @@ -826,11 +836,10 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) } /* - * decl c0 ; Multiplier to shift 9th bit of differential into place - * decl c1 ; Bias to get differential back to a signed value - * decl c2 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* decl o0 ; Fragment color */ @@ -854,42 +863,35 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) } /* - * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - * mov t1.x, t0.w ; Move 9th bit from .w channel to .x - * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - * mov t1.y, t0.w ; Move 9th bit from .w channel to .y - * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } - /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t0, t0, c1 ; Subtract bias to get back signed values */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* tex2d t1, i2, s3 ; Read texel from ref macroblock top field - tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field */ + /* + * tex2d t1, i2, s3 ; Read texel from ref macroblock top field + * tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field + */ for (i = 0; i < 2; ++i) { inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); @@ -897,8 +899,8 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) } /* XXX: Pos values off by 0.5? */ - /* sub t4, i4.y, c2.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_INPUT, 2); + /* sub t4, i4.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; @@ -909,8 +911,8 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t4, c2.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2); + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; @@ -921,8 +923,8 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t3, c2.y ; Multiply by 2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2); + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; @@ -1224,11 +1226,10 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) } /* - * decl c0 ; Multiplier to shift 9th bit of differential into place - * decl c1 ; Bias to get differential back to a signed value - * decl c2 ; Constant 1/2 in .x channel to use as weight to blend past and future texels + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* decl o0 ; Fragment color */ @@ -1253,38 +1254,29 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) } /* - * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - * mov t1.x, t0.w ; Move 9th bit from .w channel to .x - * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - * mov t1.y, t0.w ; Move 9th bit from .w channel to .y - * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } - /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t0, t0, c1 ; Subtract bias to get back signed values */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* @@ -1297,8 +1289,8 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* lerp t1, c2.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; @@ -1366,12 +1358,11 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) } /* - * decl c0 ; Multiplier to shift 9th bit of differential into place - * decl c1 ; Bias to get differential back to a signed value - * decl c2 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels * ; and for Y-mod-2 top/bottom field selection */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* decl o0 ; Fragment color */ @@ -1396,43 +1387,34 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) } /* - * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - * mov t1.x, t0.w ; Move 9th bit from .w channel to .x - * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - * mov t1.y, t0.w ; Move 9th bit from .w channel to .y - * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } - /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t0, t0, c1 ; Subtract bias to get back signed values */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* XXX: Pos values off by 0.5? */ - /* sub t4, i6.y, c2.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 2); + /* sub t4, i6.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; @@ -1443,8 +1425,8 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t4, c2.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2); + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; @@ -1455,8 +1437,8 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t3, c2.y ; Multiply by 2 */ - inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2); + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; @@ -1497,8 +1479,8 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* lerp t1, c2.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; @@ -1678,7 +1660,7 @@ static int vlInitMC(struct VL_CONTEXT *context) memset(&template, 0, sizeof(struct pipe_texture)); template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_A8L8_UNORM; + template.format = PIPE_FORMAT_R16_SNORM; template.last_level = 0; template.width[0] = 8; template.height[0] = 8 * 4; diff --git a/src/gallium/state_trackers/g3dvl/vl_data.c b/src/gallium/state_trackers/g3dvl/vl_data.c index 7e6ee8ac12f..0e5c8c77f90 100644 --- a/src/gallium/state_trackers/g3dvl/vl_data.c +++ b/src/gallium/state_trackers/g3dvl/vl_data.c @@ -86,8 +86,7 @@ const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*) */ const struct VL_MC_FS_CONSTS vl_mc_fs_consts = { - {256.0f, 256.0f, 256.0f, 0.0f}, - {256.0f / 255.0f, 256.0f / 255.0f, 256.0f / 255.0f, 0.0f}, + {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, {0.5f, 2.0f, 0.0f, 0.0f} }; diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 9b91ab4e223..6d4e14b95c3 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -63,7 +63,7 @@ static int vlTransformBlock(short *src, short *dst, short bias) return 0; } -static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) +static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) { unsigned int y; @@ -78,18 +78,7 @@ static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pi return 0; } -static int vlGrabFrameCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int x, y; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - dst[y * dst_pitch + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; - - return 0; -} - -static int vlGrabFieldCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) +static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) { unsigned int y; @@ -114,30 +103,17 @@ static int vlGrabFieldCodedFullBlock(short *src, short *dst, unsigned int dst_pi return 0; } -static int vlGrabFieldCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int x, y; - - for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; - - dst += VL_BLOCK_HEIGHT * dst_pitch; - - for (; y < VL_BLOCK_HEIGHT; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; - - return 0; -} - static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) { - unsigned int x, y; + unsigned int y; for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - dst[y * dst_pitch + x] = 0x100; + memset + ( + dst + y * dst_pitch, + 0, + VL_BLOCK_WIDTH * 2 + ); return 0; } @@ -156,7 +132,6 @@ static int vlGrabBlocks unsigned int tex_pitch; unsigned int tb, sb = 0; - const int do_idct = 1; short temp_block[64]; assert(context); @@ -176,80 +151,26 @@ static int vlGrabBlocks { if ((coded_block_pattern >> (5 - tb)) & 1) { + if (sample_type == VL_FULL_SAMPLE) + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); + else + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + if (dct_type == VL_DCT_FRAME_CODED) - if (sample_type == VL_FULL_SAMPLE) - if (do_idct) - { - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); - vlGrabFrameCodedFullBlock - ( - temp_block, - texels + tb * tex_pitch * VL_BLOCK_HEIGHT, - tex_pitch - ); - } - else - vlGrabFrameCodedFullBlock - ( - blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels + tb * tex_pitch * VL_BLOCK_HEIGHT, - tex_pitch - ); - else - if (do_idct) - { - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); - vlGrabFrameCodedDiffBlock - ( - temp_block, - texels + tb * tex_pitch * VL_BLOCK_HEIGHT, - tex_pitch - ); - } - else - vlGrabFrameCodedDiffBlock - ( - blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels + tb * tex_pitch * VL_BLOCK_HEIGHT, - tex_pitch - ); + vlGrabFrameCodedBlock + ( + temp_block, + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch + ); else - if (sample_type == VL_FULL_SAMPLE) - if (do_idct) - { - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); - vlGrabFieldCodedFullBlock - ( - temp_block, - texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, - tex_pitch - ); - } - else - vlGrabFieldCodedFullBlock - ( - blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, - tex_pitch - ); - else - if (do_idct) - { - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); - vlGrabFieldCodedDiffBlock - ( - temp_block, - texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, - tex_pitch - ); - } - else - vlGrabFieldCodedDiffBlock - ( - blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, - tex_pitch - ); + vlGrabFieldCodedBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch + ); + ++sb; } else @@ -272,43 +193,18 @@ static int vlGrabBlocks tex_pitch = tex_surface->stride / tex_surface->block.size; if ((coded_block_pattern >> (1 - tb)) & 1) - { + { if (sample_type == VL_FULL_SAMPLE) - if (do_idct) - { - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); - vlGrabFrameCodedFullBlock - ( - temp_block, - texels, - tex_pitch - ); - } - else - vlGrabFrameCodedFullBlock - ( - blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels, - tex_pitch - ); + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); else - if (do_idct) - { - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); - vlGrabFrameCodedDiffBlock - ( - temp_block, - texels, - tex_pitch - ); - } - else - vlGrabFrameCodedDiffBlock - ( - blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels, - tex_pitch - ); + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + + vlGrabFrameCodedBlock + ( + temp_block, + texels, + tex_pitch + ); ++sb; } -- 2.30.2