g3dvl: Use R16_SNORM instead of A8L8_UNORM for block rendering.
author Younes Manton <younes.m@gmail.com>
Wed, 30 Jul 2008 20:47:10 +0000 (16:47 -0400)
committer Younes Manton <younes.m@gmail.com>
Wed, 30 Jul 2008 20:47:10 +0000 (16:47 -0400)
src/gallium/state_trackers/g3dvl/vl_context.c
src/gallium/state_trackers/g3dvl/vl_data.c
src/gallium/state_trackers/g3dvl/vl_surface.c

index 638900b3f472ff94c7a957ed9f8c6335af8c0efb..542ba996a7e285ac6c85454d18f977c634e73a87 100644 (file)
@@ -365,7 +365,7 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context)
 
 static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context)
 {
-       const unsigned int              max_tokens = 50;
+       const unsigned int              max_tokens = 100;
        
        struct pipe_context             *pipe;
        struct pipe_shader_state        fs;
@@ -402,11 +402,19 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context)
                decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
                ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
        }
+       
+       /* decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
 
        /* decl o0                      ; Fragment color */
        decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
        ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
        
+       /* decl t0, t1 */
+       decl = vl_decl_temps(0, 1);
+       ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+       
        /*
         * decl s0                      ; Sampler for luma texture
         * decl s1                      ; Sampler for chroma Cb texture
@@ -419,16 +427,30 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context)
        }
        
        /*
-        * tex2d o0.x, i0, s0           ; Read texel from luma texture into .x channel
-        * tex2d o0.y, i1, s1           ; Read texel from chroma Cb texture into .y channel
-        * tex2d o0.z, i1, s2           ; Read texel from chroma Cr texture into .z channel
-       */
+        * tex2d t1, i0, s0             ; Read texel from luma texture
+        * mov t0.x, t1.x               ; Move luma sample into .x component
+        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
+        * mov t0.y, t1.x               ; Move Cb sample into .y component
+        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
+        * mov t0.z, t1.x               ; Move Cr sample into .z component
+        */
        for (i = 0; i < 3; ++i)
        {
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
+               ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+               
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
                inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
                ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+               
        }
+       
+       /* mul o0, t0, c0               ; Rescale texel to correct range */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
 
        /* end */
        inst = vl_end();
@@ -701,10 +723,7 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context)
                ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
        }
        
-       /*
-        * decl c0                      ; Multiplier to shift 9th bit of differential into place
-        * decl c1                      ; Bias to get differential back to a signed value
-        */
+       /* decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
        decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
        ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
 
@@ -729,38 +748,29 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context)
        }
        
        /*
-        * tex2d t0.xw, i0, s0          ; Read texel from luma texture into .x and .w channels
-        * mov t1.x, t0.w               ; Move 9th bit from .w channel to .x
-        * tex2d t0.yw, i1, s1          ; Read texel from chroma Cb texture into .y and .w channels
-        * mov t1.y, t0.w               ; Move 9th bit from .w channel to .y
-        * tex2d t0.zw, i1, s2          ; Read texel from chroma Cr texture into .z and .w channels
-        * mov t1.z, t0.w               ; Move 9th bit from .w channel to .z
+        * tex2d t1, i0, s0             ; Read texel from luma texture
+        * mov t0.x, t1.x               ; Move luma sample into .x component
+        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
+        * mov t0.y, t1.x               ; Move Cb sample into .y component
+        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
+        * mov t0.z, t1.x               ; Move Cr sample into .z component
         */
        for (i = 0; i < 3; ++i)
        {
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
-               inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W;
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
                ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
                
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0);
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
                inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
                ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+               
        }
        
-       /* mul t1, t1, c0               ; Muliply 9th bit by multiplier to shift it into place */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* add t0, t0, t1               ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* sub t0, t0, c1               ; Subtract bias to get back signed values */
-       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+       /* mul t0, t0, c0               ; Rescale texel to correct range */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
        ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
        
        /* tex2d t1, i2, s3             ; Read texel from ref macroblock */
@@ -826,11 +836,10 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context)
        }
        
        /*
-        * decl c0                      ; Multiplier to shift 9th bit of differential into place
-        * decl c1                      ; Bias to get differential back to a signed value
-        * decl c2                      ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
+        * decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+        * decl c1                      ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
         */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2);
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
        ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
 
        /* decl o0                      ; Fragment color */
@@ -854,42 +863,35 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context)
        }
        
        /*
-        * tex2d t0.xw, i0, s0          ; Read texel from luma texture into .x and .w channels
-        * mov t1.x, t0.w               ; Move 9th bit from .w channel to .x
-        * tex2d t0.yw, i1, s1          ; Read texel from chroma Cb texture into .y and .w channels
-        * mov t1.y, t0.w               ; Move 9th bit from .w channel to .y
-        * tex2d t0.zw, i1, s2          ; Read texel from chroma Cr texture into .z and .w channels
-        * mov t1.z, t0.w               ; Move 9th bit from .w channel to .z
+        * tex2d t1, i0, s0             ; Read texel from luma texture
+        * mov t0.x, t1.x               ; Move luma sample into .x component
+        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
+        * mov t0.y, t1.x               ; Move Cb sample into .y component
+        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
+        * mov t0.z, t1.x               ; Move Cr sample into .z component
         */
        for (i = 0; i < 3; ++i)
        {
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
-               inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W;
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
                ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
                
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0);
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
                inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
                ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+               
        }
        
-       /* mul t1, t1, c0               ; Muliply 9th bit by multiplier to shift it into place */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* add t0, t0, t1               ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* sub t0, t0, c1               ; Subtract bias to get back signed values */
-       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+       /* mul t0, t0, c0               ; Rescale texel to correct range */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
        ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
        
-       /* tex2d t1, i2, s3             ; Read texel from ref macroblock top field
-          tex2d t2, i3, s3             ; Read texel from ref macroblock bottom field */
+       /*
+        * tex2d t1, i2, s3             ; Read texel from ref macroblock top field
+        * tex2d t2, i3, s3             ; Read texel from ref macroblock bottom field
+        */
        for (i = 0; i < 2; ++i)
        {
                inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3);
@@ -897,8 +899,8 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context)
        }
        
        /* XXX: Pos values off by 0.5? */
-       /* sub t4, i4.y, c2.x           ; Sub 0.5 from denormalized pos */
-       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_INPUT, 2);
+       /* sub t4, i4.y, c1.x           ; Sub 0.5 from denormalized pos */
+       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1);
        inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
        inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
        inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
@@ -909,8 +911,8 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context)
        inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
        ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
        
-       /* mul t3, t4, c2.x             ; Multiply pos Y-coord by 1/2 */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2);
+       /* mul t3, t4, c1.x             ; Multiply pos Y-coord by 1/2 */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
        inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
        inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
        inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
@@ -921,8 +923,8 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context)
        inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
        ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
        
-       /* mul t3, t3, c2.y             ; Multiply by 2 */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2);
+       /* mul t3, t3, c1.y             ; Multiply by 2 */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
        inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
        inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
        inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
@@ -1224,11 +1226,10 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context)
        }
        
        /*
-        * decl c0                      ; Multiplier to shift 9th bit of differential into place
-        * decl c1                      ; Bias to get differential back to a signed value
-        * decl c2                      ; Constant 1/2 in .x channel to use as weight to blend past and future texels
+        * decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+        * decl c1                      ; Constant 1/2 in .x channel to use as weight to blend past and future texels
         */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2);
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
        ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
 
        /* decl o0                      ; Fragment color */
@@ -1253,38 +1254,29 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context)
        }
        
        /*
-        * tex2d t0.xw, i0, s0          ; Read texel from luma texture into .x and .w channels
-        * mov t1.x, t0.w               ; Move 9th bit from .w channel to .x
-        * tex2d t0.yw, i1, s1          ; Read texel from chroma Cb texture into .y and .w channels
-        * mov t1.y, t0.w               ; Move 9th bit from .w channel to .y
-        * tex2d t0.zw, i1, s2          ; Read texel from chroma Cr texture into .z and .w channels
-        * mov t1.z, t0.w               ; Move 9th bit from .w channel to .z
+        * tex2d t1, i0, s0             ; Read texel from luma texture
+        * mov t0.x, t1.x               ; Move luma sample into .x component
+        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
+        * mov t0.y, t1.x               ; Move Cb sample into .y component
+        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
+        * mov t0.z, t1.x               ; Move Cr sample into .z component
         */
        for (i = 0; i < 3; ++i)
        {
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
-               inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W;
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
                ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
                
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0);
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
                inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
                ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+               
        }
        
-       /* mul t1, t1, c0               ; Muliply 9th bit by multiplier to shift it into place */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* add t0, t0, t1               ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* sub t0, t0, c1               ; Subtract bias to get back signed values */
-       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+       /* mul t0, t0, c0               ; Rescale texel to correct range */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
        ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
        
        /*
@@ -1297,8 +1289,8 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context)
                ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
        }
        
-       /* lerp t1, c2.x, t1, t2        ; Blend past and future texels */
-       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+       /* lerp t1, c1.x, t1, t2        ; Blend past and future texels */
+       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
        inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
        inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
        inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
@@ -1366,12 +1358,11 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context)
        }
        
        /*
-        * decl c0                      ; Multiplier to shift 9th bit of differential into place
-        * decl c1                      ; Bias to get differential back to a signed value
-        * decl c2                      ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
+        * decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+        * decl c1                      ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
         *                              ; and for Y-mod-2 top/bottom field selection
         */
-       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2);
+       decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
        ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
 
        /* decl o0                      ; Fragment color */
@@ -1396,43 +1387,34 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context)
        }
        
        /*
-        * tex2d t0.xw, i0, s0          ; Read texel from luma texture into .x and .w channels
-        * mov t1.x, t0.w               ; Move 9th bit from .w channel to .x
-        * tex2d t0.yw, i1, s1          ; Read texel from chroma Cb texture into .y and .w channels
-        * mov t1.y, t0.w               ; Move 9th bit from .w channel to .y
-        * tex2d t0.zw, i1, s2          ; Read texel from chroma Cr texture into .z and .w channels
-        * mov t1.z, t0.w               ; Move 9th bit from .w channel to .z
+        * tex2d t1, i0, s0             ; Read texel from luma texture
+        * mov t0.x, t1.x               ; Move luma sample into .x component
+        * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
+        * mov t0.y, t1.x               ; Move Cb sample into .y component
+        * tex2d t1, i1, s2             ; Read texel from chroma Cr texture
+        * mov t0.z, t1.x               ; Move Cr sample into .z component
         */
        for (i = 0; i < 3; ++i)
        {
-               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
-               inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W;
+               inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
                ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
                
-               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0);
+               inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
                inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
-               inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
                ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+               
        }
        
-       /* mul t1, t1, c0               ; Muliply 9th bit by multiplier to shift it into place */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* add t0, t0, t1               ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */
-       inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-       
-       /* sub t0, t0, c1               ; Subtract bias to get back signed values */
-       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+       /* mul t0, t0, c0               ; Rescale texel to correct range */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
        ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
        
        /* XXX: Pos values off by 0.5? */
-       /* sub t4, i6.y, c2.x           ; Sub 0.5 from denormalized pos */
-       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 2);
+       /* sub t4, i6.y, c1.x           ; Sub 0.5 from denormalized pos */
+       inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1);
        inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
        inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
        inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
@@ -1443,8 +1425,8 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context)
        inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
        ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
        
-       /* mul t3, t4, c2.x             ; Multiply pos Y-coord by 1/2 */
-       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2);
+       /* mul t3, t4, c1.x             ; Multiply pos Y-coord by 1/2 */
+       inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
        inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
        inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
        inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
@@ -1455,8 +1437,8 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context)
        inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
        ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
        
-       /* mul t3, t3, c2.y             ; Multiply by 2 */
-       inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2);
+       /* mul t3, t3, c1.y             ; Multiply by 2 */
+       inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
        inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
        inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
        inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
@@ -1497,8 +1479,8 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context)
        inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
        ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
        
-       /* lerp t1, c2.x, t1, t2        ; Blend past and future texels */
-       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+       /* lerp t1, c1.x, t1, t2        ; Blend past and future texels */
+       inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
        inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
        inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
        inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
@@ -1678,7 +1660,7 @@ static int vlInitMC(struct VL_CONTEXT *context)
        
        memset(&template, 0, sizeof(struct pipe_texture));
        template.target = PIPE_TEXTURE_2D;
-       template.format = PIPE_FORMAT_A8L8_UNORM;
+       template.format = PIPE_FORMAT_R16_SNORM;
        template.last_level = 0;
        template.width[0] = 8;
        template.height[0] = 8 * 4;
index 7e6ee8ac12fadc9c7edb82bf48ee69087a3917eb..0e5c8c77f90aa7b48740ae129ee84cef053742c5 100644 (file)
@@ -86,8 +86,7 @@ const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*)
  */
 const struct VL_MC_FS_CONSTS vl_mc_fs_consts =
 {
-       {256.0f, 256.0f, 256.0f, 0.0f},
-       {256.0f / 255.0f, 256.0f / 255.0f, 256.0f / 255.0f, 0.0f},
+       {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
        {0.5f, 2.0f, 0.0f, 0.0f}
 };
 
index 9b91ab4e2238e69c5d2b4d66541c9d281fc7a013..6d4e14b95c3922deb3cd2e2ecd979e293a250d61 100644 (file)
@@ -63,7 +63,7 @@ static int vlTransformBlock(short *src, short *dst, short bias)
        return 0;
 }
 
-static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch)
+static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch)
 {
        unsigned int y;
        
@@ -78,18 +78,7 @@ static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pi
        return 0;
 }
 
-static int vlGrabFrameCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch)
-{
-       unsigned int x, y;
-       
-       for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
-               for (x = 0; x < VL_BLOCK_WIDTH; ++x)
-                       dst[y * dst_pitch + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100;
-       
-       return 0;
-}
-
-static int vlGrabFieldCodedFullBlock(short *src, short *dst, unsigned int dst_pitch)
+static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch)
 {
        unsigned int y;
        
@@ -114,30 +103,17 @@ static int vlGrabFieldCodedFullBlock(short *src, short *dst, unsigned int dst_pi
        return 0;
 }
 
-static int vlGrabFieldCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch)
-{
-       unsigned int x, y;
-       
-       for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y)
-               for (x = 0; x < VL_BLOCK_WIDTH; ++x)
-                       dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100;
-       
-       dst += VL_BLOCK_HEIGHT * dst_pitch;
-       
-       for (; y < VL_BLOCK_HEIGHT; ++y)
-               for (x = 0; x < VL_BLOCK_WIDTH; ++x)
-                       dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100;
-       
-       return 0;
-}
-
 static int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
 {
-       unsigned int x, y;
+       unsigned int y;
        
        for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
-               for (x = 0; x < VL_BLOCK_WIDTH; ++x)
-                       dst[y * dst_pitch + x] = 0x100;
+               memset
+               (
+                       dst + y * dst_pitch,
+                       0,
+                       VL_BLOCK_WIDTH * 2
+               );
        
        return 0;
 }
@@ -156,7 +132,6 @@ static int vlGrabBlocks
        unsigned int            tex_pitch;
        unsigned int            tb, sb = 0;
        
-       const int               do_idct = 1;
        short                   temp_block[64];
        
        assert(context);
@@ -176,80 +151,26 @@ static int vlGrabBlocks
        {
                if ((coded_block_pattern >> (5 - tb)) & 1)
                {
+                       if (sample_type == VL_FULL_SAMPLE)
+                               vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
+                       else
+                               vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
+                       
                        if (dct_type == VL_DCT_FRAME_CODED)
-                               if (sample_type == VL_FULL_SAMPLE)
-                                       if (do_idct)
-                                       {
-                                               vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
-                                               vlGrabFrameCodedFullBlock
-                                               (
-                                                       temp_block,
-                                                       texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
-                                                       tex_pitch
-                                               );
-                                       }
-                                       else
-                                       vlGrabFrameCodedFullBlock
-                                       (
-                                               blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
-                                               texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
-                                               tex_pitch
-                                       );
-                               else
-                                       if (do_idct)
-                                       {
-                                               vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
-                                               vlGrabFrameCodedDiffBlock
-                                               (
-                                                       temp_block,
-                                                       texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
-                                                       tex_pitch
-                                               );
-                                       }
-                                       else
-                                       vlGrabFrameCodedDiffBlock
-                                       (
-                                               blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
-                                               texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
-                                               tex_pitch
-                                       );
+                               vlGrabFrameCodedBlock
+                               (
+                                       temp_block,
+                                       texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
+                                       tex_pitch
+                               );
                        else
-                               if (sample_type == VL_FULL_SAMPLE)
-                                       if (do_idct)
-                                       {
-                                               vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
-                                               vlGrabFieldCodedFullBlock
-                                               (
-                                                       temp_block,
-                                                       texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
-                                                       tex_pitch
-                                               );
-                                       }
-                                       else
-                                       vlGrabFieldCodedFullBlock
-                                       (
-                                               blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
-                                               texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
-                                               tex_pitch
-                                       );
-                               else
-                                       if (do_idct)
-                                       {
-                                               vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
-                                               vlGrabFieldCodedDiffBlock
-                                               (
-                                                       temp_block,
-                                                       texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
-                                                       tex_pitch
-                                               );
-                                       }
-                                       else
-                                       vlGrabFieldCodedDiffBlock
-                                       (
-                                               blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
-                                               texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
-                                               tex_pitch
-                                       );
+                               vlGrabFieldCodedBlock
+                               (
+                                       temp_block,
+                                       texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
+                                       tex_pitch
+                               );
+                       
                        ++sb;
                }
                else
@@ -272,43 +193,18 @@ static int vlGrabBlocks
                tex_pitch = tex_surface->stride / tex_surface->block.size;
                
                if ((coded_block_pattern >> (1 - tb)) & 1)
-               {                       
+               {
                        if (sample_type == VL_FULL_SAMPLE)
-                               if (do_idct)
-                               {
-                                       vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
-                                       vlGrabFrameCodedFullBlock
-                                       (
-                                               temp_block,
-                                               texels,
-                                               tex_pitch
-                                       );
-                               }
-                               else
-                               vlGrabFrameCodedFullBlock
-                               (
-                                       blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
-                                       texels,
-                                       tex_pitch
-                               );
+                               vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
                        else
-                               if (do_idct)
-                               {
-                                       vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
-                                       vlGrabFrameCodedDiffBlock
-                                       (
-                                               temp_block,
-                                               texels,
-                                               tex_pitch
-                                       );
-                               }
-                               else
-                               vlGrabFrameCodedDiffBlock
-                               (
-                                       blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
-                                       texels,
-                                       tex_pitch
-                               );
+                               vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
+                       
+                       vlGrabFrameCodedBlock
+                       (
+                               temp_block,
+                               texels,
+                               tex_pitch
+                       );
                        
                        ++sb;
                }