[g3dvl] implement workaround for missing blender clamp control
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_bitstream.c
index 7723d487d84ebc2006cd1a36919126f6dbc355a6..7a14efb627e2adff90e9088b479eebca6a8d4aea 100644 (file)
@@ -55,7 +55,6 @@
 #include <pipe/p_video_state.h>
 
 #include "vl_vlc.h"
-#include "vl_zscan.h"
 #include "vl_mpeg12_bitstream.h"
 
 /* take num bits from the high part of bit_buf and zero extend them */
 /* take num bits from the high part of bit_buf and sign extend them */
 #define SBITS(buf,num) (((int32_t)(buf)) >> (32 - (num)))
 
-#define SATURATE(val)                  \
-do {                                   \
-   if ((uint32_t)(val + 2048) > 4095)  \
-      val = (val > 0) ? 2047 : -2048;  \
-} while (0)
-
 /* macroblock modes */
 #define MACROBLOCK_INTRA 1
 #define MACROBLOCK_PATTERN 2
@@ -721,16 +714,12 @@ get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs)
 }
 
 static inline void
-get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                    const int scan[64], int quantizer_scale, short *dest)
+get_intra_block_B14(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
 {
-   int i, j, val;
-   uint8_t *quant_matrix = picture->intra_quantizer_matrix;
-   int mismatch;
+   int i, val;
    const DCTtab *tab;
 
    i = 0;
-   mismatch = ~dest[0];
 
    vl_vlc_needbits(&bs->vlc);
 
@@ -744,18 +733,13 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
             break;     /* end of block */
 
       normal_code:
-         j = scan[i];
-
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+         val = tab->level * quantizer_scale;
 
-         /* if (bitstream_get (1)) val = -val; */
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
-         SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          bs->vlc.buf <<= 1;
          vl_vlc_needbits(&bs->vlc);
@@ -776,15 +760,11 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
          if (i >= 64)
             break;     /* illegal, check needed to avoid buffer overflow */
 
-         j = scan[i];
-
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
-         val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[j]) / 16;
+         val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;
 
-         SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
@@ -817,21 +797,16 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
       break;   /* illegal, check needed to avoid buffer overflow */
    }
 
-   dest[63] ^= mismatch & 1;
    vl_vlc_dumpbits(&bs->vlc, 2);       /* dump end of block code */
 }
 
 static inline void
-get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                    const int scan[64], int quantizer_scale, short *dest)
+get_intra_block_B15(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
 {
-   int i, j, val;
-   uint8_t *quant_matrix = picture->intra_quantizer_matrix;
-   int mismatch;
+   int i, val;
    const DCTtab * tab;
 
    i = 0;
-   mismatch = ~dest[0];
 
    vl_vlc_needbits(&bs->vlc);
 
@@ -844,17 +819,13 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
          if (i < 64) {
 
          normal_code:
-            j = scan[i];
             bs->vlc.buf <<= tab->len;
             bs->vlc.bits += tab->len + 1;
-            val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+            val = tab->level * quantizer_scale;
 
-            /* if (bitstream_get (1)) val = -val; */
             val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
-            SATURATE (val);
             dest[i] = val;
-            mismatch ^= val;
 
             bs->vlc.buf <<= 1;
             vl_vlc_needbits(&bs->vlc);
@@ -874,15 +845,11 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
             if (i >= 64)
                 break; /* illegal, check against buffer overflow */
 
-            j = scan[i];
-
             vl_vlc_dumpbits(&bs->vlc, 12);
             vl_vlc_needbits(&bs->vlc);
-            val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[j]) / 16;
+            val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;
 
-            SATURATE (val);
             dest[i] = val;
-            mismatch ^= val;
 
             vl_vlc_dumpbits(&bs->vlc, 12);
             vl_vlc_needbits(&bs->vlc);
@@ -916,21 +883,16 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
       break;   /* illegal, check needed to avoid buffer overflow */
    }
 
-   dest[63] ^= mismatch & 1;
    vl_vlc_dumpbits(&bs->vlc, 4);       /* dump end of block code */
 }
 
 static inline void
-get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                    const int scan[64], int quantizer_scale, short *dest)
+get_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
 {
-   int i, j, val;
-   uint8_t *quant_matrix = picture->non_intra_quantizer_matrix;
-   int mismatch;
+   int i, val;
    const DCTtab *tab;
 
    i = -1;
-   mismatch = 1;
 
    vl_vlc_needbits(&bs->vlc);
    if (bs->vlc.buf >= 0x28000000) {
@@ -950,17 +912,13 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
             break;     /* end of block */
 
       normal_code:
-         j = scan[i];
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;
+         val = ((2*tab->level+1) * quantizer_scale) >> 1;
 
-         /* if (bitstream_get (1)) val = -val; */
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
-         SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          bs->vlc.buf <<= 1;
          vl_vlc_needbits(&bs->vlc);
@@ -984,16 +942,12 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
          if (i >= 64)
             break;     /* illegal, check needed to avoid buffer overflow */
 
-         j = scan[i];
-
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
          val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1;
-         val = (val * quantizer_scale * quant_matrix[j]) / 32;
+         val = (val * quantizer_scale) / 2;
 
-         SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
@@ -1025,16 +979,13 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
       }
       break;   /* illegal, check needed to avoid buffer overflow */
    }
-   dest[63] ^= mismatch & 1;
    vl_vlc_dumpbits(&bs->vlc, 2);       /* dump end of block code */
 }
 
 static inline void
-get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                      const int scan[64], int quantizer_scale, short *dest)
+get_mpeg1_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
 {
-   int i, j, val;
-   uint8_t *quant_matrix = picture->intra_quantizer_matrix;
+   int i, val;
    const DCTtab * tab;
 
    i = 0;
@@ -1051,10 +1002,9 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
             break;     /* end of block */
 
       normal_code:
-         j = scan[i];
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+         val = tab->level * quantizer_scale;
 
          /* oddification */
          val = (val - 1) | 1;
@@ -1062,7 +1012,6 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
          /* if (bitstream_get (1)) val = -val; */
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
-         SATURATE (val);
          dest[i] = val;
 
          bs->vlc.buf <<= 1;
@@ -1084,8 +1033,6 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
          if (i >= 64)
             break;     /* illegal, check needed to avoid buffer overflow */
 
-         j = scan[i];
-
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
          val = vl_vlc_sbits(&bs->vlc, 8);
@@ -1093,12 +1040,11 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
             vl_vlc_dumpbits(&bs->vlc, 8);
             val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
          }
-         val = (val * quantizer_scale * quant_matrix[j]) / 16;
+         val = val * quantizer_scale;
 
          /* oddification */
          val = (val + ~SBITS (val, 1)) | 1;
 
-         SATURATE (val);
          dest[i] = val;
 
          vl_vlc_dumpbits(&bs->vlc, 8);
@@ -1135,11 +1081,9 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
 }
 
 static inline void
-get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                          const int scan[64], int quantizer_scale, short *dest)
+get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
 {
-   int i, j, val;
-   uint8_t *quant_matrix = picture->non_intra_quantizer_matrix;
+   int i, val;
    const DCTtab * tab;
 
    i = -1;
@@ -1162,10 +1106,9 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
             break;     /* end of block */
 
       normal_code:
-         j = scan[i];
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;
+         val = ((2*tab->level+1) * quantizer_scale) >> 1;
 
          /* oddification */
          val = (val - 1) | 1;
@@ -1173,7 +1116,6 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
          /* if (bitstream_get (1)) val = -val; */
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
-         SATURATE (val);
          dest[i] = val;
 
          bs->vlc.buf <<= 1;
@@ -1198,8 +1140,6 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
          if (i >= 64)
             break;     /* illegal, check needed to avoid buffer overflow */
 
-         j = scan[i];
-
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
          val = vl_vlc_sbits(&bs->vlc, 8);
@@ -1208,12 +1148,11 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
             val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
          }
          val = 2 * (val + SBITS (val, 1)) + 1;
-         val = (val * quantizer_scale * quant_matrix[j]) / 32;
+         val = (val * quantizer_scale) / 2;
 
          /* oddification */
          val = (val + ~SBITS (val, 1)) | 1;
 
-         SATURATE (val);
          dest[i] = val;
 
          vl_vlc_dumpbits(&bs->vlc, 8);
@@ -1250,7 +1189,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
 }
 
 static inline void
-slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int scan[64], int cc,
+slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
                  unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3])
 {
    short dest[64];
@@ -1269,14 +1208,14 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
       dc_dct_pred[cc] += get_chroma_dc_dct_diff(bs);
 
    memset(dest, 0, sizeof(int16_t) * 64);
-   dest[0] = dc_dct_pred[cc] << (3 - picture->intra_dc_precision);
-   if (picture->mpeg1) {
+   dest[0] = dc_dct_pred[cc];
+   if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) {
       if (picture->picture_coding_type != D_TYPE)
-          get_mpeg1_intra_block(bs, picture, scan, quantizer_scale, dest);
+          get_mpeg1_intra_block(bs, quantizer_scale, dest);
    } else if (picture->intra_vlc_format)
-      get_intra_block_B15(bs, picture, scan, quantizer_scale, dest);
+      get_intra_block_B15(bs, quantizer_scale, dest);
    else
-      get_intra_block_B14(bs, picture, scan, quantizer_scale, dest);
+      get_intra_block_B14(bs, quantizer_scale, dest);
 
    memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
 
@@ -1286,8 +1225,8 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
 }
 
 static inline void
-slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int scan[64], int cc,
-                    unsigned x, unsigned y, int quantizer_scale, enum pipe_mpeg12_dct_type coding)
+slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
+                    unsigned x, unsigned y,  enum pipe_mpeg12_dct_type coding, int quantizer_scale)
 {
    short dest[64];
 
@@ -1297,10 +1236,10 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
    bs->ycbcr_stream[cc]->coding = coding;
 
    memset(dest, 0, sizeof(int16_t) * 64);
-   if (picture->mpeg1)
-      get_mpeg1_non_intra_block(bs, picture, scan, quantizer_scale, dest);
+   if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1)
+      get_mpeg1_non_intra_block(bs, quantizer_scale, dest);
    else
-      get_non_intra_block(bs, picture, scan, quantizer_scale, dest);
+      get_non_intra_block(bs, quantizer_scale, dest);
 
    memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
 
@@ -1532,22 +1471,27 @@ do {                                                    \
       routine(bs, picture->f_code[1], &mv_bwd);         \
 } while (0)
 
-#define NEXT_MACROBLOCK                                \
-do {                                           \
-   bs->mv_stream[0][x+y*bs->width] = mv_fwd;    \
-   bs->mv_stream[1][x+y*bs->width] = mv_bwd;    \
-   ++x;                                                \
-   if (x == bs->width) {                       \
-      ++y;                                      \
-      if (y >= bs->height)                      \
-         return false;                          \
-      x = 0;                                    \
-   }                                            \
-} while (0)
+/*
+ * Write the current forward and backward motion vectors into the output
+ * streams at index *mv_pos, then advance *mv_pos by one.
+ *
+ * bs:     decoder state; mv_stream[0] receives mv_fwd, mv_stream[1] mv_bwd
+ * mv_pos: in/out index into both streams, incremented before returning
+ * mv_fwd: forward vector pair, only read here
+ * mv_bwd: backward vector pair, only read here
+ *
+ * NOTE(review): *mv_pos is not range-checked in this function; callers
+ * presumably keep it below the number of entries in mv_stream -- confirm,
+ * in particular for the mba_inc loop in decode_slice().
+ */
+static inline void
+store_motionvectors(struct vl_mpg12_bs *bs, unsigned *mv_pos,
+                    struct pipe_motionvector *mv_fwd,
+                    struct pipe_motionvector *mv_bwd)
+{
+   /* the top vector is always stored as given */
+   bs->mv_stream[0][*mv_pos].top = mv_fwd->top;
+   /* if top is marked PIPE_VIDEO_FRAME, store a copy of it as bottom too;
+    * otherwise store the separate bottom vector */
+   bs->mv_stream[0][*mv_pos].bottom =
+      mv_fwd->top.field_select == PIPE_VIDEO_FRAME ?
+      mv_fwd->top : mv_fwd->bottom;
+
+   /* same top/bottom selection for the backward vector */
+   bs->mv_stream[1][*mv_pos].top = mv_bwd->top;
+   bs->mv_stream[1][*mv_pos].bottom =
+      mv_bwd->top.field_select == PIPE_VIDEO_FRAME ?
+      mv_bwd->top : mv_bwd->bottom;
+
+   /* move on to the next slot in both streams */
+   (*mv_pos)++;
+}
 
 static inline bool
 slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-           int *quantizer_scale, int *x, int *y)
+           int *quantizer_scale, unsigned *x, unsigned *y, unsigned *mv_pos)
 {
    const MBAtab * mba;
 
@@ -1601,11 +1545,13 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
    if (*y > bs->height)
       return false;
 
+   *mv_pos = *x + *y * bs->width;
+
    return true;
 }
 
 static inline bool
-decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, const int scan[64])
+decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
 {
    enum pipe_video_field_select default_field_select;
    struct pipe_motionvector mv_fwd, mv_bwd;
@@ -1615,7 +1561,7 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
    int dc_dct_pred[3] = { 0, 0, 0 };
    int quantizer_scale;
 
-   int x, y;
+   unsigned x, y, mv_pos;
 
    switch(picture->picture_structure) {
    case TOP_FIELD:
@@ -1631,7 +1577,7 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
       break;
    }
 
-   if (!slice_init(bs, picture, &quantizer_scale, &x, &y))
+   if (!slice_init(bs, picture, &quantizer_scale, &x, &y, &mv_pos))
       return false;
 
    mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
@@ -1692,12 +1638,12 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
          mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
 
          // unravaled loop of 6 block(i) calls in macroblock()
-         slice_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, scan, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, scan, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
 
          if (picture->picture_coding_type == D_TYPE) {
             vl_vlc_needbits(&bs->vlc);
@@ -1708,7 +1654,7 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
          if (picture->picture_structure == FRAME_PICTURE)
             switch (macroblock_modes & MOTION_TYPE_MASK) {
             case MC_FRAME:
-               if (picture->mpeg1) {
+               if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) {
                   MOTION_CALL(motion_mp1, macroblock_modes);
                } else {
                   MOTION_CALL(motion_fr_frame, macroblock_modes);
@@ -1755,23 +1701,29 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
 
             // TODO  optimize not fully used for idct accel only mc.
             if (coded_block_pattern & 0x20)
-               slice_non_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+0, quantizer_scale, dct_type); // cc0  luma 0
+               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0  luma 0
             if (coded_block_pattern & 0x10)
-               slice_non_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+0, quantizer_scale, dct_type); // cc0 luma 1
+               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1
             if (coded_block_pattern & 0x08)
-               slice_non_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+1, quantizer_scale, dct_type); // cc0 luma 2
+               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2
             if (coded_block_pattern & 0x04)
-               slice_non_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+1, quantizer_scale, dct_type); // cc0 luma 3
+               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3
             if (coded_block_pattern & 0x2)
-               slice_non_intra_DCT(bs, picture, scan, 1, x, y, quantizer_scale, PIPE_MPEG12_DCT_TYPE_FRAME); // cc1 croma
+               slice_non_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma
             if (coded_block_pattern & 0x1)
-               slice_non_intra_DCT(bs, picture, scan, 2, x, y, quantizer_scale, PIPE_MPEG12_DCT_TYPE_FRAME); // cc2 croma
+               slice_non_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma
          }
 
          dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
       }
 
-      NEXT_MACROBLOCK;
+      store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd);
+      if (++x >= bs->width) {
+         ++y;
+         if (y >= bs->height)
+            return false;
+         x -= bs->width;
+      }
 
       vl_vlc_needbits(&bs->vlc);
       mba_inc = 0;
@@ -1807,10 +1759,18 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
             mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
             mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
          }
+
+         x += mba_inc;
          do {
-            NEXT_MACROBLOCK;
+            store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd);
          } while (--mba_inc);
       }
+      while (x >= bs->width) {
+         ++y;
+         if (y >= bs->height)
+            return false;
+         x -= bs->width;
+      }
    }
 }
 
@@ -1864,8 +1824,6 @@ void
 vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
                    struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3])
 {
-   const int *scan;
-
    assert(bs);
    assert(num_ycbcr_blocks);
    assert(buffer && num_bytes);
@@ -1874,7 +1832,5 @@ vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffe
 
    vl_vlc_init(&bs->vlc, buffer, num_bytes);
 
-   scan = picture->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
-
-   while(decode_slice(bs, picture, scan));
+   while(decode_slice(bs, picture));
 }