src/gallium/auxiliary/vl/vl_mpeg12_decoder.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 Younes Manton.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 #include <math.h>
  29 #include <assert.h>
  30
  31 #include <util/u_memory.h>
  32 #include <util/u_rect.h>
  33 #include <util/u_video.h>
  34
  35 #include "vl_mpeg12_decoder.h"
  36 #include "vl_defines.h"
  37
/*
 * Scale factors stored in the mc_scale field of the format tables below:
 * SCALE_FACTOR_SNORM (32768 / 256 = 128) accompanies the *_SNORM zscan source
 * formats, SCALE_FACTOR_SSCALED (1 / 256) the *_SSCALED ones.
 */
#define SCALE_FACTOR_SNORM (32768.0f / 256.0f)
#define SCALE_FACTOR_SSCALED (1.0f / 256.0f)
  40
/*
 * One candidate combination of texture formats and scale factors for the
 * decoder. find_format_config() returns the first table entry whose formats
 * the screen reports as supported.
 */
struct format_config {
   /* format of the per-buffer zscan source video buffer */
   enum pipe_format zscan_source_format;
   /* format of the decoder's IDCT source buffer; PIPE_FORMAT_NONE in the MC-only table */
   enum pipe_format idct_source_format;
   /* format of the decoder's motion compensation source buffer */
   enum pipe_format mc_source_format;

   /* scale handed to vl_idct_upload_matrix() */
   float idct_scale;
   /* scale handed to vl_mc_init() */
   float mc_scale;
};
  49
  50 static const struct format_config bitstream_format_config[] = {
  51    { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SSCALED },
  52    { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, 1.0f, SCALE_FACTOR_SSCALED },
  53    { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SNORM },
  54    { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, 1.0f, SCALE_FACTOR_SNORM }
  55 };
  56
  57 static const unsigned num_bitstream_format_configs =
  58    sizeof(bitstream_format_config) / sizeof(struct format_config);
  59
  60 static const struct format_config idct_format_config[] = {
  61    { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SSCALED },
  62    { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, 1.0f, SCALE_FACTOR_SSCALED },
  63    { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SNORM },
  64    { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, 1.0f, SCALE_FACTOR_SNORM }
  65 };
  66
  67 static const unsigned num_idct_format_configs =
  68    sizeof(idct_format_config) / sizeof(struct format_config);
  69
  70 static const struct format_config mc_format_config[] = {
  71    //{ PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_NONE, PIPE_FORMAT_R16_SSCALED, 0.0f, SCALE_FACTOR_SSCALED },
  72    { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_NONE, PIPE_FORMAT_R16_SNORM, 0.0f, SCALE_FACTOR_SNORM }
  73 };
  74
  75 static const unsigned num_mc_format_configs =
  76    sizeof(mc_format_config) / sizeof(struct format_config);
  77
  78 static bool
  79 init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
  80 {
  81    enum pipe_format formats[3];
  82
  83    struct pipe_sampler_view **source;
  84    struct pipe_surface **destination;
  85
  86    struct vl_mpeg12_decoder *dec;
  87
  88    unsigned i;
  89
  90    assert(buffer);
  91
  92    dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
  93
  94    formats[0] = formats[1] = formats[2] = dec->zscan_source_format;
  95    buffer->zscan_source = vl_video_buffer_init(dec->base.context, dec->pipe,
  96                                                dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
  97                                                align(dec->max_blocks, dec->blocks_per_line) / dec->blocks_per_line,
  98                                                1, PIPE_VIDEO_CHROMA_FORMAT_444,
  99                                                formats, PIPE_USAGE_STATIC);
 100    if (!buffer->zscan_source)
 101       goto error_source;
 102
 103    source = buffer->zscan_source->get_sampler_view_planes(buffer->zscan_source);
 104    if (!source)
 105       goto error_sampler;
 106
 107    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
 108       destination = dec->idct_source->get_surfaces(dec->idct_source);
 109    else
 110       destination = dec->mc_source->get_surfaces(dec->mc_source);
 111
 112    if (!destination)
 113       goto error_surface;
 114
 115    for (i = 0; i < VL_MAX_PLANES; ++i)
 116       if (!vl_zscan_init_buffer(i == 0 ? &dec->zscan_y : &dec->zscan_c,
 117                                 &buffer->zscan[i], source[i], destination[i]))
 118          goto error_plane;
 119
 120    return true;
 121
 122 error_plane:
 123    for (; i > 0; --i)
 124       vl_zscan_cleanup_buffer(&buffer->zscan[i - 1]);
 125
 126 error_surface:
 127 error_sampler:
 128    buffer->zscan_source->destroy(buffer->zscan_source);
 129
 130 error_source:
 131    return false;
 132 }
 133
 134 static void
 135 cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer)
 136 {
 137    unsigned i;
 138
 139    assert(buffer);
 140
 141    for (i = 0; i < VL_MAX_PLANES; ++i)
 142       vl_zscan_cleanup_buffer(&buffer->zscan[i]);
 143    buffer->zscan_source->destroy(buffer->zscan_source);
 144 }
 145
 146 static bool
 147 init_idct_buffer(struct vl_mpeg12_buffer *buffer)
 148 {
 149    struct pipe_sampler_view **idct_source_sv, **mc_source_sv;
 150
 151    struct vl_mpeg12_decoder *dec;
 152
 153    unsigned i;
 154
 155    assert(buffer);
 156
 157    dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
 158
 159    idct_source_sv = dec->idct_source->get_sampler_view_planes(dec->idct_source);
 160    if (!idct_source_sv)
 161       goto error_source_sv;
 162
 163    mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
 164    if (!mc_source_sv)
 165       goto error_mc_source_sv;
 166
 167    for (i = 0; i < 3; ++i)
 168       if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
 169                                &buffer->idct[i], idct_source_sv[i],
 170                                mc_source_sv[i]))
 171          goto error_plane;
 172
 173    return true;
 174
 175 error_plane:
 176    for (; i > 0; --i)
 177       vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]);
 178
 179 error_mc_source_sv:
 180 error_source_sv:
 181    return false;
 182 }
 183
 184 static void
 185 cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
 186 {
 187    struct vl_mpeg12_decoder *dec;
 188    assert(buf);
 189
 190    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
 191    assert(dec);
 192
 193    vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
 194    vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
 195    vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
 196 }
 197
 198 static bool
 199 init_mc_buffer(struct vl_mpeg12_buffer *buf)
 200 {
 201    struct vl_mpeg12_decoder *dec;
 202
 203    assert(buf);
 204
 205    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
 206    assert(dec);
 207
 208    if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0]))
 209       goto error_mc_y;
 210
 211    if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[1]))
 212       goto error_mc_cb;
 213
 214    if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[2]))
 215       goto error_mc_cr;
 216
 217    return true;
 218
 219 error_mc_cr:
 220    vl_mc_cleanup_buffer(&buf->mc[1]);
 221
 222 error_mc_cb:
 223    vl_mc_cleanup_buffer(&buf->mc[0]);
 224
 225 error_mc_y:
 226    return false;
 227 }
 228
 229 static void
 230 cleanup_mc_buffer(struct vl_mpeg12_buffer *buf)
 231 {
 232    unsigned i;
 233
 234    assert(buf);
 235
 236    for (i = 0; i < VL_MAX_PLANES; ++i)
 237       vl_mc_cleanup_buffer(&buf->mc[i]);
 238 }
 239
 240 static void
 241 vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
 242 {
 243    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 244    struct vl_mpeg12_decoder *dec;
 245
 246    assert(buf);
 247
 248    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
 249    assert(dec);
 250
 251    cleanup_zscan_buffer(buf);
 252
 253    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
 254       cleanup_idct_buffer(buf);
 255
 256    cleanup_mc_buffer(buf);
 257
 258    vl_vb_cleanup(&buf->vertex_stream);
 259
 260    FREE(buf);
 261 }
 262
 263 static void
 264 vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer)
 265 {
 266    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 267    struct vl_mpeg12_decoder *dec;
 268
 269    struct pipe_sampler_view **sampler_views;
 270    unsigned i;
 271
 272    assert(buf);
 273
 274    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
 275    assert(dec);
 276
 277    vl_vb_map(&buf->vertex_stream, dec->pipe);
 278
 279    sampler_views = buf->zscan_source->get_sampler_view_planes(buf->zscan_source);
 280
 281    assert(sampler_views);
 282
 283    for (i = 0; i < VL_MAX_PLANES; ++i) {
 284       struct pipe_resource *tex = sampler_views[i]->texture;
 285       struct pipe_box rect =
 286       {
 287          0, 0, 0,
 288          tex->width0,
 289          tex->height0,
 290          1
 291       };
 292
 293       buf->tex_transfer[i] = dec->pipe->get_transfer
 294       (
 295          dec->pipe, tex,
 296          0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
 297          &rect
 298       );
 299
 300       buf->texels[i] = dec->pipe->transfer_map(dec->pipe, buf->tex_transfer[i]);
 301    }
 302
 303    if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
 304       struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
 305       struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES];
 306
 307       for (i = 0; i < VL_MAX_PLANES; ++i)
 308          ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i);
 309
 310       for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
 311          mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i);
 312
 313       vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream);
 314    } else {
 315       static const uint8_t dummy_quant[64] = {
 316          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 317          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 318          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 319          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 320          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 321          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 322          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 323          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
 324       };
 325
 326       for (i = 0; i < VL_MAX_PLANES; ++i) {
 327          vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear);
 328          vl_zscan_upload_quant(&buf->zscan[i], dummy_quant, dummy_quant);
 329       }
 330    }
 331 }
 332
 333 static struct pipe_ycbcr_block *
 334 vl_mpeg12_buffer_get_ycbcr_stream(struct pipe_video_decode_buffer *buffer, int component)
 335 {
 336    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 337
 338    assert(buf);
 339
 340    return vl_vb_get_ycbcr_stream(&buf->vertex_stream, component);
 341 }
 342
 343 static short *
 344 vl_mpeg12_buffer_get_ycbcr_buffer(struct pipe_video_decode_buffer *buffer, int component)
 345 {
 346    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 347
 348    assert(buf);
 349    assert(component < VL_MAX_PLANES);
 350
 351    return buf->texels[component];
 352 }
 353
 354 static unsigned
 355 vl_mpeg12_buffer_get_mv_stream_stride(struct pipe_video_decode_buffer *buffer)
 356 {
 357    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 358
 359    assert(buf);
 360
 361    return vl_vb_get_mv_stream_stride(&buf->vertex_stream);
 362 }
 363
 364 static struct pipe_motionvector *
 365 vl_mpeg12_buffer_get_mv_stream(struct pipe_video_decode_buffer *buffer, int ref_frame)
 366 {
 367    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 368
 369    assert(buf);
 370
 371    return vl_vb_get_mv_stream(&buf->vertex_stream, ref_frame);
 372 }
 373
 374 static void
 375 vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
 376                                   unsigned num_bytes, const void *data,
 377                                   struct pipe_mpeg12_picture_desc *picture,
 378                                   unsigned num_ycbcr_blocks[3])
 379 {
 380    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 381    uint8_t intra_quantizer_matrix[64];
 382    struct vl_mpeg12_decoder *dec;
 383    unsigned i;
 384
 385    assert(buf);
 386
 387    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
 388    assert(dec);
 389
 390    memcpy(intra_quantizer_matrix, picture->intra_quantizer_matrix, sizeof(intra_quantizer_matrix));
 391    intra_quantizer_matrix[0] = 1 << (7 - picture->intra_dc_precision);
 392
 393    for (i = 0; i < VL_MAX_PLANES; ++i) {
 394       vl_zscan_set_layout(&buf->zscan[i], picture->alternate_scan ? dec->zscan_alternate : dec->zscan_normal);
 395       vl_zscan_upload_quant(&buf->zscan[i], intra_quantizer_matrix, picture->non_intra_quantizer_matrix);
 396    }
 397
 398    vl_mpg12_bs_decode(&buf->bs, num_bytes, data, picture, num_ycbcr_blocks);
 399 }
 400
 401 static void
 402 vl_mpeg12_buffer_unmap(struct pipe_video_decode_buffer *buffer)
 403 {
 404    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 405    struct vl_mpeg12_decoder *dec;
 406    unsigned i;
 407
 408    assert(buf);
 409
 410    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
 411    assert(dec);
 412
 413    vl_vb_unmap(&buf->vertex_stream, dec->pipe);
 414
 415    for (i = 0; i < VL_MAX_PLANES; ++i) {
 416       dec->pipe->transfer_unmap(dec->pipe, buf->tex_transfer[i]);
 417       dec->pipe->transfer_destroy(dec->pipe, buf->tex_transfer[i]);
 418    }
 419 }
 420
 421 static void
 422 vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
 423 {
 424    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
 425
 426    assert(decoder);
 427
 428    /* Asserted in softpipe_delete_fs_state() for some reason */
 429    dec->pipe->bind_vs_state(dec->pipe, NULL);
 430    dec->pipe->bind_fs_state(dec->pipe, NULL);
 431
 432    dec->pipe->delete_depth_stencil_alpha_state(dec->pipe, dec->dsa);
 433    dec->pipe->delete_sampler_state(dec->pipe, dec->sampler_ycbcr);
 434
 435    vl_mc_cleanup(&dec->mc_y);
 436    vl_mc_cleanup(&dec->mc_c);
 437    dec->mc_source->destroy(dec->mc_source);
 438
 439    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
 440       vl_idct_cleanup(&dec->idct_y);
 441       vl_idct_cleanup(&dec->idct_c);
 442       dec->idct_source->destroy(dec->idct_source);
 443    }
 444
 445    vl_zscan_cleanup(&dec->zscan_y);
 446    vl_zscan_cleanup(&dec->zscan_c);
 447
 448    dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
 449    dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv);
 450
 451    pipe_resource_reference(&dec->quads.buffer, NULL);
 452    pipe_resource_reference(&dec->pos.buffer, NULL);
 453
 454    pipe_sampler_view_reference(&dec->zscan_linear, NULL);
 455    pipe_sampler_view_reference(&dec->zscan_normal, NULL);
 456    pipe_sampler_view_reference(&dec->zscan_alternate, NULL);
 457
 458    FREE(dec);
 459 }
 460
 461 static struct pipe_video_decode_buffer *
 462 vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
 463 {
 464    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
 465    struct vl_mpeg12_buffer *buffer;
 466
 467    assert(dec);
 468
 469    buffer = CALLOC_STRUCT(vl_mpeg12_buffer);
 470    if (buffer == NULL)
 471       return NULL;
 472
 473    buffer->base.decoder = decoder;
 474    buffer->base.destroy = vl_mpeg12_buffer_destroy;
 475    buffer->base.map = vl_mpeg12_buffer_map;
 476    buffer->base.get_ycbcr_stream = vl_mpeg12_buffer_get_ycbcr_stream;
 477    buffer->base.get_ycbcr_buffer = vl_mpeg12_buffer_get_ycbcr_buffer;
 478    buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride;
 479    buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream;
 480    buffer->base.decode_bitstream = vl_mpeg12_buffer_decode_bitstream;
 481    buffer->base.unmap = vl_mpeg12_buffer_unmap;
 482
 483    if (!vl_vb_init(&buffer->vertex_stream, dec->pipe,
 484                    dec->base.width / MACROBLOCK_WIDTH,
 485                    dec->base.height / MACROBLOCK_HEIGHT))
 486       goto error_vertex_buffer;
 487
 488    if (!init_mc_buffer(buffer))
 489       goto error_mc;
 490
 491    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
 492       if (!init_idct_buffer(buffer))
 493          goto error_idct;
 494
 495    if (!init_zscan_buffer(buffer))
 496       goto error_zscan;
 497
 498    if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
 499       vl_mpg12_bs_init(&buffer->bs,
 500                        dec->base.width / MACROBLOCK_WIDTH,
 501                        dec->base.height / MACROBLOCK_HEIGHT);
 502
 503    return &buffer->base;
 504
 505 error_zscan:
 506    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
 507       cleanup_idct_buffer(buffer);
 508
 509 error_idct:
 510    cleanup_mc_buffer(buffer);
 511
 512 error_mc:
 513    vl_vb_cleanup(&buffer->vertex_stream);
 514
 515 error_vertex_buffer:
 516    FREE(buffer);
 517    return NULL;
 518 }
 519
 520 static void
 521 vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
 522                                unsigned num_ycbcr_blocks[3],
 523                                struct pipe_video_buffer *refs[2],
 524                                struct pipe_video_buffer *dst)
 525 {
 526    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
 527    struct vl_mpeg12_decoder *dec;
 528
 529    struct pipe_sampler_view **sv[VL_MAX_REF_FRAMES], **mc_source_sv;
 530    struct pipe_surface **surfaces;
 531
 532    struct pipe_vertex_buffer vb[3];
 533
 534    unsigned i, j, component;
 535    unsigned nr_components;
 536
 537    assert(buf);
 538
 539    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
 540    assert(dec);
 541
 542    for (i = 0; i < 2; ++i)
 543       sv[i] = refs[i] ? refs[i]->get_sampler_view_planes(refs[i]) : NULL;
 544
 545    vb[0] = dec->quads;
 546    vb[1] = dec->pos;
 547
 548    surfaces = dst->get_surfaces(dst);
 549
 550    dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_mv);
 551    for (i = 0; i < VL_MAX_PLANES; ++i) {
 552       if (!surfaces[i]) continue;
 553
 554       vl_mc_set_surface(&buf->mc[i], surfaces[i]);
 555
 556       for (j = 0; j < VL_MAX_REF_FRAMES; ++j) {
 557          if (!sv[j]) continue;
 558
 559          vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);;
 560          dec->pipe->set_vertex_buffers(dec->pipe, 3, vb);
 561
 562          vl_mc_render_ref(&buf->mc[i], sv[j][i]);
 563       }
 564    }
 565
 566    dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
 567    for (i = 0; i < VL_MAX_PLANES; ++i) {
 568       if (!num_ycbcr_blocks[i]) continue;
 569
 570       vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
 571       dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
 572
 573       vl_zscan_render(&buf->zscan[i] , num_ycbcr_blocks[i]);
 574
 575       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
 576          vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_ycbcr_blocks[i]);
 577    }
 578
 579    mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
 580    for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) {
 581       if (!surfaces[i]) continue;
 582
 583       nr_components = util_format_get_nr_components(surfaces[i]->texture->format);
 584       for (j = 0; j < nr_components; ++j, ++component) {
 585          if (!num_ycbcr_blocks[i]) continue;
 586
 587          vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component);
 588          dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
 589
 590          if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
 591             vl_idct_prepare_stage2(component == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[component]);
 592          else {
 593             dec->pipe->set_fragment_sampler_views(dec->pipe, 1, &mc_source_sv[component]);
 594             dec->pipe->bind_fragment_sampler_states(dec->pipe, 1, &dec->sampler_ycbcr);
 595          }
 596          vl_mc_render_ycbcr(&buf->mc[i], j, num_ycbcr_blocks[component]);
 597       }
 598    }
 599 }
 600
 601 static bool
 602 init_pipe_state(struct vl_mpeg12_decoder *dec)
 603 {
 604    struct pipe_depth_stencil_alpha_state dsa;
 605    struct pipe_sampler_state sampler;
 606    unsigned i;
 607
 608    assert(dec);
 609
 610    memset(&dsa, 0, sizeof dsa);
 611    dsa.depth.enabled = 0;
 612    dsa.depth.writemask = 0;
 613    dsa.depth.func = PIPE_FUNC_ALWAYS;
 614    for (i = 0; i < 2; ++i) {
 615       dsa.stencil[i].enabled = 0;
 616       dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
 617       dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
 618       dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
 619       dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
 620       dsa.stencil[i].valuemask = 0;
 621       dsa.stencil[i].writemask = 0;
 622    }
 623    dsa.alpha.enabled = 0;
 624    dsa.alpha.func = PIPE_FUNC_ALWAYS;
 625    dsa.alpha.ref_value = 0;
 626    dec->dsa = dec->pipe->create_depth_stencil_alpha_state(dec->pipe, &dsa);
 627    dec->pipe->bind_depth_stencil_alpha_state(dec->pipe, dec->dsa);
 628
 629    memset(&sampler, 0, sizeof(sampler));
 630    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
 631    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
 632    sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
 633    sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
 634    sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
 635    sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
 636    sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
 637    sampler.compare_func = PIPE_FUNC_ALWAYS;
 638    sampler.normalized_coords = 1;
 639    dec->sampler_ycbcr = dec->pipe->create_sampler_state(dec->pipe, &sampler);
 640    if (!dec->sampler_ycbcr)
 641       return false;
 642
 643    return true;
 644 }
 645
 646 static const struct format_config*
 647 find_format_config(struct vl_mpeg12_decoder *dec, const struct format_config configs[], unsigned num_configs)
 648 {
 649    struct pipe_screen *screen;
 650    unsigned i;
 651
 652    assert(dec);
 653
 654    screen = dec->pipe->screen;
 655
 656    for (i = 0; i < num_configs; ++i) {
 657       if (!screen->is_format_supported(screen, configs[i].zscan_source_format, PIPE_TEXTURE_2D,
 658                                        1, PIPE_BIND_SAMPLER_VIEW))
 659          continue;
 660
 661       if (configs[i].idct_source_format != PIPE_FORMAT_NONE) {
 662          if (!screen->is_format_supported(screen, configs[i].idct_source_format, PIPE_TEXTURE_2D,
 663                                           1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
 664             continue;
 665
 666          if (!screen->is_format_supported(screen, configs[i].mc_source_format, PIPE_TEXTURE_3D,
 667                                           1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
 668             continue;
 669       } else {
 670          if (!screen->is_format_supported(screen, configs[i].mc_source_format, PIPE_TEXTURE_2D,
 671                                           1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
 672             continue;
 673       }
 674       return &configs[i];
 675    }
 676
 677    return NULL;
 678 }
 679
 680 static bool
 681 init_zscan(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
 682 {
 683    const unsigned block_size_pixels = BLOCK_WIDTH * BLOCK_HEIGHT;
 684    unsigned num_channels;
 685
 686    assert(dec);
 687
 688    dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4);
 689    dec->max_blocks = (dec->base.width * dec->base.height) / block_size_pixels;
 690
 691    dec->zscan_source_format = format_config->zscan_source_format;
 692    dec->zscan_linear = vl_zscan_layout(dec->pipe, vl_zscan_linear, dec->blocks_per_line);
 693    dec->zscan_normal = vl_zscan_layout(dec->pipe, vl_zscan_normal, dec->blocks_per_line);
 694    dec->zscan_alternate = vl_zscan_layout(dec->pipe, vl_zscan_alternate, dec->blocks_per_line);
 695
 696    num_channels = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 4 : 1;
 697
 698    if (!vl_zscan_init(&dec->zscan_y, dec->pipe, dec->base.width, dec->base.height,
 699                       dec->blocks_per_line, dec->max_blocks, num_channels))
 700       return false;
 701
 702    if (!vl_zscan_init(&dec->zscan_c, dec->pipe, dec->chroma_width, dec->chroma_height,
 703                       dec->blocks_per_line, dec->max_blocks, num_channels))
 704       return false;
 705
 706    return true;
 707 }
 708
 709 static bool
 710 init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
 711 {
 712    unsigned nr_of_idct_render_targets, max_inst;
 713    enum pipe_format formats[3];
 714
 715    struct pipe_sampler_view *matrix = NULL;
 716
 717    nr_of_idct_render_targets = dec->pipe->screen->get_param(dec->pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS);
 718    max_inst = dec->pipe->screen->get_shader_param(dec->pipe->screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_INSTRUCTIONS);
 719
 720    // Just assume we need 32 inst per render target, not 100% true, but should work in most cases
 721    if (nr_of_idct_render_targets >= 4 && max_inst >= 32*4)
 722       // more than 4 render targets usually doesn't makes any seens
 723       nr_of_idct_render_targets = 4;
 724    else
 725       nr_of_idct_render_targets = 1;
 726
 727    formats[0] = formats[1] = formats[2] = format_config->idct_source_format;
 728    dec->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
 729                                            dec->base.width / 4, dec->base.height, 1,
 730                                            dec->base.chroma_format,
 731                                            formats, PIPE_USAGE_STATIC);
 732    if (!dec->idct_source)
 733       goto error_idct_source;
 734
 735    formats[0] = formats[1] = formats[2] = format_config->mc_source_format;
 736    dec->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
 737                                          dec->base.width / nr_of_idct_render_targets,
 738                                          dec->base.height / 4, nr_of_idct_render_targets,
 739                                          dec->base.chroma_format,
 740                                          formats, PIPE_USAGE_STATIC);
 741
 742    if (!dec->mc_source)
 743       goto error_mc_source;
 744
 745    if (!(matrix = vl_idct_upload_matrix(dec->pipe, format_config->idct_scale)))
 746       goto error_matrix;
 747
 748    if (!vl_idct_init(&dec->idct_y, dec->pipe, dec->base.width, dec->base.height,
 749                      nr_of_idct_render_targets, matrix, matrix))
 750       goto error_y;
 751
 752    if(!vl_idct_init(&dec->idct_c, dec->pipe, dec->chroma_width, dec->chroma_height,
 753                     nr_of_idct_render_targets, matrix, matrix))
 754       goto error_c;
 755
 756    pipe_sampler_view_reference(&matrix, NULL);
 757
 758    return true;
 759
 760 error_c:
 761    vl_idct_cleanup(&dec->idct_y);
 762
 763 error_y:
 764    pipe_sampler_view_reference(&matrix, NULL);
 765
 766 error_matrix:
 767    dec->mc_source->destroy(dec->mc_source);
 768
 769 error_mc_source:
 770    dec->idct_source->destroy(dec->idct_source);
 771
 772 error_idct_source:
 773    return false;
 774 }
 775
 776 static bool
 777 init_mc_source_widthout_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
 778 {
 779    enum pipe_format formats[3];
 780
 781    formats[0] = formats[1] = formats[2] = format_config->mc_source_format;
 782    dec->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
 783                                          dec->base.width, dec->base.height, 1,
 784                                          dec->base.chroma_format,
 785                                          formats, PIPE_USAGE_STATIC);
 786
 787    return dec->mc_source != NULL;
 788 }
 789
 790 static void
 791 mc_vert_shader_callback(void *priv, struct vl_mc *mc,
 792                         struct ureg_program *shader,
 793                         unsigned first_output,
 794                         struct ureg_dst tex)
 795 {
 796    struct vl_mpeg12_decoder *dec = priv;
 797    struct ureg_dst o_vtex;
 798
 799    assert(priv && mc);
 800    assert(shader);
 801
 802    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
 803       struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c;
 804       vl_idct_stage2_vert_shader(idct, shader, first_output, tex);
 805    } else {
 806       o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output);
 807       ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(tex));
 808    }
 809 }
 810
 811 static void
 812 mc_frag_shader_callback(void *priv, struct vl_mc *mc,
 813                         struct ureg_program *shader,
 814                         unsigned first_input,
 815                         struct ureg_dst dst)
 816 {
 817    struct vl_mpeg12_decoder *dec = priv;
 818    struct ureg_src src, sampler;
 819
 820    assert(priv && mc);
 821    assert(shader);
 822
 823    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
 824       struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c;
 825       vl_idct_stage2_frag_shader(idct, shader, first_input, dst);
 826    } else {
 827       src = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input, TGSI_INTERPOLATE_LINEAR);
 828       sampler = ureg_DECL_sampler(shader, 0);
 829       ureg_TEX(shader, dst, TGSI_TEXTURE_2D, src, sampler);
 830    }
 831 }
 832
 833 struct pipe_video_decoder *
 834 vl_create_mpeg12_decoder(struct pipe_video_context *context,
 835                          struct pipe_context *pipe,
 836                          enum pipe_video_profile profile,
 837                          enum pipe_video_entrypoint entrypoint,
 838                          enum pipe_video_chroma_format chroma_format,
 839                          unsigned width, unsigned height)
 840 {
 841    const struct format_config *format_config;
 842    struct vl_mpeg12_decoder *dec;
 843
 844    assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
 845
 846    dec = CALLOC_STRUCT(vl_mpeg12_decoder);
 847
 848    if (!dec)
 849       return NULL;
 850
 851    dec->base.context = context;
 852    dec->base.profile = profile;
 853    dec->base.entrypoint = entrypoint;
 854    dec->base.chroma_format = chroma_format;
 855    dec->base.width = width;
 856    dec->base.height = height;
 857
 858    dec->base.destroy = vl_mpeg12_destroy;
 859    dec->base.create_buffer = vl_mpeg12_create_buffer;
 860    dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer;
 861
 862    dec->pipe = pipe;
 863
 864    dec->quads = vl_vb_upload_quads(dec->pipe);
 865    dec->pos = vl_vb_upload_pos(
 866       dec->pipe,
 867       dec->base.width / MACROBLOCK_WIDTH,
 868       dec->base.height / MACROBLOCK_HEIGHT
 869    );
 870
 871    dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->pipe);
 872    dec->ves_mv = vl_vb_get_ves_mv(dec->pipe);
 873
 874    /* TODO: Implement 422, 444 */
 875    assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 876
 877    if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
 878       dec->chroma_width = dec->base.width / 2;
 879       dec->chroma_height = dec->base.height / 2;
 880    } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
 881       dec->chroma_width = dec->base.width;
 882       dec->chroma_height = dec->base.height / 2;
 883    } else {
 884       dec->chroma_width = dec->base.width;
 885       dec->chroma_height = dec->base.height;
 886    }
 887
 888    switch (entrypoint) {
 889    case PIPE_VIDEO_ENTRYPOINT_BITSTREAM:
 890       format_config = find_format_config(dec, bitstream_format_config, num_bitstream_format_configs);
 891       break;
 892
 893    case PIPE_VIDEO_ENTRYPOINT_IDCT:
 894       format_config = find_format_config(dec, idct_format_config, num_idct_format_configs);
 895       break;
 896
 897    case PIPE_VIDEO_ENTRYPOINT_MC:
 898       format_config = find_format_config(dec, mc_format_config, num_mc_format_configs);
 899       break;
 900
 901    default:
 902       assert(0);
 903       return NULL;
 904    }
 905
 906    if (!format_config)
 907       return NULL;
 908
 909    if (!init_zscan(dec, format_config))
 910       goto error_zscan;
 911
 912    if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
 913       if (!init_idct(dec, format_config))
 914          goto error_sources;
 915    } else {
 916       if (!init_mc_source_widthout_idct(dec, format_config))
 917          goto error_sources;
 918    }
 919
 920    if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, format_config->mc_scale,
 921                    mc_vert_shader_callback, mc_frag_shader_callback, dec))
 922       goto error_mc_y;
 923
 924    // TODO
 925    if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, BLOCK_HEIGHT, format_config->mc_scale,
 926                    mc_vert_shader_callback, mc_frag_shader_callback, dec))
 927       goto error_mc_c;
 928
 929    if (!init_pipe_state(dec))
 930       goto error_pipe_state;
 931
 932    return &dec->base;
 933
 934 error_pipe_state:
 935    vl_mc_cleanup(&dec->mc_c);
 936
 937 error_mc_c:
 938    vl_mc_cleanup(&dec->mc_y);
 939
 940 error_mc_y:
 941    if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
 942       vl_idct_cleanup(&dec->idct_y);
 943       vl_idct_cleanup(&dec->idct_c);
 944       dec->idct_source->destroy(dec->idct_source);
 945    }
 946    dec->mc_source->destroy(dec->mc_source);
 947
 948 error_sources:
 949    vl_zscan_cleanup(&dec->zscan_y);
 950    vl_zscan_cleanup(&dec->zscan_c);
 951
 952 error_zscan:
 953    FREE(dec);
 954    return NULL;
 955 }