src/gallium/auxiliary/vl/vl_mpeg12_decoder.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 Younes Manton.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 #include <math.h>
  29 #include <assert.h>
  30
  31 #include <util/u_memory.h>
  32 #include <util/u_rect.h>
  33 #include <util/u_video.h>
  34
  35 #include "vl_mpeg12_decoder.h"
  36 #include "vl_defines.h"
  37
  38 #define SCALE_FACTOR_SNORM (32768.0f / 256.0f)
  39 #define SCALE_FACTOR_SSCALED (1.0f / 256.0f)
  40
  41 struct format_config {
  42    enum pipe_format zscan_source_format;
  43    enum pipe_format idct_source_format;
  44    enum pipe_format mc_source_format;
  45
  46    float idct_scale;
  47    float mc_scale;
  48 };
  49
  50 static const struct format_config bitstream_format_config[] = {
  51    { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SSCALED },
  52    { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, 1.0f, SCALE_FACTOR_SSCALED },
  53    { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SNORM },
  54    { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, 1.0f, SCALE_FACTOR_SNORM }
  55 };
  56
  57 static const unsigned num_bitstream_format_configs =
  58    sizeof(bitstream_format_config) / sizeof(struct format_config);
  59
  60 static const struct format_config idct_format_config[] = {
  61    { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SSCALED },
  62    { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, 1.0f, SCALE_FACTOR_SSCALED },
  63    { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SNORM },
  64    { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, 1.0f, SCALE_FACTOR_SNORM }
  65 };
  66
  67 static const unsigned num_idct_format_configs =
  68    sizeof(idct_format_config) / sizeof(struct format_config);
  69
  70 static const struct format_config mc_format_config[] = {
  71    //{ PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_NONE, PIPE_FORMAT_R16_SSCALED, 0.0f, SCALE_FACTOR_SSCALED },
  72    { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_NONE, PIPE_FORMAT_R16_SNORM, 0.0f, SCALE_FACTOR_SNORM }
  73 };
  74
  75 static const unsigned num_mc_format_configs =
  76    sizeof(mc_format_config) / sizeof(struct format_config);
  77
  78 static bool
  79 init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
  80 {
  81    enum pipe_format formats[3];
  82
  83    struct pipe_sampler_view **source;
  84    struct pipe_surface **destination;
  85
  86    struct vl_mpeg12_decoder *dec;
  87
  88    unsigned i;
  89
  90    assert(buffer);
  91
  92    dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
  93
  94    formats[0] = formats[1] = formats[2] = dec->zscan_source_format;
  95    buffer->zscan_source = vl_video_buffer_init(dec->base.context, dec->pipe,
  96                                                dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
  97                                                align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line,
  98                                                1, PIPE_VIDEO_CHROMA_FORMAT_444,
  99                                                formats, PIPE_USAGE_STATIC);
 100    if (!buffer->zscan_source)
 101       goto error_source;
 102
 103    source = buffer->zscan_source->get_sampler_view_planes(buffer->zscan_source);
 104    if (!source)
 105       goto error_sampler;
 106
 107    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
 108       destination = dec->idct_source->get_surfaces(dec->idct_source);
 109    else
 110       destination = dec->mc_source->get_surfaces(dec->mc_source);
 111
 112    if (!destination)
 113       goto error_surface;
 114
 115    for (i = 0; i < VL_MAX_PLANES; ++i)
 116       if (!vl_zscan_init_buffer(i == 0 ? &dec->zscan_y : &dec->zscan_c,
 117                                 &buffer->zscan[i], source[i], destination[i]))
 118          goto error_plane;
 119
 120    return true;
 121
 122 error_plane:
 123    for (; i > 0; --i)
 124       vl_zscan_cleanup_buffer(&buffer->zscan[i - 1]);
 125
 126 error_surface:
 127 error_sampler:
 128    buffer->zscan_source->destroy(buffer->zscan_source);
 129
 130 error_source:
 131    return false;
 132 }
 133
 134 static void
 135 cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer)
 136 {
 137    unsigned i;
 138
 139    assert(buffer);
 140
 141    for (i = 0; i < VL_MAX_PLANES; ++i)
 142       vl_zscan_cleanup_buffer(&buffer->zscan[i]);
 143    buffer->zscan_source->destroy(buffer->zscan_source);
 144 }
 145
 146 static bool
 147 init_idct_buffer(struct vl_mpeg12_buffer *buffer)
 148 {
 149    struct pipe_sampler_view **idct_source_sv, **mc_source_sv;
 150
 151    struct vl_mpeg12_decoder *dec;
 152
 153    unsigned i;
 154
 155    assert(buffer);
 156
 157    dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
 158
 159    idct_source_sv = dec->idct_source->get_sampler_view_planes(dec->idct_source);
 160    if (!idct_source_sv)
 161       goto error_source_sv;
 162
 163    mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
 164    if (!mc_source_sv)
 165       goto error_mc_source_sv;
 166
 167    for (i = 0; i < 3; ++i)
 168       if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
 169                                &buffer->idct[i], idct_source_sv[i],
 170                                mc_source_sv[i]))
 171          goto error_plane;
 172
 173    return true;
 174
 175 error_plane:
 176    for (; i > 0; --i)
 177       vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]);
 178
 179 error_mc_source_sv:
 180 error_source_sv:
 181    return false;
 182 }
 183
 184 static void
 185 cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
 186 {
 187    struct vl_mpeg12_decoder *dec;
 188    assert(buf);
 189
 190    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
 191    assert(dec);
 192
 193    vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
 194    vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
 195    vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
 196 }
 197
 198 static bool
 199 init_mc_buffer(struct vl_mpeg12_buffer *buf)
 200 {
 201    struct vl_mpeg12_decoder *dec;
 202
 203    assert(buf);
 204
 205    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
 206    assert(dec);
 207
 208    if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0]))
 209       goto error_mc_y;
 210
 211    if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[1]))
 212       goto error_mc_cb;
 213
 214    if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[2]))
 215       goto error_mc_cr;
 216
 217    return true;
 218
 219 error_mc_cr:
 220    vl_mc_cleanup_buffer(&buf->mc[1]);
 221
 222 error_mc_cb:
 223    vl_mc_cleanup_buffer(&buf->mc[0]);
 224
 225 error_mc_y:
 226    return false;
 227 }
 228
 229 static void
 230 cleanup_mc_buffer(struct vl_mpeg12_buffer *buf)
 231 {
 232    unsigned i;
 233
 234    assert(buf);
 235
 236    for (i = 0; i < VL_MAX_PLANES; ++i)
 237       vl_mc_cleanup_buffer(&buf->mc[i]);
 238 }
 239
 240 static void
 241 vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
 242 {
 243    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 244    struct vl_mpeg12_decoder *dec;
 245
 246    assert(buf);
 247
 248    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
 249    assert(dec);
 250
 251    cleanup_zscan_buffer(buf);
 252
 253    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
 254       cleanup_idct_buffer(buf);
 255
 256    cleanup_mc_buffer(buf);
 257
 258    vl_vb_cleanup(&buf->vertex_stream);
 259
 260    FREE(buf);
 261 }
 262
 263 static void
 264 vl_mpeg12_buffer_begin_frame(struct pipe_video_decode_buffer *buffer)
 265 {
 266    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 267    struct vl_mpeg12_decoder *dec;
 268
 269    struct pipe_sampler_view **sampler_views;
 270    unsigned i;
 271
 272    assert(buf);
 273
 274    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
 275    assert(dec);
 276
 277    vl_vb_map(&buf->vertex_stream, dec->pipe);
 278
 279    sampler_views = buf->zscan_source->get_sampler_view_planes(buf->zscan_source);
 280
 281    assert(sampler_views);
 282
 283    for (i = 0; i < VL_MAX_PLANES; ++i) {
 284       struct pipe_resource *tex = sampler_views[i]->texture;
 285       struct pipe_box rect =
 286       {
 287          0, 0, 0,
 288          tex->width0,
 289          tex->height0,
 290          1
 291       };
 292
 293       buf->tex_transfer[i] = dec->pipe->get_transfer
 294       (
 295          dec->pipe, tex,
 296          0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
 297          &rect
 298       );
 299
 300       buf->texels[i] = dec->pipe->transfer_map(dec->pipe, buf->tex_transfer[i]);
 301    }
 302
 303    if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
 304       struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
 305       struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES];
 306
 307       for (i = 0; i < VL_MAX_PLANES; ++i)
 308          ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i);
 309
 310       for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
 311          mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i);
 312
 313       vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream);
 314    } else {
 315       static const uint8_t dummy_quant[64] = {
 316          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 317          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 318          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 319          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 320          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 321          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 322          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
 323          0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
 324       };
 325
 326       for (i = 0; i < VL_MAX_PLANES; ++i) {
 327          vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear);
 328          vl_zscan_upload_quant(&buf->zscan[i], dummy_quant, dummy_quant);
 329       }
 330    }
 331 }
 332
 333 static void
 334 vl_mpeg12_buffer_set_quant_matrix(struct pipe_video_decode_buffer *buffer,
 335                                   uint8_t intra_matrix[64],
 336                                   uint8_t non_intra_matrix[64])
 337 {
 338    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 339    unsigned i;
 340
 341    for (i = 0; i < VL_MAX_PLANES; ++i)
 342       vl_zscan_upload_quant(&buf->zscan[i], intra_matrix, non_intra_matrix);
 343 }
 344
 345 static struct pipe_ycbcr_block *
 346 vl_mpeg12_buffer_get_ycbcr_stream(struct pipe_video_decode_buffer *buffer, int component)
 347 {
 348    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 349
 350    assert(buf);
 351
 352    return vl_vb_get_ycbcr_stream(&buf->vertex_stream, component);
 353 }
 354
 355 static short *
 356 vl_mpeg12_buffer_get_ycbcr_buffer(struct pipe_video_decode_buffer *buffer, int component)
 357 {
 358    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 359
 360    assert(buf);
 361    assert(component < VL_MAX_PLANES);
 362
 363    return buf->texels[component];
 364 }
 365
 366 static unsigned
 367 vl_mpeg12_buffer_get_mv_stream_stride(struct pipe_video_decode_buffer *buffer)
 368 {
 369    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 370
 371    assert(buf);
 372
 373    return vl_vb_get_mv_stream_stride(&buf->vertex_stream);
 374 }
 375
 376 static struct pipe_motionvector *
 377 vl_mpeg12_buffer_get_mv_stream(struct pipe_video_decode_buffer *buffer, int ref_frame)
 378 {
 379    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 380
 381    assert(buf);
 382
 383    return vl_vb_get_mv_stream(&buf->vertex_stream, ref_frame);
 384 }
 385
 386 static void
 387 vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
 388                                   unsigned num_bytes, const void *data,
 389                                   struct pipe_mpeg12_picture_desc *picture,
 390                                   unsigned num_ycbcr_blocks[3])
 391 {
 392    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 393    struct vl_mpeg12_decoder *dec;
 394    unsigned i;
 395
 396    assert(buf);
 397
 398    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
 399    assert(dec);
 400
 401    for (i = 0; i < VL_MAX_PLANES; ++i)
 402       vl_zscan_set_layout(&buf->zscan[i], picture->alternate_scan ? dec->zscan_alternate : dec->zscan_normal);
 403
 404    vl_mpg12_bs_decode(&buf->bs, num_bytes, data, picture, num_ycbcr_blocks);
 405 }
 406
 407 static void
 408 vl_mpeg12_buffer_end_frame(struct pipe_video_decode_buffer *buffer)
 409 {
 410    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 411    struct vl_mpeg12_decoder *dec;
 412    unsigned i;
 413
 414    assert(buf);
 415
 416    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
 417    assert(dec);
 418
 419    vl_vb_unmap(&buf->vertex_stream, dec->pipe);
 420
 421    for (i = 0; i < VL_MAX_PLANES; ++i) {
 422       dec->pipe->transfer_unmap(dec->pipe, buf->tex_transfer[i]);
 423       dec->pipe->transfer_destroy(dec->pipe, buf->tex_transfer[i]);
 424    }
 425 }
 426
 427 static void
 428 vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
 429 {
 430    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
 431
 432    assert(decoder);
 433
 434    /* Asserted in softpipe_delete_fs_state() for some reason */
 435    dec->pipe->bind_vs_state(dec->pipe, NULL);
 436    dec->pipe->bind_fs_state(dec->pipe, NULL);
 437
 438    dec->pipe->delete_depth_stencil_alpha_state(dec->pipe, dec->dsa);
 439    dec->pipe->delete_sampler_state(dec->pipe, dec->sampler_ycbcr);
 440
 441    vl_mc_cleanup(&dec->mc_y);
 442    vl_mc_cleanup(&dec->mc_c);
 443    dec->mc_source->destroy(dec->mc_source);
 444
 445    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
 446       vl_idct_cleanup(&dec->idct_y);
 447       vl_idct_cleanup(&dec->idct_c);
 448       dec->idct_source->destroy(dec->idct_source);
 449    }
 450
 451    vl_zscan_cleanup(&dec->zscan_y);
 452    vl_zscan_cleanup(&dec->zscan_c);
 453
 454    dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
 455    dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv);
 456
 457    pipe_resource_reference(&dec->quads.buffer, NULL);
 458    pipe_resource_reference(&dec->pos.buffer, NULL);
 459
 460    pipe_sampler_view_reference(&dec->zscan_linear, NULL);
 461    pipe_sampler_view_reference(&dec->zscan_normal, NULL);
 462    pipe_sampler_view_reference(&dec->zscan_alternate, NULL);
 463
 464    FREE(dec);
 465 }
 466
 467 static struct pipe_video_decode_buffer *
 468 vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
 469 {
 470    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
 471    struct vl_mpeg12_buffer *buffer;
 472
 473    assert(dec);
 474
 475    buffer = CALLOC_STRUCT(vl_mpeg12_buffer);
 476    if (buffer == NULL)
 477       return NULL;
 478
 479    buffer->base.decoder = decoder;
 480    buffer->base.destroy = vl_mpeg12_buffer_destroy;
 481    buffer->base.begin_frame = vl_mpeg12_buffer_begin_frame;
 482    buffer->base.set_quant_matrix = vl_mpeg12_buffer_set_quant_matrix;
 483    buffer->base.get_ycbcr_stream = vl_mpeg12_buffer_get_ycbcr_stream;
 484    buffer->base.get_ycbcr_buffer = vl_mpeg12_buffer_get_ycbcr_buffer;
 485    buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride;
 486    buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream;
 487    buffer->base.decode_bitstream = vl_mpeg12_buffer_decode_bitstream;
 488    buffer->base.end_frame = vl_mpeg12_buffer_end_frame;
 489
 490    if (!vl_vb_init(&buffer->vertex_stream, dec->pipe,
 491                    dec->base.width / MACROBLOCK_WIDTH,
 492                    dec->base.height / MACROBLOCK_HEIGHT))
 493       goto error_vertex_buffer;
 494
 495    if (!init_mc_buffer(buffer))
 496       goto error_mc;
 497
 498    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
 499       if (!init_idct_buffer(buffer))
 500          goto error_idct;
 501
 502    if (!init_zscan_buffer(buffer))
 503       goto error_zscan;
 504
 505    if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
 506       vl_mpg12_bs_init(&buffer->bs,
 507                        dec->base.width / MACROBLOCK_WIDTH,
 508                        dec->base.height / MACROBLOCK_HEIGHT);
 509
 510    return &buffer->base;
 511
 512 error_zscan:
 513    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
 514       cleanup_idct_buffer(buffer);
 515
 516 error_idct:
 517    cleanup_mc_buffer(buffer);
 518
 519 error_mc:
 520    vl_vb_cleanup(&buffer->vertex_stream);
 521
 522 error_vertex_buffer:
 523    FREE(buffer);
 524    return NULL;
 525 }
 526
 527 static void
 528 vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
 529                                unsigned num_ycbcr_blocks[3],
 530                                struct pipe_video_buffer *refs[2],
 531                                struct pipe_video_buffer *dst)
 532 {
 533    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
 534    struct vl_mpeg12_decoder *dec;
 535
 536    struct pipe_sampler_view **sv[VL_MAX_REF_FRAMES], **mc_source_sv;
 537    struct pipe_surface **surfaces;
 538
 539    struct pipe_vertex_buffer vb[3];
 540
 541    unsigned i, j, component;
 542    unsigned nr_components;
 543
 544    assert(buf);
 545
 546    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
 547    assert(dec);
 548
 549    for (i = 0; i < 2; ++i)
 550       sv[i] = refs[i] ? refs[i]->get_sampler_view_planes(refs[i]) : NULL;
 551
 552    vb[0] = dec->quads;
 553    vb[1] = dec->pos;
 554
 555    surfaces = dst->get_surfaces(dst);
 556
 557    dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_mv);
 558    for (i = 0; i < VL_MAX_PLANES; ++i) {
 559       if (!surfaces[i]) continue;
 560
 561       vl_mc_set_surface(&buf->mc[i], surfaces[i]);
 562
 563       for (j = 0; j < VL_MAX_REF_FRAMES; ++j) {
 564          if (!sv[j]) continue;
 565
 566          vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);;
 567          dec->pipe->set_vertex_buffers(dec->pipe, 3, vb);
 568
 569          vl_mc_render_ref(&buf->mc[i], sv[j][i]);
 570       }
 571    }
 572
 573    vb[2] = dec->block_num;
 574
 575    dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
 576    for (i = 0; i < VL_MAX_PLANES; ++i) {
 577       if (!num_ycbcr_blocks[i]) continue;
 578
 579       vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
 580       dec->pipe->set_vertex_buffers(dec->pipe, 3, vb);
 581
 582       vl_zscan_render(&buf->zscan[i] , num_ycbcr_blocks[i]);
 583
 584       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
 585          vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_ycbcr_blocks[i]);
 586    }
 587
 588    mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
 589    for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) {
 590       if (!surfaces[i]) continue;
 591
 592       nr_components = util_format_get_nr_components(surfaces[i]->texture->format);
 593       for (j = 0; j < nr_components; ++j, ++component) {
 594          if (!num_ycbcr_blocks[i]) continue;
 595
 596          vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component);
 597          dec->pipe->set_vertex_buffers(dec->pipe, 3, vb);
 598
 599          if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
 600             vl_idct_prepare_stage2(component == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[component]);
 601          else {
 602             dec->pipe->set_fragment_sampler_views(dec->pipe, 1, &mc_source_sv[component]);
 603             dec->pipe->bind_fragment_sampler_states(dec->pipe, 1, &dec->sampler_ycbcr);
 604          }
 605          vl_mc_render_ycbcr(&buf->mc[i], j, num_ycbcr_blocks[component]);
 606       }
 607    }
 608 }
 609
 610 static bool
 611 init_pipe_state(struct vl_mpeg12_decoder *dec)
 612 {
 613    struct pipe_depth_stencil_alpha_state dsa;
 614    struct pipe_sampler_state sampler;
 615    unsigned i;
 616
 617    assert(dec);
 618
 619    memset(&dsa, 0, sizeof dsa);
 620    dsa.depth.enabled = 0;
 621    dsa.depth.writemask = 0;
 622    dsa.depth.func = PIPE_FUNC_ALWAYS;
 623    for (i = 0; i < 2; ++i) {
 624       dsa.stencil[i].enabled = 0;
 625       dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
 626       dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
 627       dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
 628       dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
 629       dsa.stencil[i].valuemask = 0;
 630       dsa.stencil[i].writemask = 0;
 631    }
 632    dsa.alpha.enabled = 0;
 633    dsa.alpha.func = PIPE_FUNC_ALWAYS;
 634    dsa.alpha.ref_value = 0;
 635    dec->dsa = dec->pipe->create_depth_stencil_alpha_state(dec->pipe, &dsa);
 636    dec->pipe->bind_depth_stencil_alpha_state(dec->pipe, dec->dsa);
 637
 638    memset(&sampler, 0, sizeof(sampler));
 639    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
 640    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
 641    sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
 642    sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
 643    sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
 644    sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
 645    sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
 646    sampler.compare_func = PIPE_FUNC_ALWAYS;
 647    sampler.normalized_coords = 1;
 648    dec->sampler_ycbcr = dec->pipe->create_sampler_state(dec->pipe, &sampler);
 649    if (!dec->sampler_ycbcr)
 650       return false;
 651
 652    return true;
 653 }
 654
 655 static const struct format_config*
 656 find_format_config(struct vl_mpeg12_decoder *dec, const struct format_config configs[], unsigned num_configs)
 657 {
 658    struct pipe_screen *screen;
 659    unsigned i;
 660
 661    assert(dec);
 662
 663    screen = dec->pipe->screen;
 664
 665    for (i = 0; i < num_configs; ++i) {
 666       if (!screen->is_format_supported(screen, configs[i].zscan_source_format, PIPE_TEXTURE_2D,
 667                                        1, PIPE_BIND_SAMPLER_VIEW))
 668          continue;
 669
 670       if (configs[i].idct_source_format != PIPE_FORMAT_NONE) {
 671          if (!screen->is_format_supported(screen, configs[i].idct_source_format, PIPE_TEXTURE_2D,
 672                                           1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
 673             continue;
 674
 675          if (!screen->is_format_supported(screen, configs[i].mc_source_format, PIPE_TEXTURE_3D,
 676                                           1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
 677             continue;
 678       } else {
 679          if (!screen->is_format_supported(screen, configs[i].mc_source_format, PIPE_TEXTURE_2D,
 680                                           1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
 681             continue;
 682       }
 683       return &configs[i];
 684    }
 685
 686    return NULL;
 687 }
 688
 689 static bool
 690 init_zscan(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
 691 {
 692    unsigned num_channels;
 693
 694    assert(dec);
 695
 696    dec->zscan_source_format = format_config->zscan_source_format;
 697    dec->zscan_linear = vl_zscan_layout(dec->pipe, vl_zscan_linear, dec->blocks_per_line);
 698    dec->zscan_normal = vl_zscan_layout(dec->pipe, vl_zscan_normal, dec->blocks_per_line);
 699    dec->zscan_alternate = vl_zscan_layout(dec->pipe, vl_zscan_alternate, dec->blocks_per_line);
 700
 701    num_channels = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 4 : 1;
 702
 703    if (!vl_zscan_init(&dec->zscan_y, dec->pipe, dec->base.width, dec->base.height,
 704                       dec->blocks_per_line, dec->num_blocks, num_channels))
 705       return false;
 706
 707    if (!vl_zscan_init(&dec->zscan_c, dec->pipe, dec->chroma_width, dec->chroma_height,
 708                       dec->blocks_per_line, dec->num_blocks, num_channels))
 709       return false;
 710
 711    return true;
 712 }
 713
 714 static bool
 715 init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
 716 {
 717    unsigned nr_of_idct_render_targets, max_inst;
 718    enum pipe_format formats[3];
 719
 720    struct pipe_sampler_view *matrix = NULL;
 721
 722    nr_of_idct_render_targets = dec->pipe->screen->get_param(dec->pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS);
 723    max_inst = dec->pipe->screen->get_shader_param(dec->pipe->screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_INSTRUCTIONS);
 724
 725    // Just assume we need 32 inst per render target, not 100% true, but should work in most cases
 726    if (nr_of_idct_render_targets >= 4 && max_inst >= 32*4)
 727       // more than 4 render targets usually doesn't makes any seens
 728       nr_of_idct_render_targets = 4;
 729    else
 730       nr_of_idct_render_targets = 1;
 731
 732    formats[0] = formats[1] = formats[2] = format_config->idct_source_format;
 733    dec->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
 734                                            dec->base.width / 4, dec->base.height, 1,
 735                                            dec->base.chroma_format,
 736                                            formats, PIPE_USAGE_STATIC);
 737    if (!dec->idct_source)
 738       goto error_idct_source;
 739
 740    formats[0] = formats[1] = formats[2] = format_config->mc_source_format;
 741    dec->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
 742                                          dec->base.width / nr_of_idct_render_targets,
 743                                          dec->base.height / 4, nr_of_idct_render_targets,
 744                                          dec->base.chroma_format,
 745                                          formats, PIPE_USAGE_STATIC);
 746
 747    if (!dec->mc_source)
 748       goto error_mc_source;
 749
 750    if (!(matrix = vl_idct_upload_matrix(dec->pipe, format_config->idct_scale)))
 751       goto error_matrix;
 752
 753    if (!vl_idct_init(&dec->idct_y, dec->pipe, dec->base.width, dec->base.height,
 754                      nr_of_idct_render_targets, matrix, matrix))
 755       goto error_y;
 756
 757    if(!vl_idct_init(&dec->idct_c, dec->pipe, dec->chroma_width, dec->chroma_height,
 758                     nr_of_idct_render_targets, matrix, matrix))
 759       goto error_c;
 760
 761    pipe_sampler_view_reference(&matrix, NULL);
 762
 763    return true;
 764
 765 error_c:
 766    vl_idct_cleanup(&dec->idct_y);
 767
 768 error_y:
 769    pipe_sampler_view_reference(&matrix, NULL);
 770
 771 error_matrix:
 772    dec->mc_source->destroy(dec->mc_source);
 773
 774 error_mc_source:
 775    dec->idct_source->destroy(dec->idct_source);
 776
 777 error_idct_source:
 778    return false;
 779 }
 780
 781 static bool
 782 init_mc_source_widthout_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
 783 {
 784    enum pipe_format formats[3];
 785
 786    formats[0] = formats[1] = formats[2] = format_config->mc_source_format;
 787    dec->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
 788                                          dec->base.width, dec->base.height, 1,
 789                                          dec->base.chroma_format,
 790                                          formats, PIPE_USAGE_STATIC);
 791
 792    return dec->mc_source != NULL;
 793 }
 794
 795 static void
 796 mc_vert_shader_callback(void *priv, struct vl_mc *mc,
 797                         struct ureg_program *shader,
 798                         unsigned first_output,
 799                         struct ureg_dst tex)
 800 {
 801    struct vl_mpeg12_decoder *dec = priv;
 802    struct ureg_dst o_vtex;
 803
 804    assert(priv && mc);
 805    assert(shader);
 806
 807    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
 808       struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c;
 809       vl_idct_stage2_vert_shader(idct, shader, first_output, tex);
 810    } else {
 811       o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output);
 812       ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(tex));
 813    }
 814 }
 815
 816 static void
 817 mc_frag_shader_callback(void *priv, struct vl_mc *mc,
 818                         struct ureg_program *shader,
 819                         unsigned first_input,
 820                         struct ureg_dst dst)
 821 {
 822    struct vl_mpeg12_decoder *dec = priv;
 823    struct ureg_src src, sampler;
 824
 825    assert(priv && mc);
 826    assert(shader);
 827
 828    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
 829       struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c;
 830       vl_idct_stage2_frag_shader(idct, shader, first_input, dst);
 831    } else {
 832       src = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input, TGSI_INTERPOLATE_LINEAR);
 833       sampler = ureg_DECL_sampler(shader, 0);
 834       ureg_TEX(shader, dst, TGSI_TEXTURE_2D, src, sampler);
 835    }
 836 }
 837
 838 struct pipe_video_decoder *
 839 vl_create_mpeg12_decoder(struct pipe_video_context *context,
 840                          struct pipe_context *pipe,
 841                          enum pipe_video_profile profile,
 842                          enum pipe_video_entrypoint entrypoint,
 843                          enum pipe_video_chroma_format chroma_format,
 844                          unsigned width, unsigned height)
 845 {
 846    const unsigned block_size_pixels = BLOCK_WIDTH * BLOCK_HEIGHT;
 847    const struct format_config *format_config;
 848    struct vl_mpeg12_decoder *dec;
 849
 850    assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
 851
 852    dec = CALLOC_STRUCT(vl_mpeg12_decoder);
 853
 854    if (!dec)
 855       return NULL;
 856
 857    dec->base.context = context;
 858    dec->base.profile = profile;
 859    dec->base.entrypoint = entrypoint;
 860    dec->base.chroma_format = chroma_format;
 861    dec->base.width = width;
 862    dec->base.height = height;
 863
 864    dec->base.destroy = vl_mpeg12_destroy;
 865    dec->base.create_buffer = vl_mpeg12_create_buffer;
 866    dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer;
 867
 868    dec->pipe = pipe;
 869
 870    dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4);
 871    dec->num_blocks = (dec->base.width * dec->base.height) / block_size_pixels;
 872
 873    dec->quads = vl_vb_upload_quads(dec->pipe);
 874    dec->pos = vl_vb_upload_pos(
 875       dec->pipe,
 876       dec->base.width / MACROBLOCK_WIDTH,
 877       dec->base.height / MACROBLOCK_HEIGHT
 878    );
 879    dec->block_num = vl_vb_upload_block_num(dec->pipe, dec->num_blocks);
 880
 881    dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->pipe);
 882    dec->ves_mv = vl_vb_get_ves_mv(dec->pipe);
 883
 884    /* TODO: Implement 422, 444 */
 885    assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 886
 887    if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
 888       dec->chroma_width = dec->base.width / 2;
 889       dec->chroma_height = dec->base.height / 2;
 890    } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
 891       dec->chroma_width = dec->base.width;
 892       dec->chroma_height = dec->base.height / 2;
 893    } else {
 894       dec->chroma_width = dec->base.width;
 895       dec->chroma_height = dec->base.height;
 896    }
 897
 898    switch (entrypoint) {
 899    case PIPE_VIDEO_ENTRYPOINT_BITSTREAM:
 900       format_config = find_format_config(dec, bitstream_format_config, num_bitstream_format_configs);
 901       break;
 902
 903    case PIPE_VIDEO_ENTRYPOINT_IDCT:
 904       format_config = find_format_config(dec, idct_format_config, num_idct_format_configs);
 905       break;
 906
 907    case PIPE_VIDEO_ENTRYPOINT_MC:
 908       format_config = find_format_config(dec, mc_format_config, num_mc_format_configs);
 909       break;
 910
 911    default:
 912       assert(0);
 913       return NULL;
 914    }
 915
 916    if (!format_config)
 917       return NULL;
 918
 919    if (!init_zscan(dec, format_config))
 920       goto error_zscan;
 921
 922    if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
 923       if (!init_idct(dec, format_config))
 924          goto error_sources;
 925    } else {
 926       if (!init_mc_source_widthout_idct(dec, format_config))
 927          goto error_sources;
 928    }
 929
 930    if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, format_config->mc_scale,
 931                    mc_vert_shader_callback, mc_frag_shader_callback, dec))
 932       goto error_mc_y;
 933
 934    // TODO
 935    if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, BLOCK_HEIGHT, format_config->mc_scale,
 936                    mc_vert_shader_callback, mc_frag_shader_callback, dec))
 937       goto error_mc_c;
 938
 939    if (!init_pipe_state(dec))
 940       goto error_pipe_state;
 941
 942    return &dec->base;
 943
 944 error_pipe_state:
 945    vl_mc_cleanup(&dec->mc_c);
 946
 947 error_mc_c:
 948    vl_mc_cleanup(&dec->mc_y);
 949
 950 error_mc_y:
 951    if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
 952       vl_idct_cleanup(&dec->idct_y);
 953       vl_idct_cleanup(&dec->idct_c);
 954       dec->idct_source->destroy(dec->idct_source);
 955    }
 956    dec->mc_source->destroy(dec->mc_source);
 957
 958 error_sources:
 959    vl_zscan_cleanup(&dec->zscan_y);
 960    vl_zscan_cleanup(&dec->zscan_c);
 961
 962 error_zscan:
 963    FREE(dec);
 964    return NULL;
 965 }