[g3dvl] respect maximum instruction for idct render targets
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_decoder.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <math.h>
29 #include <assert.h>
30
31 #include <util/u_memory.h>
32 #include <util/u_rect.h>
33 #include <util/u_video.h>
34
35 #include "vl_mpeg12_decoder.h"
36 #include "vl_defines.h"
37
38 #define SCALE_FACTOR_SNORM (32768.0f / 256.0f)
39 #define SCALE_FACTOR_SSCALED (1.0f / 256.0f)
40
41 struct format_config {
42 enum pipe_format zscan_source_format;
43 enum pipe_format idct_source_format;
44 enum pipe_format mc_source_format;
45
46 float idct_scale;
47 float mc_scale;
48 };
49
50 static const struct format_config bitstream_format_config[] = {
51 { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SSCALED },
52 { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, 1.0f, SCALE_FACTOR_SSCALED },
53 { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SNORM },
54 { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, 1.0f, SCALE_FACTOR_SNORM }
55 };
56
57 static const unsigned num_bitstream_format_configs =
58 sizeof(bitstream_format_config) / sizeof(struct format_config);
59
60 static const struct format_config idct_format_config[] = {
61 { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SSCALED },
62 { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, 1.0f, SCALE_FACTOR_SSCALED },
63 { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SNORM },
64 { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, 1.0f, SCALE_FACTOR_SNORM }
65 };
66
67 static const unsigned num_idct_format_configs =
68 sizeof(idct_format_config) / sizeof(struct format_config);
69
70 static const struct format_config mc_format_config[] = {
71 //{ PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_NONE, PIPE_FORMAT_R16_SSCALED, 0.0f, SCALE_FACTOR_SSCALED },
72 { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_NONE, PIPE_FORMAT_R16_SNORM, 0.0f, SCALE_FACTOR_SNORM }
73 };
74
75 static const unsigned num_mc_format_configs =
76 sizeof(mc_format_config) / sizeof(struct format_config);
77
78 static bool
79 init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
80 {
81 enum pipe_format formats[3];
82
83 struct pipe_sampler_view **source;
84 struct pipe_surface **destination;
85
86 struct vl_mpeg12_decoder *dec;
87
88 unsigned i;
89
90 assert(buffer);
91
92 dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
93
94 formats[0] = formats[1] = formats[2] = dec->zscan_source_format;
95 buffer->zscan_source = vl_video_buffer_init(dec->base.context, dec->pipe,
96 dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
97 align(dec->max_blocks, dec->blocks_per_line) / dec->blocks_per_line,
98 1, PIPE_VIDEO_CHROMA_FORMAT_444,
99 formats, PIPE_USAGE_STATIC);
100 if (!buffer->zscan_source)
101 goto error_source;
102
103 source = buffer->zscan_source->get_sampler_view_planes(buffer->zscan_source);
104 if (!source)
105 goto error_sampler;
106
107 if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
108 destination = dec->idct_source->get_surfaces(dec->idct_source);
109 else
110 destination = dec->mc_source->get_surfaces(dec->mc_source);
111
112 if (!destination)
113 goto error_surface;
114
115 for (i = 0; i < VL_MAX_PLANES; ++i)
116 if (!vl_zscan_init_buffer(i == 0 ? &dec->zscan_y : &dec->zscan_c,
117 &buffer->zscan[i], source[i], destination[i]))
118 goto error_plane;
119
120 return true;
121
122 error_plane:
123 for (; i > 0; --i)
124 vl_zscan_cleanup_buffer(&buffer->zscan[i - 1]);
125
126 error_surface:
127 error_sampler:
128 buffer->zscan_source->destroy(buffer->zscan_source);
129
130 error_source:
131 return false;
132 }
133
134 static void
135 cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer)
136 {
137 unsigned i;
138
139 assert(buffer);
140
141 for (i = 0; i < VL_MAX_PLANES; ++i)
142 vl_zscan_cleanup_buffer(&buffer->zscan[i]);
143 buffer->zscan_source->destroy(buffer->zscan_source);
144 }
145
146 static bool
147 init_idct_buffer(struct vl_mpeg12_buffer *buffer)
148 {
149 struct pipe_sampler_view **idct_source_sv, **mc_source_sv;
150
151 struct vl_mpeg12_decoder *dec;
152
153 unsigned i;
154
155 assert(buffer);
156
157 dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
158
159 idct_source_sv = dec->idct_source->get_sampler_view_planes(dec->idct_source);
160 if (!idct_source_sv)
161 goto error_source_sv;
162
163 mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
164 if (!mc_source_sv)
165 goto error_mc_source_sv;
166
167 for (i = 0; i < 3; ++i)
168 if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
169 &buffer->idct[i], idct_source_sv[i],
170 mc_source_sv[i]))
171 goto error_plane;
172
173 return true;
174
175 error_plane:
176 for (; i > 0; --i)
177 vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]);
178
179 error_mc_source_sv:
180 error_source_sv:
181 return false;
182 }
183
184 static void
185 cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
186 {
187 struct vl_mpeg12_decoder *dec;
188 assert(buf);
189
190 dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
191 assert(dec);
192
193 vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
194 vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
195 vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
196 }
197
198 static bool
199 init_mc_buffer(struct vl_mpeg12_buffer *buf)
200 {
201 struct vl_mpeg12_decoder *dec;
202
203 assert(buf);
204
205 dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
206 assert(dec);
207
208 if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0]))
209 goto error_mc_y;
210
211 if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[1]))
212 goto error_mc_cb;
213
214 if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[2]))
215 goto error_mc_cr;
216
217 return true;
218
219 error_mc_cr:
220 vl_mc_cleanup_buffer(&buf->mc[1]);
221
222 error_mc_cb:
223 vl_mc_cleanup_buffer(&buf->mc[0]);
224
225 error_mc_y:
226 return false;
227 }
228
229 static void
230 cleanup_mc_buffer(struct vl_mpeg12_buffer *buf)
231 {
232 unsigned i;
233
234 assert(buf);
235
236 for (i = 0; i < VL_MAX_PLANES; ++i)
237 vl_mc_cleanup_buffer(&buf->mc[i]);
238 }
239
240 static void
241 vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
242 {
243 struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
244 struct vl_mpeg12_decoder *dec;
245
246 assert(buf);
247
248 dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
249 assert(dec);
250
251 cleanup_zscan_buffer(buf);
252
253 if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
254 cleanup_idct_buffer(buf);
255
256 cleanup_mc_buffer(buf);
257
258 vl_vb_cleanup(&buf->vertex_stream);
259
260 FREE(buf);
261 }
262
263 static void
264 vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer)
265 {
266 struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
267 struct vl_mpeg12_decoder *dec;
268
269 struct pipe_sampler_view **sampler_views;
270 unsigned i;
271
272 assert(buf);
273
274 dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
275 assert(dec);
276
277 vl_vb_map(&buf->vertex_stream, dec->pipe);
278
279 sampler_views = buf->zscan_source->get_sampler_view_planes(buf->zscan_source);
280
281 assert(sampler_views);
282
283 for (i = 0; i < VL_MAX_PLANES; ++i) {
284 struct pipe_resource *tex = sampler_views[i]->texture;
285 struct pipe_box rect =
286 {
287 0, 0, 0,
288 tex->width0,
289 tex->height0,
290 1
291 };
292
293 buf->tex_transfer[i] = dec->pipe->get_transfer
294 (
295 dec->pipe, tex,
296 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
297 &rect
298 );
299
300 buf->texels[i] = dec->pipe->transfer_map(dec->pipe, buf->tex_transfer[i]);
301 }
302
303 if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
304 struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
305 struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES];
306
307 for (i = 0; i < VL_MAX_PLANES; ++i)
308 ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i);
309
310 for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
311 mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i);
312
313 vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream);
314 } else {
315 static const uint8_t dummy_quant[64] = {
316 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
317 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
318 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
319 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
320 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
321 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
322 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
323 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
324 };
325
326 for (i = 0; i < VL_MAX_PLANES; ++i) {
327 vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear);
328 vl_zscan_upload_quant(&buf->zscan[i], dummy_quant, dummy_quant);
329 }
330 }
331 }
332
333 static struct pipe_ycbcr_block *
334 vl_mpeg12_buffer_get_ycbcr_stream(struct pipe_video_decode_buffer *buffer, int component)
335 {
336 struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
337
338 assert(buf);
339
340 return vl_vb_get_ycbcr_stream(&buf->vertex_stream, component);
341 }
342
343 static short *
344 vl_mpeg12_buffer_get_ycbcr_buffer(struct pipe_video_decode_buffer *buffer, int component)
345 {
346 struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
347
348 assert(buf);
349 assert(component < VL_MAX_PLANES);
350
351 return buf->texels[component];
352 }
353
354 static unsigned
355 vl_mpeg12_buffer_get_mv_stream_stride(struct pipe_video_decode_buffer *buffer)
356 {
357 struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
358
359 assert(buf);
360
361 return vl_vb_get_mv_stream_stride(&buf->vertex_stream);
362 }
363
364 static struct pipe_motionvector *
365 vl_mpeg12_buffer_get_mv_stream(struct pipe_video_decode_buffer *buffer, int ref_frame)
366 {
367 struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
368
369 assert(buf);
370
371 return vl_vb_get_mv_stream(&buf->vertex_stream, ref_frame);
372 }
373
374 static void
375 vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
376 unsigned num_bytes, const void *data,
377 struct pipe_mpeg12_picture_desc *picture,
378 unsigned num_ycbcr_blocks[3])
379 {
380 struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
381 uint8_t intra_quantizer_matrix[64];
382 struct vl_mpeg12_decoder *dec;
383 unsigned i;
384
385 assert(buf);
386
387 dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
388 assert(dec);
389
390 memcpy(intra_quantizer_matrix, picture->intra_quantizer_matrix, sizeof(intra_quantizer_matrix));
391 intra_quantizer_matrix[0] = 1 << (7 - picture->intra_dc_precision);
392
393 for (i = 0; i < VL_MAX_PLANES; ++i) {
394 vl_zscan_set_layout(&buf->zscan[i], picture->alternate_scan ? dec->zscan_alternate : dec->zscan_normal);
395 vl_zscan_upload_quant(&buf->zscan[i], intra_quantizer_matrix, picture->non_intra_quantizer_matrix);
396 }
397
398 vl_mpg12_bs_decode(&buf->bs, num_bytes, data, picture, num_ycbcr_blocks);
399 }
400
401 static void
402 vl_mpeg12_buffer_unmap(struct pipe_video_decode_buffer *buffer)
403 {
404 struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
405 struct vl_mpeg12_decoder *dec;
406 unsigned i;
407
408 assert(buf);
409
410 dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
411 assert(dec);
412
413 vl_vb_unmap(&buf->vertex_stream, dec->pipe);
414
415 for (i = 0; i < VL_MAX_PLANES; ++i) {
416 dec->pipe->transfer_unmap(dec->pipe, buf->tex_transfer[i]);
417 dec->pipe->transfer_destroy(dec->pipe, buf->tex_transfer[i]);
418 }
419 }
420
421 static void
422 vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
423 {
424 struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
425
426 assert(decoder);
427
428 /* Asserted in softpipe_delete_fs_state() for some reason */
429 dec->pipe->bind_vs_state(dec->pipe, NULL);
430 dec->pipe->bind_fs_state(dec->pipe, NULL);
431
432 dec->pipe->delete_depth_stencil_alpha_state(dec->pipe, dec->dsa);
433 dec->pipe->delete_sampler_state(dec->pipe, dec->sampler_ycbcr);
434
435 vl_mc_cleanup(&dec->mc_y);
436 vl_mc_cleanup(&dec->mc_c);
437 dec->mc_source->destroy(dec->mc_source);
438
439 if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
440 vl_idct_cleanup(&dec->idct_y);
441 vl_idct_cleanup(&dec->idct_c);
442 dec->idct_source->destroy(dec->idct_source);
443 }
444
445 vl_zscan_cleanup(&dec->zscan_y);
446 vl_zscan_cleanup(&dec->zscan_c);
447
448 dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
449 dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv);
450
451 pipe_resource_reference(&dec->quads.buffer, NULL);
452 pipe_resource_reference(&dec->pos.buffer, NULL);
453
454 pipe_sampler_view_reference(&dec->zscan_linear, NULL);
455 pipe_sampler_view_reference(&dec->zscan_normal, NULL);
456 pipe_sampler_view_reference(&dec->zscan_alternate, NULL);
457
458 FREE(dec);
459 }
460
461 static struct pipe_video_decode_buffer *
462 vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
463 {
464 struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
465 struct vl_mpeg12_buffer *buffer;
466
467 assert(dec);
468
469 buffer = CALLOC_STRUCT(vl_mpeg12_buffer);
470 if (buffer == NULL)
471 return NULL;
472
473 buffer->base.decoder = decoder;
474 buffer->base.destroy = vl_mpeg12_buffer_destroy;
475 buffer->base.map = vl_mpeg12_buffer_map;
476 buffer->base.get_ycbcr_stream = vl_mpeg12_buffer_get_ycbcr_stream;
477 buffer->base.get_ycbcr_buffer = vl_mpeg12_buffer_get_ycbcr_buffer;
478 buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride;
479 buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream;
480 buffer->base.decode_bitstream = vl_mpeg12_buffer_decode_bitstream;
481 buffer->base.unmap = vl_mpeg12_buffer_unmap;
482
483 if (!vl_vb_init(&buffer->vertex_stream, dec->pipe,
484 dec->base.width / MACROBLOCK_WIDTH,
485 dec->base.height / MACROBLOCK_HEIGHT))
486 goto error_vertex_buffer;
487
488 if (!init_mc_buffer(buffer))
489 goto error_mc;
490
491 if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
492 if (!init_idct_buffer(buffer))
493 goto error_idct;
494
495 if (!init_zscan_buffer(buffer))
496 goto error_zscan;
497
498 if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
499 vl_mpg12_bs_init(&buffer->bs,
500 dec->base.width / MACROBLOCK_WIDTH,
501 dec->base.height / MACROBLOCK_HEIGHT);
502
503 return &buffer->base;
504
505 error_zscan:
506 if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
507 cleanup_idct_buffer(buffer);
508
509 error_idct:
510 cleanup_mc_buffer(buffer);
511
512 error_mc:
513 vl_vb_cleanup(&buffer->vertex_stream);
514
515 error_vertex_buffer:
516 FREE(buffer);
517 return NULL;
518 }
519
520 static void
521 vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
522 unsigned num_ycbcr_blocks[3],
523 struct pipe_video_buffer *refs[2],
524 struct pipe_video_buffer *dst)
525 {
526 struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
527 struct vl_mpeg12_decoder *dec;
528
529 struct pipe_sampler_view **sv[VL_MAX_REF_FRAMES], **mc_source_sv;
530 struct pipe_surface **surfaces;
531
532 struct pipe_vertex_buffer vb[3];
533
534 unsigned i, j, component;
535 unsigned nr_components;
536
537 assert(buf);
538
539 dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
540 assert(dec);
541
542 for (i = 0; i < 2; ++i)
543 sv[i] = refs[i] ? refs[i]->get_sampler_view_planes(refs[i]) : NULL;
544
545 vb[0] = dec->quads;
546 vb[1] = dec->pos;
547
548 surfaces = dst->get_surfaces(dst);
549
550 dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_mv);
551 for (i = 0; i < VL_MAX_PLANES; ++i) {
552 if (!surfaces[i]) continue;
553
554 vl_mc_set_surface(&buf->mc[i], surfaces[i]);
555
556 for (j = 0; j < VL_MAX_REF_FRAMES; ++j) {
557 if (!sv[j]) continue;
558
559 vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);;
560 dec->pipe->set_vertex_buffers(dec->pipe, 3, vb);
561
562 vl_mc_render_ref(&buf->mc[i], sv[j][i]);
563 }
564 }
565
566 dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
567 for (i = 0; i < VL_MAX_PLANES; ++i) {
568 if (!num_ycbcr_blocks[i]) continue;
569
570 vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
571 dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
572
573 vl_zscan_render(&buf->zscan[i] , num_ycbcr_blocks[i]);
574
575 if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
576 vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_ycbcr_blocks[i]);
577 }
578
579 mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
580 for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) {
581 if (!surfaces[i]) continue;
582
583 nr_components = util_format_get_nr_components(surfaces[i]->texture->format);
584 for (j = 0; j < nr_components; ++j, ++component) {
585 if (!num_ycbcr_blocks[i]) continue;
586
587 vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component);
588 dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
589
590 if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
591 vl_idct_prepare_stage2(component == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[component]);
592 else {
593 dec->pipe->set_fragment_sampler_views(dec->pipe, 1, &mc_source_sv[component]);
594 dec->pipe->bind_fragment_sampler_states(dec->pipe, 1, &dec->sampler_ycbcr);
595 }
596 vl_mc_render_ycbcr(&buf->mc[i], j, num_ycbcr_blocks[component]);
597 }
598 }
599 }
600
601 static bool
602 init_pipe_state(struct vl_mpeg12_decoder *dec)
603 {
604 struct pipe_depth_stencil_alpha_state dsa;
605 struct pipe_sampler_state sampler;
606 unsigned i;
607
608 assert(dec);
609
610 memset(&dsa, 0, sizeof dsa);
611 dsa.depth.enabled = 0;
612 dsa.depth.writemask = 0;
613 dsa.depth.func = PIPE_FUNC_ALWAYS;
614 for (i = 0; i < 2; ++i) {
615 dsa.stencil[i].enabled = 0;
616 dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
617 dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
618 dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
619 dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
620 dsa.stencil[i].valuemask = 0;
621 dsa.stencil[i].writemask = 0;
622 }
623 dsa.alpha.enabled = 0;
624 dsa.alpha.func = PIPE_FUNC_ALWAYS;
625 dsa.alpha.ref_value = 0;
626 dec->dsa = dec->pipe->create_depth_stencil_alpha_state(dec->pipe, &dsa);
627 dec->pipe->bind_depth_stencil_alpha_state(dec->pipe, dec->dsa);
628
629 memset(&sampler, 0, sizeof(sampler));
630 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
631 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
632 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
633 sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
634 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
635 sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
636 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
637 sampler.compare_func = PIPE_FUNC_ALWAYS;
638 sampler.normalized_coords = 1;
639 dec->sampler_ycbcr = dec->pipe->create_sampler_state(dec->pipe, &sampler);
640 if (!dec->sampler_ycbcr)
641 return false;
642
643 return true;
644 }
645
646 static const struct format_config*
647 find_format_config(struct vl_mpeg12_decoder *dec, const struct format_config configs[], unsigned num_configs)
648 {
649 struct pipe_screen *screen;
650 unsigned i;
651
652 assert(dec);
653
654 screen = dec->pipe->screen;
655
656 for (i = 0; i < num_configs; ++i) {
657 if (!screen->is_format_supported(screen, configs[i].zscan_source_format, PIPE_TEXTURE_2D,
658 1, PIPE_BIND_SAMPLER_VIEW))
659 continue;
660
661 if (configs[i].idct_source_format != PIPE_FORMAT_NONE) {
662 if (!screen->is_format_supported(screen, configs[i].idct_source_format, PIPE_TEXTURE_2D,
663 1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
664 continue;
665
666 if (!screen->is_format_supported(screen, configs[i].mc_source_format, PIPE_TEXTURE_3D,
667 1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
668 continue;
669 } else {
670 if (!screen->is_format_supported(screen, configs[i].mc_source_format, PIPE_TEXTURE_2D,
671 1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
672 continue;
673 }
674 return &configs[i];
675 }
676
677 return NULL;
678 }
679
680 static bool
681 init_zscan(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
682 {
683 const unsigned block_size_pixels = BLOCK_WIDTH * BLOCK_HEIGHT;
684 unsigned num_channels;
685
686 assert(dec);
687
688 dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4);
689 dec->max_blocks = (dec->base.width * dec->base.height) / block_size_pixels;
690
691 dec->zscan_source_format = format_config->zscan_source_format;
692 dec->zscan_linear = vl_zscan_layout(dec->pipe, vl_zscan_linear, dec->blocks_per_line);
693 dec->zscan_normal = vl_zscan_layout(dec->pipe, vl_zscan_normal, dec->blocks_per_line);
694 dec->zscan_alternate = vl_zscan_layout(dec->pipe, vl_zscan_alternate, dec->blocks_per_line);
695
696 num_channels = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 4 : 1;
697
698 if (!vl_zscan_init(&dec->zscan_y, dec->pipe, dec->base.width, dec->base.height,
699 dec->blocks_per_line, dec->max_blocks, num_channels))
700 return false;
701
702 if (!vl_zscan_init(&dec->zscan_c, dec->pipe, dec->chroma_width, dec->chroma_height,
703 dec->blocks_per_line, dec->max_blocks, num_channels))
704 return false;
705
706 return true;
707 }
708
709 static bool
710 init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
711 {
712 unsigned nr_of_idct_render_targets, max_inst;
713 enum pipe_format formats[3];
714
715 struct pipe_sampler_view *matrix = NULL;
716
717 nr_of_idct_render_targets = dec->pipe->screen->get_param(dec->pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS);
718 max_inst = dec->pipe->screen->get_shader_param(dec->pipe->screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_INSTRUCTIONS);
719
720 // Just assume we need 32 inst per render target, not 100% true, but should work in most cases
721 if (nr_of_idct_render_targets >= 4 && max_inst >= 32*4)
722 // more than 4 render targets usually doesn't makes any seens
723 nr_of_idct_render_targets = 4;
724 else
725 nr_of_idct_render_targets = 1;
726
727 formats[0] = formats[1] = formats[2] = format_config->idct_source_format;
728 dec->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
729 dec->base.width / 4, dec->base.height, 1,
730 dec->base.chroma_format,
731 formats, PIPE_USAGE_STATIC);
732 if (!dec->idct_source)
733 goto error_idct_source;
734
735 formats[0] = formats[1] = formats[2] = format_config->mc_source_format;
736 dec->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
737 dec->base.width / nr_of_idct_render_targets,
738 dec->base.height / 4, nr_of_idct_render_targets,
739 dec->base.chroma_format,
740 formats, PIPE_USAGE_STATIC);
741
742 if (!dec->mc_source)
743 goto error_mc_source;
744
745 if (!(matrix = vl_idct_upload_matrix(dec->pipe, format_config->idct_scale)))
746 goto error_matrix;
747
748 if (!vl_idct_init(&dec->idct_y, dec->pipe, dec->base.width, dec->base.height,
749 nr_of_idct_render_targets, matrix, matrix))
750 goto error_y;
751
752 if(!vl_idct_init(&dec->idct_c, dec->pipe, dec->chroma_width, dec->chroma_height,
753 nr_of_idct_render_targets, matrix, matrix))
754 goto error_c;
755
756 pipe_sampler_view_reference(&matrix, NULL);
757
758 return true;
759
760 error_c:
761 vl_idct_cleanup(&dec->idct_y);
762
763 error_y:
764 pipe_sampler_view_reference(&matrix, NULL);
765
766 error_matrix:
767 dec->mc_source->destroy(dec->mc_source);
768
769 error_mc_source:
770 dec->idct_source->destroy(dec->idct_source);
771
772 error_idct_source:
773 return false;
774 }
775
776 static bool
777 init_mc_source_widthout_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
778 {
779 enum pipe_format formats[3];
780
781 formats[0] = formats[1] = formats[2] = format_config->mc_source_format;
782 dec->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
783 dec->base.width, dec->base.height, 1,
784 dec->base.chroma_format,
785 formats, PIPE_USAGE_STATIC);
786
787 return dec->mc_source != NULL;
788 }
789
790 static void
791 mc_vert_shader_callback(void *priv, struct vl_mc *mc,
792 struct ureg_program *shader,
793 unsigned first_output,
794 struct ureg_dst tex)
795 {
796 struct vl_mpeg12_decoder *dec = priv;
797 struct ureg_dst o_vtex;
798
799 assert(priv && mc);
800 assert(shader);
801
802 if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
803 struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c;
804 vl_idct_stage2_vert_shader(idct, shader, first_output, tex);
805 } else {
806 o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output);
807 ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(tex));
808 }
809 }
810
811 static void
812 mc_frag_shader_callback(void *priv, struct vl_mc *mc,
813 struct ureg_program *shader,
814 unsigned first_input,
815 struct ureg_dst dst)
816 {
817 struct vl_mpeg12_decoder *dec = priv;
818 struct ureg_src src, sampler;
819
820 assert(priv && mc);
821 assert(shader);
822
823 if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
824 struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c;
825 vl_idct_stage2_frag_shader(idct, shader, first_input, dst);
826 } else {
827 src = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input, TGSI_INTERPOLATE_LINEAR);
828 sampler = ureg_DECL_sampler(shader, 0);
829 ureg_TEX(shader, dst, TGSI_TEXTURE_2D, src, sampler);
830 }
831 }
832
833 struct pipe_video_decoder *
834 vl_create_mpeg12_decoder(struct pipe_video_context *context,
835 struct pipe_context *pipe,
836 enum pipe_video_profile profile,
837 enum pipe_video_entrypoint entrypoint,
838 enum pipe_video_chroma_format chroma_format,
839 unsigned width, unsigned height)
840 {
841 const struct format_config *format_config;
842 struct vl_mpeg12_decoder *dec;
843
844 assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
845
846 dec = CALLOC_STRUCT(vl_mpeg12_decoder);
847
848 if (!dec)
849 return NULL;
850
851 dec->base.context = context;
852 dec->base.profile = profile;
853 dec->base.entrypoint = entrypoint;
854 dec->base.chroma_format = chroma_format;
855 dec->base.width = width;
856 dec->base.height = height;
857
858 dec->base.destroy = vl_mpeg12_destroy;
859 dec->base.create_buffer = vl_mpeg12_create_buffer;
860 dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer;
861
862 dec->pipe = pipe;
863
864 dec->quads = vl_vb_upload_quads(dec->pipe);
865 dec->pos = vl_vb_upload_pos(
866 dec->pipe,
867 dec->base.width / MACROBLOCK_WIDTH,
868 dec->base.height / MACROBLOCK_HEIGHT
869 );
870
871 dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->pipe);
872 dec->ves_mv = vl_vb_get_ves_mv(dec->pipe);
873
874 /* TODO: Implement 422, 444 */
875 assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
876
877 if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
878 dec->chroma_width = dec->base.width / 2;
879 dec->chroma_height = dec->base.height / 2;
880 } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
881 dec->chroma_width = dec->base.width;
882 dec->chroma_height = dec->base.height / 2;
883 } else {
884 dec->chroma_width = dec->base.width;
885 dec->chroma_height = dec->base.height;
886 }
887
888 switch (entrypoint) {
889 case PIPE_VIDEO_ENTRYPOINT_BITSTREAM:
890 format_config = find_format_config(dec, bitstream_format_config, num_bitstream_format_configs);
891 break;
892
893 case PIPE_VIDEO_ENTRYPOINT_IDCT:
894 format_config = find_format_config(dec, idct_format_config, num_idct_format_configs);
895 break;
896
897 case PIPE_VIDEO_ENTRYPOINT_MC:
898 format_config = find_format_config(dec, mc_format_config, num_mc_format_configs);
899 break;
900
901 default:
902 assert(0);
903 return NULL;
904 }
905
906 if (!format_config)
907 return NULL;
908
909 if (!init_zscan(dec, format_config))
910 goto error_zscan;
911
912 if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
913 if (!init_idct(dec, format_config))
914 goto error_sources;
915 } else {
916 if (!init_mc_source_widthout_idct(dec, format_config))
917 goto error_sources;
918 }
919
920 if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, format_config->mc_scale,
921 mc_vert_shader_callback, mc_frag_shader_callback, dec))
922 goto error_mc_y;
923
924 // TODO
925 if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, BLOCK_HEIGHT, format_config->mc_scale,
926 mc_vert_shader_callback, mc_frag_shader_callback, dec))
927 goto error_mc_c;
928
929 if (!init_pipe_state(dec))
930 goto error_pipe_state;
931
932 return &dec->base;
933
934 error_pipe_state:
935 vl_mc_cleanup(&dec->mc_c);
936
937 error_mc_c:
938 vl_mc_cleanup(&dec->mc_y);
939
940 error_mc_y:
941 if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
942 vl_idct_cleanup(&dec->idct_y);
943 vl_idct_cleanup(&dec->idct_c);
944 dec->idct_source->destroy(dec->idct_source);
945 }
946 dec->mc_source->destroy(dec->mc_source);
947
948 error_sources:
949 vl_zscan_cleanup(&dec->zscan_y);
950 vl_zscan_cleanup(&dec->zscan_c);
951
952 error_zscan:
953 FREE(dec);
954 return NULL;
955 }