radeon/uvd: fix feedback buffer handling v2
[mesa.git] / src / gallium / drivers / radeon / radeon_uvd.c
1 /**************************************************************************
2 *
3 * Copyright 2011 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Christian König <christian.koenig@amd.com>
31 *
32 */
33
34 #include <sys/types.h>
35 #include <assert.h>
36 #include <errno.h>
37 #include <unistd.h>
38 #include <stdio.h>
39
40 #include "pipe/p_video_codec.h"
41
42 #include "util/u_memory.h"
43 #include "util/u_video.h"
44
45 #include "vl/vl_defines.h"
46 #include "vl/vl_mpeg12_decoder.h"
47
48 #include "../../winsys/radeon/drm/radeon_winsys.h"
49 #include "r600_pipe_common.h"
50 #include "radeon_uvd.h"
51
/* print an error to stderr, prefixed with file, line and function name */
#define RUVD_ERR(fmt, args...) \
    fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)

/* number of message/feedback and bitstream buffers the decoder cycles through */
#define NUM_BUFFERS 4

/* reference frame counts used when sizing the DPB and clamping frame indices */
#define NUM_MPEG2_REFS 6
#define NUM_H264_REFS 17
#define NUM_VC1_REFS 5

/* byte offset and size of the feedback area inside a message/feedback buffer */
#define FB_BUFFER_OFFSET 0x1000
#define FB_BUFFER_SIZE 2048
63
/*
 * UVD buffer representation: a winsys buffer plus the handle that is
 * passed to the command stream / map / unmap calls for that buffer.
 */
struct ruvd_buffer {
    struct pb_buffer *buf;
    struct radeon_winsys_cs_handle *cs_handle;
};
70
71 /* UVD decoder representation */
72 struct ruvd_decoder {
73 struct pipe_video_codec base;
74
75 ruvd_set_dtb set_dtb;
76
77 unsigned stream_handle;
78 unsigned frame_number;
79
80 struct radeon_winsys* ws;
81 struct radeon_winsys_cs* cs;
82
83 unsigned cur_buffer;
84
85 struct ruvd_buffer msg_fb_buffers[NUM_BUFFERS];
86 struct ruvd_msg *msg;
87 uint32_t *fb;
88
89 struct ruvd_buffer bs_buffers[NUM_BUFFERS];
90 void* bs_ptr;
91 unsigned bs_size;
92
93 struct ruvd_buffer dpb;
94 };
95
/*
 * Generate a UVD stream handle.
 *
 * The handle is the bit-reversed process id XORed with a counter that is
 * incremented on every call, so successive calls within one process return
 * different values.
 */
static unsigned alloc_stream_handle(void)
{
    static unsigned counter = 0;
    unsigned stream_handle = 0;
    unsigned pid = getpid();
    int i;

    /* mirror the 32 low bits of the pid: bit i ends up at bit 31 - i */
    for (i = 0; i < 32; ++i)
        stream_handle |= ((pid >> i) & 1) << (31 - i);

    stream_handle ^= ++counter;
    return stream_handle;
}
110
111 /* flush IB to the hardware */
112 static void flush(struct ruvd_decoder *dec)
113 {
114 dec->ws->cs_flush(dec->cs, RADEON_FLUSH_ASYNC, 0);
115 }
116
117 /* add a new set register command to the IB */
118 static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val)
119 {
120 uint32_t *pm4 = dec->cs->buf;
121 pm4[dec->cs->cdw++] = RUVD_PKT0(reg >> 2, 0);
122 pm4[dec->cs->cdw++] = val;
123 }
124
125 /* send a command to the VCPU through the GPCOM registers */
126 static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
127 struct radeon_winsys_cs_handle* cs_buf, uint32_t off,
128 enum radeon_bo_usage usage, enum radeon_bo_domain domain)
129 {
130 int reloc_idx;
131
132 reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain);
133 set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
134 set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
135 set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1);
136 }
137
138 /* map the next available message/feedback buffer */
139 static void map_msg_fb_buf(struct ruvd_decoder *dec)
140 {
141 struct ruvd_buffer* buf;
142 uint8_t *ptr;
143
144 /* grab the current message/feedback buffer */
145 buf = &dec->msg_fb_buffers[dec->cur_buffer];
146
147 /* and map it for CPU access */
148 ptr = dec->ws->buffer_map(buf->cs_handle, dec->cs, PIPE_TRANSFER_WRITE);
149
150 /* calc buffer offsets */
151 dec->msg = (struct ruvd_msg *)ptr;
152 dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
153 }
154
155 /* unmap and send a message command to the VCPU */
156 static void send_msg_buf(struct ruvd_decoder *dec)
157 {
158 struct ruvd_buffer* buf;
159
160 /* ignore the request if message/feedback buffer isn't mapped */
161 if (!dec->msg || !dec->fb)
162 return;
163
164 /* grap the current message buffer */
165 buf = &dec->msg_fb_buffers[dec->cur_buffer];
166
167 /* unmap the buffer */
168 dec->ws->buffer_unmap(buf->cs_handle);
169 dec->msg = NULL;
170 dec->fb = NULL;
171
172 /* and send it to the hardware */
173 send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->cs_handle, 0,
174 RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
175 }
176
177 /* create a buffer in the winsys */
178 static bool create_buffer(struct ruvd_decoder *dec,
179 struct ruvd_buffer *buffer,
180 unsigned size)
181 {
182 buffer->buf = dec->ws->buffer_create(dec->ws, size, 4096, false,
183 RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM);
184 if (!buffer->buf)
185 return false;
186
187 buffer->cs_handle = dec->ws->buffer_get_cs_handle(buffer->buf);
188 if (!buffer->cs_handle)
189 return false;
190
191 return true;
192 }
193
194 /* destroy a buffer */
195 static void destroy_buffer(struct ruvd_buffer *buffer)
196 {
197 pb_reference(&buffer->buf, NULL);
198 buffer->cs_handle = NULL;
199 }
200
201 /* reallocate a buffer, preserving its content */
202 static bool resize_buffer(struct ruvd_decoder *dec,
203 struct ruvd_buffer *new_buf,
204 unsigned new_size)
205 {
206 unsigned bytes = MIN2(new_buf->buf->size, new_size);
207 struct ruvd_buffer old_buf = *new_buf;
208 void *src = NULL, *dst = NULL;
209
210 if (!create_buffer(dec, new_buf, new_size))
211 goto error;
212
213 src = dec->ws->buffer_map(old_buf.cs_handle, dec->cs, PIPE_TRANSFER_READ);
214 if (!src)
215 goto error;
216
217 dst = dec->ws->buffer_map(new_buf->cs_handle, dec->cs, PIPE_TRANSFER_WRITE);
218 if (!dst)
219 goto error;
220
221 memcpy(dst, src, bytes);
222 if (new_size > bytes) {
223 new_size -= bytes;
224 dst += bytes;
225 memset(dst, 0, new_size);
226 }
227 dec->ws->buffer_unmap(new_buf->cs_handle);
228 dec->ws->buffer_unmap(old_buf.cs_handle);
229 destroy_buffer(&old_buf);
230 return true;
231
232 error:
233 if (src) dec->ws->buffer_unmap(old_buf.cs_handle);
234 destroy_buffer(new_buf);
235 *new_buf = old_buf;
236 return false;
237 }
238
239 /* clear the buffer with zeros */
240 static void clear_buffer(struct ruvd_decoder *dec,
241 struct ruvd_buffer* buffer)
242 {
243 //TODO: let the GPU do the job
244 void *ptr = dec->ws->buffer_map(buffer->cs_handle, dec->cs,
245 PIPE_TRANSFER_WRITE);
246 if (!ptr)
247 return;
248
249 memset(ptr, 0, buffer->buf->size);
250 dec->ws->buffer_unmap(buffer->cs_handle);
251 }
252
253 /* cycle to the next set of buffers */
254 static void next_buffer(struct ruvd_decoder *dec)
255 {
256 ++dec->cur_buffer;
257 dec->cur_buffer %= NUM_BUFFERS;
258 }
259
260 /* convert the profile into something UVD understands */
261 static uint32_t profile2stream_type(enum pipe_video_profile profile)
262 {
263 switch (u_reduce_video_profile(profile)) {
264 case PIPE_VIDEO_FORMAT_MPEG4_AVC:
265 return RUVD_CODEC_H264;
266
267 case PIPE_VIDEO_FORMAT_VC1:
268 return RUVD_CODEC_VC1;
269
270 case PIPE_VIDEO_FORMAT_MPEG12:
271 return RUVD_CODEC_MPEG2;
272
273 case PIPE_VIDEO_FORMAT_MPEG4:
274 return RUVD_CODEC_MPEG4;
275
276 default:
277 assert(0);
278 return 0;
279 }
280 }
281
282 /* calculate size of reference picture buffer */
283 static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
284 {
285 unsigned width_in_mb, height_in_mb, image_size, dpb_size;
286
287 // always align them to MB size for dpb calculation
288 unsigned width = align(templ->width, VL_MACROBLOCK_WIDTH);
289 unsigned height = align(templ->height, VL_MACROBLOCK_HEIGHT);
290
291 // always one more for currently decoded picture
292 unsigned max_references = templ->max_references + 1;
293
294 // aligned size of a single frame
295 image_size = width * height;
296 image_size += image_size / 2;
297 image_size = align(image_size, 1024);
298
299 // picture width & height in 16 pixel units
300 width_in_mb = width / VL_MACROBLOCK_WIDTH;
301 height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
302
303 switch (u_reduce_video_profile(templ->profile)) {
304 case PIPE_VIDEO_FORMAT_MPEG4_AVC:
305 // the firmware seems to allways assume a minimum of ref frames
306 max_references = MAX2(NUM_H264_REFS, max_references);
307
308 // reference picture buffer
309 dpb_size = image_size * max_references;
310
311 // macroblock context buffer
312 dpb_size += width_in_mb * height_in_mb * max_references * 192;
313
314 // IT surface buffer
315 dpb_size += width_in_mb * height_in_mb * 32;
316 break;
317
318 case PIPE_VIDEO_FORMAT_VC1:
319 // the firmware seems to allways assume a minimum of ref frames
320 max_references = MAX2(NUM_VC1_REFS, max_references);
321
322 // reference picture buffer
323 dpb_size = image_size * max_references;
324
325 // CONTEXT_BUFFER
326 dpb_size += width_in_mb * height_in_mb * 128;
327
328 // IT surface buffer
329 dpb_size += width_in_mb * 64;
330
331 // DB surface buffer
332 dpb_size += width_in_mb * 128;
333
334 // BP
335 dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
336 break;
337
338 case PIPE_VIDEO_FORMAT_MPEG12:
339 // reference picture buffer, must be big enough for all frames
340 dpb_size = image_size * NUM_MPEG2_REFS;
341 break;
342
343 case PIPE_VIDEO_FORMAT_MPEG4:
344 // reference picture buffer
345 dpb_size = image_size * max_references;
346
347 // CM
348 dpb_size += width_in_mb * height_in_mb * 64;
349
350 // IT surface buffer
351 dpb_size += align(width_in_mb * height_in_mb * 32, 64);
352 break;
353
354 default:
355 // something is missing here
356 assert(0);
357
358 // at least use a sane default value
359 dpb_size = 32 * 1024 * 1024;
360 break;
361 }
362 return dpb_size;
363 }
364
365 /* get h264 specific message bits */
366 static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)
367 {
368 struct ruvd_h264 result;
369
370 memset(&result, 0, sizeof(result));
371 switch (pic->base.profile) {
372 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
373 result.profile = RUVD_H264_PROFILE_BASELINE;
374 break;
375
376 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
377 result.profile = RUVD_H264_PROFILE_MAIN;
378 break;
379
380 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
381 result.profile = RUVD_H264_PROFILE_HIGH;
382 break;
383
384 default:
385 assert(0);
386 break;
387 }
388 if (((dec->base.width * dec->base.height) >> 8) <= 1620)
389 result.level = 30;
390 else
391 result.level = 41;
392
393 result.sps_info_flags = 0;
394 result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
395 result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;
396 result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
397 result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;
398
399 result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
400 result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
401 result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;
402 result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;
403 result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
404
405 switch (dec->base.chroma_format) {
406 case PIPE_VIDEO_CHROMA_FORMAT_400:
407 result.chroma_format = 0;
408 break;
409 case PIPE_VIDEO_CHROMA_FORMAT_420:
410 result.chroma_format = 1;
411 break;
412 case PIPE_VIDEO_CHROMA_FORMAT_422:
413 result.chroma_format = 2;
414 break;
415 case PIPE_VIDEO_CHROMA_FORMAT_444:
416 result.chroma_format = 3;
417 break;
418 }
419
420 result.pps_info_flags = 0;
421 result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;
422 result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;
423 result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;
424 result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;
425 result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;
426 result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;
427 result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;
428 result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;
429
430 result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;
431 result.slice_group_map_type = pic->pps->slice_group_map_type;
432 result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;
433 result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;
434 result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;
435 result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;
436
437 memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16);
438 memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64);
439
440 result.num_ref_frames = pic->num_ref_frames;
441
442 result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
443 result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
444
445 result.frame_num = pic->frame_num;
446 memcpy(result.frame_num_list, pic->frame_num_list, 4*16);
447 result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
448 result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
449 memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2);
450
451 result.decoded_pic_idx = pic->frame_num;
452
453 return result;
454 }
455
456 /* get vc1 specific message bits */
457 static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
458 {
459 struct ruvd_vc1 result;
460
461 memset(&result, 0, sizeof(result));
462
463 switch(pic->base.profile) {
464 case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
465 result.profile = RUVD_VC1_PROFILE_SIMPLE;
466 result.level = 1;
467 break;
468
469 case PIPE_VIDEO_PROFILE_VC1_MAIN:
470 result.profile = RUVD_VC1_PROFILE_MAIN;
471 result.level = 2;
472 break;
473
474 case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
475 result.profile = RUVD_VC1_PROFILE_ADVANCED;
476 result.level = 4;
477 break;
478
479 default:
480 assert(0);
481 }
482
483 /* fields common for all profiles */
484 result.sps_info_flags |= pic->postprocflag << 7;
485 result.sps_info_flags |= pic->pulldown << 6;
486 result.sps_info_flags |= pic->interlace << 5;
487 result.sps_info_flags |= pic->tfcntrflag << 4;
488 result.sps_info_flags |= pic->finterpflag << 3;
489 result.sps_info_flags |= pic->psf << 1;
490
491 result.pps_info_flags |= pic->range_mapy_flag << 31;
492 result.pps_info_flags |= pic->range_mapy << 28;
493 result.pps_info_flags |= pic->range_mapuv_flag << 27;
494 result.pps_info_flags |= pic->range_mapuv << 24;
495 result.pps_info_flags |= pic->multires << 21;
496 result.pps_info_flags |= pic->maxbframes << 16;
497 result.pps_info_flags |= pic->overlap << 11;
498 result.pps_info_flags |= pic->quantizer << 9;
499 result.pps_info_flags |= pic->panscan_flag << 7;
500 result.pps_info_flags |= pic->refdist_flag << 6;
501 result.pps_info_flags |= pic->vstransform << 0;
502
503 /* some fields only apply to main/advanced profile */
504 if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {
505 result.pps_info_flags |= pic->syncmarker << 20;
506 result.pps_info_flags |= pic->rangered << 19;
507 result.pps_info_flags |= pic->loopfilter << 5;
508 result.pps_info_flags |= pic->fastuvmc << 4;
509 result.pps_info_flags |= pic->extended_mv << 3;
510 result.pps_info_flags |= pic->extended_dmv << 8;
511 result.pps_info_flags |= pic->dquant << 1;
512 }
513
514 result.chroma_format = 1;
515
516 #if 0
517 //(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
518 uint32_t slice_count
519 uint8_t picture_type
520 uint8_t frame_coding_mode
521 uint8_t deblockEnable
522 uint8_t pquant
523 #endif
524
525 return result;
526 }
527
528 /* extract the frame number from a referenced video buffer */
529 static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref)
530 {
531 uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS;
532 uint32_t max = MAX2(dec->frame_number, 1) - 1;
533 uintptr_t frame;
534
535 /* seems to be the most sane fallback */
536 if (!ref)
537 return max;
538
539 /* get the frame number from the associated data */
540 frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
541
542 /* limit the frame number to a valid range */
543 return MAX2(MIN2(frame, max), min);
544 }
545
546 /* get mpeg2 specific msg bits */
547 static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec,
548 struct pipe_mpeg12_picture_desc *pic)
549 {
550 const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
551 struct ruvd_mpeg2 result;
552 unsigned i;
553
554 memset(&result, 0, sizeof(result));
555 result.decoded_pic_idx = dec->frame_number;
556 for (i = 0; i < 2; ++i)
557 result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
558
559 result.load_intra_quantiser_matrix = 1;
560 result.load_nonintra_quantiser_matrix = 1;
561
562 for (i = 0; i < 64; ++i) {
563 result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];
564 result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]];
565 }
566
567 result.profile_and_level_indication = 0;
568 result.chroma_format = 0x1;
569
570 result.picture_coding_type = pic->picture_coding_type;
571 result.f_code[0][0] = pic->f_code[0][0] + 1;
572 result.f_code[0][1] = pic->f_code[0][1] + 1;
573 result.f_code[1][0] = pic->f_code[1][0] + 1;
574 result.f_code[1][1] = pic->f_code[1][1] + 1;
575 result.intra_dc_precision = pic->intra_dc_precision;
576 result.pic_structure = pic->picture_structure;
577 result.top_field_first = pic->top_field_first;
578 result.frame_pred_frame_dct = pic->frame_pred_frame_dct;
579 result.concealment_motion_vectors = pic->concealment_motion_vectors;
580 result.q_scale_type = pic->q_scale_type;
581 result.intra_vlc_format = pic->intra_vlc_format;
582 result.alternate_scan = pic->alternate_scan;
583
584 return result;
585 }
586
587 /* get mpeg4 specific msg bits */
588 static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec,
589 struct pipe_mpeg4_picture_desc *pic)
590 {
591 struct ruvd_mpeg4 result;
592 unsigned i;
593
594 memset(&result, 0, sizeof(result));
595 result.decoded_pic_idx = dec->frame_number;
596 for (i = 0; i < 2; ++i)
597 result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
598
599 result.variant_type = 0;
600 result.profile_and_level_indication = 0xF0; // ASP Level0
601
602 result.video_object_layer_verid = 0x5; // advanced simple
603 result.video_object_layer_shape = 0x0; // rectangular
604
605 result.video_object_layer_width = dec->base.width;
606 result.video_object_layer_height = dec->base.height;
607
608 result.vop_time_increment_resolution = pic->vop_time_increment_resolution;
609
610 result.flags |= pic->short_video_header << 0;
611 //result.flags |= obmc_disable << 1;
612 result.flags |= pic->interlaced << 2;
613 result.flags |= 1 << 3; // load_intra_quant_mat
614 result.flags |= 1 << 4; // load_nonintra_quant_mat
615 result.flags |= pic->quarter_sample << 5;
616 result.flags |= 1 << 6; // complexity_estimation_disable
617 result.flags |= pic->resync_marker_disable << 7;
618 //result.flags |= data_partitioned << 8;
619 //result.flags |= reversible_vlc << 9;
620 result.flags |= 0 << 10; // newpred_enable
621 result.flags |= 0 << 11; // reduced_resolution_vop_enable
622 //result.flags |= scalability << 12;
623 //result.flags |= is_object_layer_identifier << 13;
624 //result.flags |= fixed_vop_rate << 14;
625 //result.flags |= newpred_segment_type << 15;
626
627 result.quant_type = pic->quant_type;
628
629 for (i = 0; i < 64; ++i) {
630 result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]];
631 result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]];
632 }
633
634 /*
635 int32_t trd [2]
636 int32_t trb [2]
637 uint8_t vop_coding_type
638 uint8_t vop_fcode_forward
639 uint8_t vop_fcode_backward
640 uint8_t rounding_control
641 uint8_t alternate_vertical_scan_flag
642 uint8_t top_field_first
643 */
644
645 return result;
646 }
647
648 /**
649 * destroy this video decoder
650 */
651 static void ruvd_destroy(struct pipe_video_codec *decoder)
652 {
653 struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
654 unsigned i;
655
656 assert(decoder);
657
658 map_msg_fb_buf(dec);
659 memset(dec->msg, 0, sizeof(*dec->msg));
660 dec->msg->size = sizeof(*dec->msg);
661 dec->msg->msg_type = RUVD_MSG_DESTROY;
662 dec->msg->stream_handle = dec->stream_handle;
663 send_msg_buf(dec);
664
665 flush(dec);
666
667 dec->ws->cs_destroy(dec->cs);
668
669 for (i = 0; i < NUM_BUFFERS; ++i) {
670 destroy_buffer(&dec->msg_fb_buffers[i]);
671 destroy_buffer(&dec->bs_buffers[i]);
672 }
673
674 destroy_buffer(&dec->dpb);
675
676 FREE(dec);
677 }
678
/*
 * Destructor callback for the data attached to a video buffer.
 * The data is just a frame number stored in the pointer itself, so there
 * is nothing to free.
 */
static void ruvd_destroy_associated_data(void *data)
{
    (void)data;
}
684
685 /**
686 * start decoding of a new frame
687 */
688 static void ruvd_begin_frame(struct pipe_video_codec *decoder,
689 struct pipe_video_buffer *target,
690 struct pipe_picture_desc *picture)
691 {
692 struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
693 uintptr_t frame;
694
695 assert(decoder);
696
697 frame = ++dec->frame_number;
698 vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
699 &ruvd_destroy_associated_data);
700
701 dec->bs_size = 0;
702 dec->bs_ptr = dec->ws->buffer_map(
703 dec->bs_buffers[dec->cur_buffer].cs_handle,
704 dec->cs, PIPE_TRANSFER_WRITE);
705 }
706
/**
 * decode a macroblock
 *
 * Macroblock level decoding is not implemented; calling this trips an
 * assert in debug builds and does nothing otherwise.
 */
static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,
                                   struct pipe_video_buffer *target,
                                   struct pipe_picture_desc *picture,
                                   const struct pipe_macroblock *macroblocks,
                                   unsigned num_macroblocks)
{
    /* not supported (yet) */
    assert(0);
}
719
720 /**
721 * decode a bitstream
722 */
723 static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
724 struct pipe_video_buffer *target,
725 struct pipe_picture_desc *picture,
726 unsigned num_buffers,
727 const void * const *buffers,
728 const unsigned *sizes)
729 {
730 struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
731 unsigned i;
732
733 assert(decoder);
734
735 if (!dec->bs_ptr)
736 return;
737
738 for (i = 0; i < num_buffers; ++i) {
739 struct ruvd_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
740 unsigned new_size = dec->bs_size + sizes[i];
741
742 if (new_size > buf->buf->size) {
743 dec->ws->buffer_unmap(buf->cs_handle);
744 if (!resize_buffer(dec, buf, new_size)) {
745 RUVD_ERR("Can't resize bitstream buffer!");
746 return;
747 }
748
749 dec->bs_ptr = dec->ws->buffer_map(buf->cs_handle, dec->cs,
750 PIPE_TRANSFER_WRITE);
751 if (!dec->bs_ptr)
752 return;
753
754 dec->bs_ptr += dec->bs_size;
755 }
756
757 memcpy(dec->bs_ptr, buffers[i], sizes[i]);
758 dec->bs_size += sizes[i];
759 dec->bs_ptr += sizes[i];
760 }
761 }
762
763 /**
764 * end decoding of the current frame
765 */
766 static void ruvd_end_frame(struct pipe_video_codec *decoder,
767 struct pipe_video_buffer *target,
768 struct pipe_picture_desc *picture)
769 {
770 struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
771 struct radeon_winsys_cs_handle *dt;
772 struct ruvd_buffer *msg_fb_buf, *bs_buf;
773 unsigned bs_size;
774
775 assert(decoder);
776
777 if (!dec->bs_ptr)
778 return;
779
780 msg_fb_buf = &dec->msg_fb_buffers[dec->cur_buffer];
781 bs_buf = &dec->bs_buffers[dec->cur_buffer];
782
783 bs_size = align(dec->bs_size, 128);
784 memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
785 dec->ws->buffer_unmap(bs_buf->cs_handle);
786
787 map_msg_fb_buf(dec);
788 dec->msg->size = sizeof(*dec->msg);
789 dec->msg->msg_type = RUVD_MSG_DECODE;
790 dec->msg->stream_handle = dec->stream_handle;
791 dec->msg->status_report_feedback_number = dec->frame_number;
792
793 dec->msg->body.decode.stream_type = profile2stream_type(dec->base.profile);
794 dec->msg->body.decode.decode_flags = 0x1;
795 dec->msg->body.decode.width_in_samples = dec->base.width;
796 dec->msg->body.decode.height_in_samples = dec->base.height;
797
798 dec->msg->body.decode.dpb_size = dec->dpb.buf->size;
799 dec->msg->body.decode.bsd_size = bs_size;
800
801 dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
802
803 switch (u_reduce_video_profile(picture->profile)) {
804 case PIPE_VIDEO_FORMAT_MPEG4_AVC:
805 dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
806 break;
807
808 case PIPE_VIDEO_FORMAT_VC1:
809 dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);
810 break;
811
812 case PIPE_VIDEO_FORMAT_MPEG12:
813 dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture);
814 break;
815
816 case PIPE_VIDEO_FORMAT_MPEG4:
817 dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture);
818 break;
819
820 default:
821 assert(0);
822 return;
823 }
824
825 dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config;
826 dec->msg->body.decode.extension_support = 0x1;
827
828 /* set at least the feedback buffer size */
829 dec->fb[0] = FB_BUFFER_SIZE;
830
831 send_msg_buf(dec);
832
833 send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.cs_handle, 0,
834 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
835 send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->cs_handle,
836 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
837 send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
838 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
839 send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_buf->cs_handle,
840 FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
841 set_reg(dec, RUVD_ENGINE_CNTL, 1);
842
843 flush(dec);
844 next_buffer(dec);
845 }
846
/**
 * flush any outstanding command buffers to the hardware
 *
 * Intentionally empty: this callback performs no work.
 */
static void ruvd_flush(struct pipe_video_codec *decoder)
{
    (void)decoder;
}
853
854 /**
855 * create and UVD decoder
856 */
857 struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
858 const struct pipe_video_codec *templ,
859 ruvd_set_dtb set_dtb)
860 {
861 struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
862 unsigned dpb_size = calc_dpb_size(templ);
863 unsigned width = templ->width, height = templ->height;
864 unsigned bs_buf_size;
865 struct radeon_info info;
866 struct ruvd_decoder *dec;
867 int i;
868
869 ws->query_info(ws, &info);
870
871 switch(u_reduce_video_profile(templ->profile)) {
872 case PIPE_VIDEO_FORMAT_MPEG12:
873 if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM)
874 return vl_create_mpeg12_decoder(context, templ);
875
876 /* fall through */
877 case PIPE_VIDEO_FORMAT_MPEG4:
878 case PIPE_VIDEO_FORMAT_MPEG4_AVC:
879 width = align(width, VL_MACROBLOCK_WIDTH);
880 height = align(height, VL_MACROBLOCK_HEIGHT);
881 break;
882
883 default:
884 break;
885 }
886
887
888 dec = CALLOC_STRUCT(ruvd_decoder);
889
890 if (!dec)
891 return NULL;
892
893 dec->base = *templ;
894 dec->base.context = context;
895 dec->base.width = width;
896 dec->base.height = height;
897
898 dec->base.destroy = ruvd_destroy;
899 dec->base.begin_frame = ruvd_begin_frame;
900 dec->base.decode_macroblock = ruvd_decode_macroblock;
901 dec->base.decode_bitstream = ruvd_decode_bitstream;
902 dec->base.end_frame = ruvd_end_frame;
903 dec->base.flush = ruvd_flush;
904
905 dec->set_dtb = set_dtb;
906 dec->stream_handle = alloc_stream_handle();
907 dec->ws = ws;
908 dec->cs = ws->cs_create(ws, RING_UVD, NULL);
909 if (!dec->cs) {
910 RUVD_ERR("Can't get command submission context.\n");
911 goto error;
912 }
913
914 bs_buf_size = width * height * 512 / (16 * 16);
915 for (i = 0; i < NUM_BUFFERS; ++i) {
916 unsigned msg_fb_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
917 STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
918 if (!create_buffer(dec, &dec->msg_fb_buffers[i], msg_fb_size)) {
919 RUVD_ERR("Can't allocated message buffers.\n");
920 goto error;
921 }
922
923 if (!create_buffer(dec, &dec->bs_buffers[i], bs_buf_size)) {
924 RUVD_ERR("Can't allocated bitstream buffers.\n");
925 goto error;
926 }
927
928 clear_buffer(dec, &dec->msg_fb_buffers[i]);
929 clear_buffer(dec, &dec->bs_buffers[i]);
930 }
931
932 if (!create_buffer(dec, &dec->dpb, dpb_size)) {
933 RUVD_ERR("Can't allocated dpb.\n");
934 goto error;
935 }
936
937 clear_buffer(dec, &dec->dpb);
938
939 map_msg_fb_buf(dec);
940 dec->msg->size = sizeof(*dec->msg);
941 dec->msg->msg_type = RUVD_MSG_CREATE;
942 dec->msg->stream_handle = dec->stream_handle;
943 dec->msg->body.create.stream_type = profile2stream_type(dec->base.profile);
944 dec->msg->body.create.width_in_samples = dec->base.width;
945 dec->msg->body.create.height_in_samples = dec->base.height;
946 dec->msg->body.create.dpb_size = dec->dpb.buf->size;
947 send_msg_buf(dec);
948 flush(dec);
949 next_buffer(dec);
950
951 return &dec->base;
952
953 error:
954 if (dec->cs) dec->ws->cs_destroy(dec->cs);
955
956 for (i = 0; i < NUM_BUFFERS; ++i) {
957 destroy_buffer(&dec->msg_fb_buffers[i]);
958 destroy_buffer(&dec->bs_buffers[i]);
959 }
960
961 destroy_buffer(&dec->dpb);
962
963 FREE(dec);
964
965 return NULL;
966 }
967
968 /**
969 * join surfaces into the same buffer with identical tiling params
970 * sumup their sizes and replace the backend buffers with a single bo
971 */
972 void ruvd_join_surfaces(struct radeon_winsys* ws, unsigned bind,
973 struct pb_buffer** buffers[VL_NUM_COMPONENTS],
974 struct radeon_surface *surfaces[VL_NUM_COMPONENTS])
975 {
976 unsigned best_tiling, best_wh, off;
977 unsigned size, alignment;
978 struct pb_buffer *pb;
979 unsigned i, j;
980
981 for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) {
982 unsigned wh;
983
984 if (!surfaces[i])
985 continue;
986
987 /* choose the smallest bank w/h for now */
988 wh = surfaces[i]->bankw * surfaces[i]->bankh;
989 if (wh < best_wh) {
990 best_wh = wh;
991 best_tiling = i;
992 }
993 }
994
995 for (i = 0, off = 0; i < VL_NUM_COMPONENTS; ++i) {
996 if (!surfaces[i])
997 continue;
998
999 /* copy the tiling parameters */
1000 surfaces[i]->bankw = surfaces[best_tiling]->bankw;
1001 surfaces[i]->bankh = surfaces[best_tiling]->bankh;
1002 surfaces[i]->mtilea = surfaces[best_tiling]->mtilea;
1003 surfaces[i]->tile_split = surfaces[best_tiling]->tile_split;
1004
1005 /* adjust the texture layer offsets */
1006 off = align(off, surfaces[i]->bo_alignment);
1007 for (j = 0; j < Elements(surfaces[i]->level); ++j)
1008 surfaces[i]->level[j].offset += off;
1009 off += surfaces[i]->bo_size;
1010 }
1011
1012 for (i = 0, size = 0, alignment = 0; i < VL_NUM_COMPONENTS; ++i) {
1013 if (!buffers[i] || !*buffers[i])
1014 continue;
1015
1016 size = align(size, (*buffers[i])->alignment);
1017 size += (*buffers[i])->size;
1018 alignment = MAX2(alignment, (*buffers[i])->alignment * 1);
1019 }
1020
1021 if (!size)
1022 return;
1023
1024 /* TODO: 2D tiling workaround */
1025 alignment *= 2;
1026
1027 pb = ws->buffer_create(ws, size, alignment, bind, RADEON_DOMAIN_VRAM);
1028 if (!pb)
1029 return;
1030
1031 for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
1032 if (!buffers[i] || !*buffers[i])
1033 continue;
1034
1035 pb_reference(buffers[i], pb);
1036 }
1037
1038 pb_reference(&pb, NULL);
1039 }
1040
1041 /* calculate top/bottom offset */
1042 static unsigned texture_offset(struct radeon_surface *surface, unsigned layer)
1043 {
1044 return surface->level[0].offset +
1045 layer * surface->level[0].slice_size;
1046 }
1047
/* translate a macro tile aspect of 1, 2, 4 or 8 into the hw encoding 0..3,
 * any other value is encoded as 0 */
static unsigned macro_tile_aspect(unsigned macro_tile_aspect)
{
	switch (macro_tile_aspect) {
	case 2:
		return 1;
	case 4:
		return 2;
	case 8:
		return 3;
	case 1:
	default:
		return 0;
	}
}
1060
/* translate a bank width or height of 1, 2, 4 or 8 into the hw encoding
 * 0..3, any other value is encoded as 0 */
static unsigned bank_wh(unsigned bankwh)
{
	if (bankwh == 2)
		return 1;

	if (bankwh == 4)
		return 2;

	if (bankwh == 8)
		return 3;

	/* 1 and everything unknown */
	return 0;
}
1073
1074 /**
1075 * fill decoding target field from the luma and chroma surfaces
1076 */
1077 void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surface *luma,
1078 struct radeon_surface *chroma)
1079 {
1080 msg->body.decode.dt_pitch = luma->level[0].pitch_bytes;
1081 switch (luma->level[0].mode) {
1082 case RADEON_SURF_MODE_LINEAR_ALIGNED:
1083 msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
1084 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
1085 break;
1086 case RADEON_SURF_MODE_1D:
1087 msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
1088 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
1089 break;
1090 case RADEON_SURF_MODE_2D:
1091 msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
1092 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
1093 break;
1094 default:
1095 assert(0);
1096 break;
1097 }
1098
1099 msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0);
1100 msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0);
1101 if (msg->body.decode.dt_field_mode) {
1102 msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1);
1103 msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1);
1104 } else {
1105 msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
1106 msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
1107 }
1108
1109 assert(luma->bankw == chroma->bankw);
1110 assert(luma->bankh == chroma->bankh);
1111 assert(luma->mtilea == chroma->mtilea);
1112
1113 msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->bankw));
1114 msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->bankh));
1115 msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->mtilea));
1116 }
1117
1118 int ruvd_get_video_param(struct pipe_screen *screen,
1119 enum pipe_video_profile profile,
1120 enum pipe_video_entrypoint entrypoint,
1121 enum pipe_video_cap param)
1122 {
1123 struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
1124
1125 /* UVD 2.x limits */
1126 if (rscreen->family < CHIP_PALM) {
1127 enum pipe_video_format codec = u_reduce_video_profile(profile);
1128 switch (param) {
1129 case PIPE_VIDEO_CAP_SUPPORTED:
1130 /* no support for MPEG4 */
1131 return codec != PIPE_VIDEO_FORMAT_MPEG4;
1132 case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
1133 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
1134 /* and MPEG2 only with shaders */
1135 return codec != PIPE_VIDEO_FORMAT_MPEG12;
1136 default:
1137 break;
1138 }
1139 }
1140
1141 switch (param) {
1142 case PIPE_VIDEO_CAP_SUPPORTED:
1143 switch (u_reduce_video_profile(profile)) {
1144 case PIPE_VIDEO_FORMAT_MPEG12:
1145 case PIPE_VIDEO_FORMAT_MPEG4:
1146 case PIPE_VIDEO_FORMAT_MPEG4_AVC:
1147 return true;
1148 case PIPE_VIDEO_FORMAT_VC1:
1149 /* FIXME: VC-1 simple/main profile is broken */
1150 return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED;
1151 default:
1152 return false;
1153 }
1154 case PIPE_VIDEO_CAP_NPOT_TEXTURES:
1155 return 1;
1156 case PIPE_VIDEO_CAP_MAX_WIDTH:
1157 return 2048;
1158 case PIPE_VIDEO_CAP_MAX_HEIGHT:
1159 return 1152;
1160 case PIPE_VIDEO_CAP_PREFERED_FORMAT:
1161 return PIPE_FORMAT_NV12;
1162 case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
1163 return true;
1164 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
1165 return true;
1166 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
1167 return true;
1168 case PIPE_VIDEO_CAP_MAX_LEVEL:
1169 switch (profile) {
1170 case PIPE_VIDEO_PROFILE_MPEG1:
1171 return 0;
1172 case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
1173 case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
1174 return 3;
1175 case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
1176 return 3;
1177 case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
1178 return 5;
1179 case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
1180 return 1;
1181 case PIPE_VIDEO_PROFILE_VC1_MAIN:
1182 return 2;
1183 case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
1184 return 4;
1185 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
1186 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
1187 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
1188 return 41;
1189 default:
1190 return 0;
1191 }
1192 default:
1193 return 0;
1194 }
1195 }
1196
1197 boolean ruvd_is_format_supported(struct pipe_screen *screen,
1198 enum pipe_format format,
1199 enum pipe_video_profile profile,
1200 enum pipe_video_entrypoint entrypoint)
1201 {
1202 /* we can only handle this one with UVD */
1203 if (profile != PIPE_VIDEO_PROFILE_UNKNOWN)
1204 return format == PIPE_FORMAT_NV12;
1205
1206 return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
1207 }