1 /**************************************************************************
3 * Copyright 2011 Advanced Micro Devices, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Christian König <christian.koenig@amd.com>
34 #include <sys/types.h>
40 #include "pipe/p_video_codec.h"
42 #include "util/u_memory.h"
43 #include "util/u_video.h"
45 #include "vl/vl_defines.h"
46 #include "vl/vl_mpeg12_decoder.h"
48 #include "r600_pipe_common.h"
49 #include "radeon_video.h"
50 #include "radeon_uvd.h"
54 #define NUM_MPEG2_REFS 6
55 #define NUM_H264_REFS 17
56 #define NUM_VC1_REFS 5
58 #define FB_BUFFER_OFFSET 0x1000
59 #define FB_BUFFER_SIZE 2048
60 #define FB_BUFFER_SIZE_TONGA (2048 * 64)
61 #define IT_SCALING_TABLE_SIZE 992
62 #define UVD_SESSION_CONTEXT_SIZE (128 * 1024)
64 /* UVD decoder representation */
66 struct pipe_video_codec base
;
70 unsigned stream_handle
;
72 unsigned frame_number
;
74 struct pipe_screen
*screen
;
75 struct radeon_winsys
* ws
;
76 struct radeon_cmdbuf
* cs
;
80 struct rvid_buffer msg_fb_it_buffers
[NUM_BUFFERS
];
86 struct rvid_buffer bs_buffers
[NUM_BUFFERS
];
90 struct rvid_buffer dpb
;
92 struct rvid_buffer ctx
;
93 struct rvid_buffer sessionctx
;
102 /* flush IB to the hardware */
103 static int flush(struct ruvd_decoder
*dec
, unsigned flags
)
105 return dec
->ws
->cs_flush(dec
->cs
, flags
, NULL
);
108 /* add a new set register command to the IB */
109 static void set_reg(struct ruvd_decoder
*dec
, unsigned reg
, uint32_t val
)
111 radeon_emit(dec
->cs
, RUVD_PKT0(reg
>> 2, 0));
112 radeon_emit(dec
->cs
, val
);
115 /* send a command to the VCPU through the GPCOM registers */
/*
 * Steps visible in this view:
 *  - `buf` is added to the command stream with RADEON_USAGE_SYNCHRONIZED
 *    OR-ed into `usage`; the result is kept in reloc_idx.
 *  - when dec->use_legacy is false, the buffer's virtual address is written
 *    to dec->reg.data0 and (addr >> 32) to dec->reg.data1; set_reg() takes a
 *    uint32_t, so data0 receives the low 32 bits.
 *  - `off` plus the buffer's reloc offset goes to RUVD_GPCOM_VCPU_DATA0 and
 *    reloc_idx * 4 to RUVD_GPCOM_VCPU_DATA1.
 *    NOTE(review): presumably this is the legacy (else) branch; the branch
 *    structure is not visible here -- confirm.
 *  - finally (cmd << 1) is written to dec->reg.cmd.
 */
116 static void send_cmd(struct ruvd_decoder
*dec
, unsigned cmd
,
117 struct pb_buffer
* buf
, uint32_t off
,
118 enum radeon_bo_usage usage
, enum radeon_bo_domain domain
)
122 reloc_idx
= dec
->ws
->cs_add_buffer(dec
->cs
, buf
, usage
| RADEON_USAGE_SYNCHRONIZED
,
124 if (!dec
->use_legacy
) {
126 addr
= dec
->ws
->buffer_get_virtual_address(buf
);
128 set_reg(dec
, dec
->reg
.data0
, addr
);
129 set_reg(dec
, dec
->reg
.data1
, addr
>> 32);
131 off
+= dec
->ws
->buffer_get_reloc_offset(buf
);
132 set_reg(dec
, RUVD_GPCOM_VCPU_DATA0
, off
);
133 set_reg(dec
, RUVD_GPCOM_VCPU_DATA1
, reloc_idx
* 4);
/* the command code is shifted left by one before being written */
135 set_reg(dec
, dec
->reg
.cmd
, cmd
<< 1);
138 /* do the codec needs an IT buffer ?*/
139 static bool have_it(struct ruvd_decoder
*dec
)
141 return dec
->stream_type
== RUVD_CODEC_H264_PERF
||
142 dec
->stream_type
== RUVD_CODEC_H265
;
145 /* map the next available message/feedback/itscaling buffer */
/*
 * Maps dec->msg_fb_it_buffers[dec->cur_buffer] for writing and derives the
 * decoder's working pointers from that single mapping:
 *   dec->msg  start of the mapping, cleared to zero over sizeof(*dec->msg)
 *   dec->fb   mapping + FB_BUFFER_OFFSET
 *   dec->it   mapping + FB_BUFFER_OFFSET + dec->fb_size
 * NOTE(review): the dec->it assignment may be conditional; the line before
 * it is not visible in this view -- confirm.
 */
146 static void map_msg_fb_it_buf(struct ruvd_decoder
*dec
)
148 struct rvid_buffer
* buf
;
151 /* grab the current message/feedback buffer */
152 buf
= &dec
->msg_fb_it_buffers
[dec
->cur_buffer
];
154 /* and map it for CPU access */
155 ptr
= dec
->ws
->buffer_map(buf
->res
->buf
, dec
->cs
,
156 PIPE_TRANSFER_WRITE
| RADEON_TRANSFER_TEMPORARY
);
158 /* calc buffer offsets */
159 dec
->msg
= (struct ruvd_msg
*)ptr
;
/* start every message from an all-zero struct */
160 memset(dec
->msg
, 0, sizeof(*dec
->msg
));
162 dec
->fb
= (uint32_t *)(ptr
+ FB_BUFFER_OFFSET
);
164 dec
->it
= (uint8_t *)(ptr
+ FB_BUFFER_OFFSET
+ dec
->fb_size
);
167 /* unmap and send a message command to the VCPU */
/*
 * Visible behaviour: checks that dec->msg and dec->fb are both set, unmaps
 * the current msg/fb/it buffer, sends RUVD_CMD_SESSION_CONTEXT_BUFFER for
 * dec->sessionctx when that buffer has a resource, and then sends the
 * message buffer itself with RUVD_CMD_MSG_BUFFER (offset 0,
 * RADEON_USAGE_READ, RADEON_DOMAIN_GTT).
 * NOTE(review): the statement guarded by the "not mapped" check and the
 * lines between the unmap and the session-context check are not visible in
 * this view.
 */
168 static void send_msg_buf(struct ruvd_decoder
*dec
)
170 struct rvid_buffer
* buf
;
172 /* ignore the request if message/feedback buffer isn't mapped */
173 if (!dec
->msg
|| !dec
->fb
)
176 /* grab the current message buffer */
177 buf
= &dec
->msg_fb_it_buffers
[dec
->cur_buffer
];
179 /* unmap the buffer */
180 dec
->ws
->buffer_unmap(buf
->res
->buf
);
/* the session context buffer is optional: only sent when it was allocated */
186 if (dec
->sessionctx
.res
)
187 send_cmd(dec
, RUVD_CMD_SESSION_CONTEXT_BUFFER
,
188 dec
->sessionctx
.res
->buf
, 0, RADEON_USAGE_READWRITE
,
191 /* and send it to the hardware */
192 send_cmd(dec
, RUVD_CMD_MSG_BUFFER
, buf
->res
->buf
, 0,
193 RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
196 /* cycle to the next set of buffers */
/*
 * The visible statement reduces dec->cur_buffer modulo NUM_BUFFERS, so the
 * index always stays inside the per-decoder buffer arrays.
 * NOTE(review): the statement that advances the index is not visible in
 * this view.
 */
197 static void next_buffer(struct ruvd_decoder
*dec
)
200 dec
->cur_buffer
%= NUM_BUFFERS
;
203 /* convert the profile into something UVD understands */
204 static uint32_t profile2stream_type(struct ruvd_decoder
*dec
, unsigned family
)
206 switch (u_reduce_video_profile(dec
->base
.profile
)) {
207 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
208 return RUVD_CODEC_H264
;
210 case PIPE_VIDEO_FORMAT_VC1
:
211 return RUVD_CODEC_VC1
;
213 case PIPE_VIDEO_FORMAT_MPEG12
:
214 return RUVD_CODEC_MPEG2
;
216 case PIPE_VIDEO_FORMAT_MPEG4
:
217 return RUVD_CODEC_MPEG4
;
219 case PIPE_VIDEO_FORMAT_JPEG
:
220 return RUVD_CODEC_MJPEG
;
229 static unsigned get_db_pitch_alignment(struct ruvd_decoder
*dec
)
234 /* calculate size of reference picture buffer */
235 static unsigned calc_dpb_size(struct ruvd_decoder
*dec
)
237 unsigned width_in_mb
, height_in_mb
, image_size
, dpb_size
;
239 // always align them to MB size for dpb calculation
240 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
241 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
243 // always one more for currently decoded picture
244 unsigned max_references
= dec
->base
.max_references
+ 1;
246 // aligned size of a single frame
247 image_size
= align(width
, get_db_pitch_alignment(dec
)) * height
;
248 image_size
+= image_size
/ 2;
249 image_size
= align(image_size
, 1024);
251 // picture width & height in 16 pixel units
252 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
253 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
255 switch (u_reduce_video_profile(dec
->base
.profile
)) {
256 case PIPE_VIDEO_FORMAT_MPEG4_AVC
: {
257 if (!dec
->use_legacy
) {
258 unsigned fs_in_mb
= width_in_mb
* height_in_mb
;
259 unsigned alignment
= 64, num_dpb_buffer
;
261 if (dec
->stream_type
== RUVD_CODEC_H264_PERF
)
263 switch(dec
->base
.level
) {
265 num_dpb_buffer
= 8100 / fs_in_mb
;
268 num_dpb_buffer
= 18000 / fs_in_mb
;
271 num_dpb_buffer
= 20480 / fs_in_mb
;
274 num_dpb_buffer
= 32768 / fs_in_mb
;
277 num_dpb_buffer
= 34816 / fs_in_mb
;
280 num_dpb_buffer
= 110400 / fs_in_mb
;
283 num_dpb_buffer
= 184320 / fs_in_mb
;
286 num_dpb_buffer
= 184320 / fs_in_mb
;
290 max_references
= MAX2(MIN2(NUM_H264_REFS
, num_dpb_buffer
), max_references
);
291 dpb_size
= image_size
* max_references
;
292 if ((dec
->stream_type
!= RUVD_CODEC_H264_PERF
)) {
293 dpb_size
+= max_references
* align(width_in_mb
* height_in_mb
* 192, alignment
);
294 dpb_size
+= align(width_in_mb
* height_in_mb
* 32, alignment
);
297 // the firmware seems to always assume a minimum of ref frames
298 max_references
= MAX2(NUM_H264_REFS
, max_references
);
299 // reference picture buffer
300 dpb_size
= image_size
* max_references
;
301 if ((dec
->stream_type
!= RUVD_CODEC_H264_PERF
)) {
302 // macroblock context buffer
303 dpb_size
+= width_in_mb
* height_in_mb
* max_references
* 192;
305 dpb_size
+= width_in_mb
* height_in_mb
* 32;
311 case PIPE_VIDEO_FORMAT_VC1
:
312 // the firmware seems to always assume a minimum of ref frames
313 max_references
= MAX2(NUM_VC1_REFS
, max_references
);
315 // reference picture buffer
316 dpb_size
= image_size
* max_references
;
319 dpb_size
+= width_in_mb
* height_in_mb
* 128;
322 dpb_size
+= width_in_mb
* 64;
325 dpb_size
+= width_in_mb
* 128;
328 dpb_size
+= align(MAX2(width_in_mb
, height_in_mb
) * 7 * 16, 64);
331 case PIPE_VIDEO_FORMAT_MPEG12
:
332 // reference picture buffer, must be big enough for all frames
333 dpb_size
= image_size
* NUM_MPEG2_REFS
;
336 case PIPE_VIDEO_FORMAT_MPEG4
:
337 // reference picture buffer
338 dpb_size
= image_size
* max_references
;
341 dpb_size
+= width_in_mb
* height_in_mb
* 64;
344 dpb_size
+= align(width_in_mb
* height_in_mb
* 32, 64);
346 dpb_size
= MAX2(dpb_size
, 30 * 1024 * 1024);
349 case PIPE_VIDEO_FORMAT_JPEG
:
354 // something is missing here
357 // at least use a sane default value
358 dpb_size
= 32 * 1024 * 1024;
364 /* free associated data in the video buffer callback */
365 static void ruvd_destroy_associated_data(void *data
)
367 /* NOOP, since we only use an intptr */
370 /* get h264 specific message bits */
371 static struct ruvd_h264
get_h264_msg(struct ruvd_decoder
*dec
, struct pipe_h264_picture_desc
*pic
)
373 struct ruvd_h264 result
;
375 memset(&result
, 0, sizeof(result
));
376 switch (pic
->base
.profile
) {
377 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE
:
378 case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE
:
379 result
.profile
= RUVD_H264_PROFILE_BASELINE
;
382 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN
:
383 result
.profile
= RUVD_H264_PROFILE_MAIN
;
386 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
:
387 result
.profile
= RUVD_H264_PROFILE_HIGH
;
395 result
.level
= dec
->base
.level
;
397 result
.sps_info_flags
= 0;
398 result
.sps_info_flags
|= pic
->pps
->sps
->direct_8x8_inference_flag
<< 0;
399 result
.sps_info_flags
|= pic
->pps
->sps
->mb_adaptive_frame_field_flag
<< 1;
400 result
.sps_info_flags
|= pic
->pps
->sps
->frame_mbs_only_flag
<< 2;
401 result
.sps_info_flags
|= pic
->pps
->sps
->delta_pic_order_always_zero_flag
<< 3;
403 result
.bit_depth_luma_minus8
= pic
->pps
->sps
->bit_depth_luma_minus8
;
404 result
.bit_depth_chroma_minus8
= pic
->pps
->sps
->bit_depth_chroma_minus8
;
405 result
.log2_max_frame_num_minus4
= pic
->pps
->sps
->log2_max_frame_num_minus4
;
406 result
.pic_order_cnt_type
= pic
->pps
->sps
->pic_order_cnt_type
;
407 result
.log2_max_pic_order_cnt_lsb_minus4
= pic
->pps
->sps
->log2_max_pic_order_cnt_lsb_minus4
;
409 switch (dec
->base
.chroma_format
) {
410 case PIPE_VIDEO_CHROMA_FORMAT_NONE
:
413 case PIPE_VIDEO_CHROMA_FORMAT_400
:
414 result
.chroma_format
= 0;
416 case PIPE_VIDEO_CHROMA_FORMAT_420
:
417 result
.chroma_format
= 1;
419 case PIPE_VIDEO_CHROMA_FORMAT_422
:
420 result
.chroma_format
= 2;
422 case PIPE_VIDEO_CHROMA_FORMAT_444
:
423 result
.chroma_format
= 3;
427 result
.pps_info_flags
= 0;
428 result
.pps_info_flags
|= pic
->pps
->transform_8x8_mode_flag
<< 0;
429 result
.pps_info_flags
|= pic
->pps
->redundant_pic_cnt_present_flag
<< 1;
430 result
.pps_info_flags
|= pic
->pps
->constrained_intra_pred_flag
<< 2;
431 result
.pps_info_flags
|= pic
->pps
->deblocking_filter_control_present_flag
<< 3;
432 result
.pps_info_flags
|= pic
->pps
->weighted_bipred_idc
<< 4;
433 result
.pps_info_flags
|= pic
->pps
->weighted_pred_flag
<< 6;
434 result
.pps_info_flags
|= pic
->pps
->bottom_field_pic_order_in_frame_present_flag
<< 7;
435 result
.pps_info_flags
|= pic
->pps
->entropy_coding_mode_flag
<< 8;
437 result
.num_slice_groups_minus1
= pic
->pps
->num_slice_groups_minus1
;
438 result
.slice_group_map_type
= pic
->pps
->slice_group_map_type
;
439 result
.slice_group_change_rate_minus1
= pic
->pps
->slice_group_change_rate_minus1
;
440 result
.pic_init_qp_minus26
= pic
->pps
->pic_init_qp_minus26
;
441 result
.chroma_qp_index_offset
= pic
->pps
->chroma_qp_index_offset
;
442 result
.second_chroma_qp_index_offset
= pic
->pps
->second_chroma_qp_index_offset
;
444 memcpy(result
.scaling_list_4x4
, pic
->pps
->ScalingList4x4
, 6*16);
445 memcpy(result
.scaling_list_8x8
, pic
->pps
->ScalingList8x8
, 2*64);
447 if (dec
->stream_type
== RUVD_CODEC_H264_PERF
) {
448 memcpy(dec
->it
, result
.scaling_list_4x4
, 6*16);
449 memcpy((dec
->it
+ 96), result
.scaling_list_8x8
, 2*64);
452 result
.num_ref_frames
= pic
->num_ref_frames
;
454 result
.num_ref_idx_l0_active_minus1
= pic
->num_ref_idx_l0_active_minus1
;
455 result
.num_ref_idx_l1_active_minus1
= pic
->num_ref_idx_l1_active_minus1
;
457 result
.frame_num
= pic
->frame_num
;
458 memcpy(result
.frame_num_list
, pic
->frame_num_list
, 4*16);
459 result
.curr_field_order_cnt_list
[0] = pic
->field_order_cnt
[0];
460 result
.curr_field_order_cnt_list
[1] = pic
->field_order_cnt
[1];
461 memcpy(result
.field_order_cnt_list
, pic
->field_order_cnt_list
, 4*16*2);
463 result
.decoded_pic_idx
= pic
->frame_num
;
468 /* get vc1 specific message bits */
469 static struct ruvd_vc1
get_vc1_msg(struct pipe_vc1_picture_desc
*pic
)
471 struct ruvd_vc1 result
;
473 memset(&result
, 0, sizeof(result
));
475 switch(pic
->base
.profile
) {
476 case PIPE_VIDEO_PROFILE_VC1_SIMPLE
:
477 result
.profile
= RUVD_VC1_PROFILE_SIMPLE
;
481 case PIPE_VIDEO_PROFILE_VC1_MAIN
:
482 result
.profile
= RUVD_VC1_PROFILE_MAIN
;
486 case PIPE_VIDEO_PROFILE_VC1_ADVANCED
:
487 result
.profile
= RUVD_VC1_PROFILE_ADVANCED
;
495 /* fields common for all profiles */
496 result
.sps_info_flags
|= pic
->postprocflag
<< 7;
497 result
.sps_info_flags
|= pic
->pulldown
<< 6;
498 result
.sps_info_flags
|= pic
->interlace
<< 5;
499 result
.sps_info_flags
|= pic
->tfcntrflag
<< 4;
500 result
.sps_info_flags
|= pic
->finterpflag
<< 3;
501 result
.sps_info_flags
|= pic
->psf
<< 1;
503 result
.pps_info_flags
|= pic
->range_mapy_flag
<< 31;
504 result
.pps_info_flags
|= pic
->range_mapy
<< 28;
505 result
.pps_info_flags
|= pic
->range_mapuv_flag
<< 27;
506 result
.pps_info_flags
|= pic
->range_mapuv
<< 24;
507 result
.pps_info_flags
|= pic
->multires
<< 21;
508 result
.pps_info_flags
|= pic
->maxbframes
<< 16;
509 result
.pps_info_flags
|= pic
->overlap
<< 11;
510 result
.pps_info_flags
|= pic
->quantizer
<< 9;
511 result
.pps_info_flags
|= pic
->panscan_flag
<< 7;
512 result
.pps_info_flags
|= pic
->refdist_flag
<< 6;
513 result
.pps_info_flags
|= pic
->vstransform
<< 0;
515 /* some fields only apply to main/advanced profile */
516 if (pic
->base
.profile
!= PIPE_VIDEO_PROFILE_VC1_SIMPLE
) {
517 result
.pps_info_flags
|= pic
->syncmarker
<< 20;
518 result
.pps_info_flags
|= pic
->rangered
<< 19;
519 result
.pps_info_flags
|= pic
->loopfilter
<< 5;
520 result
.pps_info_flags
|= pic
->fastuvmc
<< 4;
521 result
.pps_info_flags
|= pic
->extended_mv
<< 3;
522 result
.pps_info_flags
|= pic
->extended_dmv
<< 8;
523 result
.pps_info_flags
|= pic
->dquant
<< 1;
526 result
.chroma_format
= 1;
529 //(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
532 uint8_t frame_coding_mode
533 uint8_t deblockEnable
540 /* extract the frame number from a referenced video buffer */
/*
 * The frame number is read from the data associated with `ref` for this
 * decoder and clamped to [min, max] before it is returned, where
 *   min = MAX2(frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS
 *   max = MAX2(frame_number, 1) - 1
 * The MAX2() guards keep both subtractions from wrapping below zero.
 * NOTE(review): the code for the "fallback" comment below is not visible in
 * this view.
 */
541 static uint32_t get_ref_pic_idx(struct ruvd_decoder
*dec
, struct pipe_video_buffer
*ref
)
543 uint32_t min
= MAX2(dec
->frame_number
, NUM_MPEG2_REFS
) - NUM_MPEG2_REFS
;
544 uint32_t max
= MAX2(dec
->frame_number
, 1) - 1;
547 /* seems to be the most sane fallback */
551 /* get the frame number from the associated data */
552 frame
= (uintptr_t)vl_video_buffer_get_associated_data(ref
, &dec
->base
);
554 /* limit the frame number to a valid range */
555 return MAX2(MIN2(frame
, max
), min
);
558 /* get mpeg2 specific msg bits */
559 static struct ruvd_mpeg2
get_mpeg2_msg(struct ruvd_decoder
*dec
,
560 struct pipe_mpeg12_picture_desc
*pic
)
562 const int *zscan
= pic
->alternate_scan
? vl_zscan_alternate
: vl_zscan_normal
;
563 struct ruvd_mpeg2 result
;
566 memset(&result
, 0, sizeof(result
));
567 result
.decoded_pic_idx
= dec
->frame_number
;
568 for (i
= 0; i
< 2; ++i
)
569 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
571 result
.load_intra_quantiser_matrix
= 1;
572 result
.load_nonintra_quantiser_matrix
= 1;
574 for (i
= 0; i
< 64; ++i
) {
575 result
.intra_quantiser_matrix
[i
] = pic
->intra_matrix
[zscan
[i
]];
576 result
.nonintra_quantiser_matrix
[i
] = pic
->non_intra_matrix
[zscan
[i
]];
579 result
.profile_and_level_indication
= 0;
580 result
.chroma_format
= 0x1;
582 result
.picture_coding_type
= pic
->picture_coding_type
;
583 result
.f_code
[0][0] = pic
->f_code
[0][0] + 1;
584 result
.f_code
[0][1] = pic
->f_code
[0][1] + 1;
585 result
.f_code
[1][0] = pic
->f_code
[1][0] + 1;
586 result
.f_code
[1][1] = pic
->f_code
[1][1] + 1;
587 result
.intra_dc_precision
= pic
->intra_dc_precision
;
588 result
.pic_structure
= pic
->picture_structure
;
589 result
.top_field_first
= pic
->top_field_first
;
590 result
.frame_pred_frame_dct
= pic
->frame_pred_frame_dct
;
591 result
.concealment_motion_vectors
= pic
->concealment_motion_vectors
;
592 result
.q_scale_type
= pic
->q_scale_type
;
593 result
.intra_vlc_format
= pic
->intra_vlc_format
;
594 result
.alternate_scan
= pic
->alternate_scan
;
599 /* get mpeg4 specific msg bits */
600 static struct ruvd_mpeg4
get_mpeg4_msg(struct ruvd_decoder
*dec
,
601 struct pipe_mpeg4_picture_desc
*pic
)
603 struct ruvd_mpeg4 result
;
606 memset(&result
, 0, sizeof(result
));
607 result
.decoded_pic_idx
= dec
->frame_number
;
608 for (i
= 0; i
< 2; ++i
)
609 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
611 result
.variant_type
= 0;
612 result
.profile_and_level_indication
= 0xF0; // ASP Level0
614 result
.video_object_layer_verid
= 0x5; // advanced simple
615 result
.video_object_layer_shape
= 0x0; // rectangular
617 result
.video_object_layer_width
= dec
->base
.width
;
618 result
.video_object_layer_height
= dec
->base
.height
;
620 result
.vop_time_increment_resolution
= pic
->vop_time_increment_resolution
;
622 result
.flags
|= pic
->short_video_header
<< 0;
623 //result.flags |= obmc_disable << 1;
624 result
.flags
|= pic
->interlaced
<< 2;
625 result
.flags
|= 1 << 3; // load_intra_quant_mat
626 result
.flags
|= 1 << 4; // load_nonintra_quant_mat
627 result
.flags
|= pic
->quarter_sample
<< 5;
628 result
.flags
|= 1 << 6; // complexity_estimation_disable
629 result
.flags
|= pic
->resync_marker_disable
<< 7;
630 //result.flags |= data_partitioned << 8;
631 //result.flags |= reversible_vlc << 9;
632 result
.flags
|= 0 << 10; // newpred_enable
633 result
.flags
|= 0 << 11; // reduced_resolution_vop_enable
634 //result.flags |= scalability << 12;
635 //result.flags |= is_object_layer_identifier << 13;
636 //result.flags |= fixed_vop_rate << 14;
637 //result.flags |= newpred_segment_type << 15;
639 result
.quant_type
= pic
->quant_type
;
641 for (i
= 0; i
< 64; ++i
) {
642 result
.intra_quant_mat
[i
] = pic
->intra_matrix
[vl_zscan_normal
[i
]];
643 result
.nonintra_quant_mat
[i
] = pic
->non_intra_matrix
[vl_zscan_normal
[i
]];
649 uint8_t vop_coding_type
650 uint8_t vop_fcode_forward
651 uint8_t vop_fcode_backward
652 uint8_t rounding_control
653 uint8_t alternate_vertical_scan_flag
654 uint8_t top_field_first
660 static void get_mjpeg_slice_header(struct ruvd_decoder
*dec
, struct pipe_mjpeg_picture_desc
*pic
)
662 int size
= 0, saved_size
, len_pos
, i
;
664 uint8_t *buf
= dec
->bs_ptr
;
677 for (i
= 0; i
< 4; ++i
) {
678 if (pic
->quantization_table
.load_quantiser_table
[i
] == 0)
682 memcpy((buf
+ size
), &pic
->quantization_table
.quantiser_table
[i
], 64);
686 bs
= (uint16_t*)&buf
[len_pos
];
687 *bs
= util_bswap16(size
- 4);
698 for (i
= 0; i
< 2; ++i
) {
699 if (pic
->huffman_table
.load_huffman_table
[i
] == 0)
702 buf
[size
++] = 0x00 | i
;
703 memcpy((buf
+ size
), &pic
->huffman_table
.table
[i
].num_dc_codes
, 16);
705 memcpy((buf
+ size
), &pic
->huffman_table
.table
[i
].dc_values
, 12);
709 for (i
= 0; i
< 2; ++i
) {
710 if (pic
->huffman_table
.load_huffman_table
[i
] == 0)
713 buf
[size
++] = 0x10 | i
;
714 memcpy((buf
+ size
), &pic
->huffman_table
.table
[i
].num_ac_codes
, 16);
716 memcpy((buf
+ size
), &pic
->huffman_table
.table
[i
].ac_values
, 162);
720 bs
= (uint16_t*)&buf
[len_pos
];
721 *bs
= util_bswap16(size
- saved_size
- 2);
726 if (pic
->slice_parameter
.restart_interval
) {
731 bs
= (uint16_t*)&buf
[size
++];
732 *bs
= util_bswap16(pic
->slice_parameter
.restart_interval
);
745 bs
= (uint16_t*)&buf
[size
++];
746 *bs
= util_bswap16(pic
->picture_parameter
.picture_height
);
749 bs
= (uint16_t*)&buf
[size
++];
750 *bs
= util_bswap16(pic
->picture_parameter
.picture_width
);
753 buf
[size
++] = pic
->picture_parameter
.num_components
;
755 for (i
= 0; i
< pic
->picture_parameter
.num_components
; ++i
) {
756 buf
[size
++] = pic
->picture_parameter
.components
[i
].component_id
;
757 buf
[size
++] = pic
->picture_parameter
.components
[i
].h_sampling_factor
<< 4 |
758 pic
->picture_parameter
.components
[i
].v_sampling_factor
;
759 buf
[size
++] = pic
->picture_parameter
.components
[i
].quantiser_table_selector
;
762 bs
= (uint16_t*)&buf
[len_pos
];
763 *bs
= util_bswap16(size
- saved_size
- 2);
774 buf
[size
++] = pic
->slice_parameter
.num_components
;
776 for (i
= 0; i
< pic
->slice_parameter
.num_components
; ++i
) {
777 buf
[size
++] = pic
->slice_parameter
.components
[i
].component_selector
;
778 buf
[size
++] = pic
->slice_parameter
.components
[i
].dc_table_selector
<< 4 |
779 pic
->slice_parameter
.components
[i
].ac_table_selector
;
786 bs
= (uint16_t*)&buf
[len_pos
];
787 *bs
= util_bswap16(size
- saved_size
- 2);
790 dec
->bs_size
+= size
;
794 * destroy this video decoder
796 static void ruvd_destroy(struct pipe_video_codec
*decoder
)
798 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
803 map_msg_fb_it_buf(dec
);
804 dec
->msg
->size
= sizeof(*dec
->msg
);
805 dec
->msg
->msg_type
= RUVD_MSG_DESTROY
;
806 dec
->msg
->stream_handle
= dec
->stream_handle
;
811 dec
->ws
->cs_destroy(dec
->cs
);
813 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
814 rvid_destroy_buffer(&dec
->msg_fb_it_buffers
[i
]);
815 rvid_destroy_buffer(&dec
->bs_buffers
[i
]);
818 rvid_destroy_buffer(&dec
->dpb
);
819 rvid_destroy_buffer(&dec
->ctx
);
820 rvid_destroy_buffer(&dec
->sessionctx
);
826 * start decoding of a new frame
828 static void ruvd_begin_frame(struct pipe_video_codec
*decoder
,
829 struct pipe_video_buffer
*target
,
830 struct pipe_picture_desc
*picture
)
832 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
837 frame
= ++dec
->frame_number
;
838 vl_video_buffer_set_associated_data(target
, decoder
, (void *)frame
,
839 &ruvd_destroy_associated_data
);
842 dec
->bs_ptr
= dec
->ws
->buffer_map(
843 dec
->bs_buffers
[dec
->cur_buffer
].res
->buf
,
844 dec
->cs
, PIPE_TRANSFER_WRITE
| RADEON_TRANSFER_TEMPORARY
);
848 * decode a macroblock
850 static void ruvd_decode_macroblock(struct pipe_video_codec
*decoder
,
851 struct pipe_video_buffer
*target
,
852 struct pipe_picture_desc
*picture
,
853 const struct pipe_macroblock
*macroblocks
,
854 unsigned num_macroblocks
)
856 /* not supported (yet) */
863 static void ruvd_decode_bitstream(struct pipe_video_codec
*decoder
,
864 struct pipe_video_buffer
*target
,
865 struct pipe_picture_desc
*picture
,
866 unsigned num_buffers
,
867 const void * const *buffers
,
868 const unsigned *sizes
)
870 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
871 enum pipe_video_format format
= u_reduce_video_profile(picture
->profile
);
879 if (format
== PIPE_VIDEO_FORMAT_JPEG
)
880 get_mjpeg_slice_header(dec
, (struct pipe_mjpeg_picture_desc
*)picture
);
882 for (i
= 0; i
< num_buffers
; ++i
) {
883 struct rvid_buffer
*buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
884 unsigned new_size
= dec
->bs_size
+ sizes
[i
];
886 if (format
== PIPE_VIDEO_FORMAT_JPEG
)
887 new_size
+= 2; /* save for EOI */
889 if (new_size
> buf
->res
->buf
->size
) {
890 dec
->ws
->buffer_unmap(buf
->res
->buf
);
891 if (!rvid_resize_buffer(dec
->screen
, dec
->cs
, buf
, new_size
)) {
892 RVID_ERR("Can't resize bitstream buffer!");
896 dec
->bs_ptr
= dec
->ws
->buffer_map(buf
->res
->buf
, dec
->cs
,
897 PIPE_TRANSFER_WRITE
|
898 RADEON_TRANSFER_TEMPORARY
);
902 dec
->bs_ptr
+= dec
->bs_size
;
905 memcpy(dec
->bs_ptr
, buffers
[i
], sizes
[i
]);
906 dec
->bs_size
+= sizes
[i
];
907 dec
->bs_ptr
+= sizes
[i
];
910 if (format
== PIPE_VIDEO_FORMAT_JPEG
) {
911 ((uint8_t *)dec
->bs_ptr
)[0] = 0xff; /* EOI */
912 ((uint8_t *)dec
->bs_ptr
)[1] = 0xd9;
919 * end decoding of the current frame
921 static void ruvd_end_frame(struct pipe_video_codec
*decoder
,
922 struct pipe_video_buffer
*target
,
923 struct pipe_picture_desc
*picture
)
925 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
926 struct pb_buffer
*dt
;
927 struct rvid_buffer
*msg_fb_it_buf
, *bs_buf
;
935 msg_fb_it_buf
= &dec
->msg_fb_it_buffers
[dec
->cur_buffer
];
936 bs_buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
938 bs_size
= align(dec
->bs_size
, 128);
939 memset(dec
->bs_ptr
, 0, bs_size
- dec
->bs_size
);
940 dec
->ws
->buffer_unmap(bs_buf
->res
->buf
);
942 map_msg_fb_it_buf(dec
);
943 dec
->msg
->size
= sizeof(*dec
->msg
);
944 dec
->msg
->msg_type
= RUVD_MSG_DECODE
;
945 dec
->msg
->stream_handle
= dec
->stream_handle
;
946 dec
->msg
->status_report_feedback_number
= dec
->frame_number
;
948 dec
->msg
->body
.decode
.stream_type
= dec
->stream_type
;
949 dec
->msg
->body
.decode
.decode_flags
= 0x1;
950 dec
->msg
->body
.decode
.width_in_samples
= dec
->base
.width
;
951 dec
->msg
->body
.decode
.height_in_samples
= dec
->base
.height
;
953 if ((picture
->profile
== PIPE_VIDEO_PROFILE_VC1_SIMPLE
) ||
954 (picture
->profile
== PIPE_VIDEO_PROFILE_VC1_MAIN
)) {
955 dec
->msg
->body
.decode
.width_in_samples
= align(dec
->msg
->body
.decode
.width_in_samples
, 16) / 16;
956 dec
->msg
->body
.decode
.height_in_samples
= align(dec
->msg
->body
.decode
.height_in_samples
, 16) / 16;
960 dec
->msg
->body
.decode
.dpb_size
= dec
->dpb
.res
->buf
->size
;
961 dec
->msg
->body
.decode
.bsd_size
= bs_size
;
962 dec
->msg
->body
.decode
.db_pitch
= align(dec
->base
.width
, get_db_pitch_alignment(dec
));
964 dt
= dec
->set_dtb(dec
->msg
, (struct vl_video_buffer
*)target
);
966 switch (u_reduce_video_profile(picture
->profile
)) {
967 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
968 dec
->msg
->body
.decode
.codec
.h264
= get_h264_msg(dec
, (struct pipe_h264_picture_desc
*)picture
);
971 case PIPE_VIDEO_FORMAT_VC1
:
972 dec
->msg
->body
.decode
.codec
.vc1
= get_vc1_msg((struct pipe_vc1_picture_desc
*)picture
);
975 case PIPE_VIDEO_FORMAT_MPEG12
:
976 dec
->msg
->body
.decode
.codec
.mpeg2
= get_mpeg2_msg(dec
, (struct pipe_mpeg12_picture_desc
*)picture
);
979 case PIPE_VIDEO_FORMAT_MPEG4
:
980 dec
->msg
->body
.decode
.codec
.mpeg4
= get_mpeg4_msg(dec
, (struct pipe_mpeg4_picture_desc
*)picture
);
983 case PIPE_VIDEO_FORMAT_JPEG
:
991 dec
->msg
->body
.decode
.db_surf_tile_config
= dec
->msg
->body
.decode
.dt_surf_tile_config
;
992 dec
->msg
->body
.decode
.extension_support
= 0x1;
994 /* set at least the feedback buffer size */
995 dec
->fb
[0] = dec
->fb_size
;
1000 send_cmd(dec
, RUVD_CMD_DPB_BUFFER
, dec
->dpb
.res
->buf
, 0,
1001 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
1004 send_cmd(dec
, RUVD_CMD_CONTEXT_BUFFER
, dec
->ctx
.res
->buf
, 0,
1005 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
1006 send_cmd(dec
, RUVD_CMD_BITSTREAM_BUFFER
, bs_buf
->res
->buf
,
1007 0, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
1008 send_cmd(dec
, RUVD_CMD_DECODING_TARGET_BUFFER
, dt
, 0,
1009 RADEON_USAGE_WRITE
, RADEON_DOMAIN_VRAM
);
1010 send_cmd(dec
, RUVD_CMD_FEEDBACK_BUFFER
, msg_fb_it_buf
->res
->buf
,
1011 FB_BUFFER_OFFSET
, RADEON_USAGE_WRITE
, RADEON_DOMAIN_GTT
);
1013 send_cmd(dec
, RUVD_CMD_ITSCALING_TABLE_BUFFER
, msg_fb_it_buf
->res
->buf
,
1014 FB_BUFFER_OFFSET
+ dec
->fb_size
, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
1015 set_reg(dec
, dec
->reg
.cntl
, 1);
1017 flush(dec
, PIPE_FLUSH_ASYNC
);
1022 * flush any outstanding command buffers to the hardware
1024 static void ruvd_flush(struct pipe_video_codec
*decoder
)
1029 * create a UVD decoder
1031 struct pipe_video_codec
*ruvd_create_decoder(struct pipe_context
*context
,
1032 const struct pipe_video_codec
*templ
,
1033 ruvd_set_dtb set_dtb
)
1035 struct radeon_winsys
* ws
= ((struct r600_common_context
*)context
)->ws
;
1036 struct r600_common_context
*rctx
= (struct r600_common_context
*)context
;
1038 unsigned width
= templ
->width
, height
= templ
->height
;
1039 unsigned bs_buf_size
;
1040 struct radeon_info info
;
1041 struct ruvd_decoder
*dec
;
1044 ws
->query_info(ws
, &info
);
1046 switch(u_reduce_video_profile(templ
->profile
)) {
1047 case PIPE_VIDEO_FORMAT_MPEG12
:
1048 if (templ
->entrypoint
> PIPE_VIDEO_ENTRYPOINT_BITSTREAM
|| info
.family
< CHIP_PALM
)
1049 return vl_create_mpeg12_decoder(context
, templ
);
1052 case PIPE_VIDEO_FORMAT_MPEG4
:
1053 width
= align(width
, VL_MACROBLOCK_WIDTH
);
1054 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
1056 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
1057 width
= align(width
, VL_MACROBLOCK_WIDTH
);
1058 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
1066 dec
= CALLOC_STRUCT(ruvd_decoder
);
1071 dec
->use_legacy
= true;
1074 dec
->base
.context
= context
;
1075 dec
->base
.width
= width
;
1076 dec
->base
.height
= height
;
1078 dec
->base
.destroy
= ruvd_destroy
;
1079 dec
->base
.begin_frame
= ruvd_begin_frame
;
1080 dec
->base
.decode_macroblock
= ruvd_decode_macroblock
;
1081 dec
->base
.decode_bitstream
= ruvd_decode_bitstream
;
1082 dec
->base
.end_frame
= ruvd_end_frame
;
1083 dec
->base
.flush
= ruvd_flush
;
1085 dec
->stream_type
= profile2stream_type(dec
, info
.family
);
1086 dec
->set_dtb
= set_dtb
;
1087 dec
->stream_handle
= rvid_alloc_stream_handle();
1088 dec
->screen
= context
->screen
;
1090 dec
->cs
= ws
->cs_create(rctx
->ctx
, RING_UVD
, NULL
, NULL
, false);
1092 RVID_ERR("Can't get command submission context.\n");
1096 dec
->fb_size
= FB_BUFFER_SIZE
;
1097 bs_buf_size
= width
* height
* (512 / (16 * 16));
1098 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
1099 unsigned msg_fb_it_size
= FB_BUFFER_OFFSET
+ dec
->fb_size
;
1100 STATIC_ASSERT(sizeof(struct ruvd_msg
) <= FB_BUFFER_OFFSET
);
1102 msg_fb_it_size
+= IT_SCALING_TABLE_SIZE
;
1103 if (!rvid_create_buffer(dec
->screen
, &dec
->msg_fb_it_buffers
[i
],
1104 msg_fb_it_size
, PIPE_USAGE_STAGING
)) {
1105 RVID_ERR("Can't allocated message buffers.\n");
1109 if (!rvid_create_buffer(dec
->screen
, &dec
->bs_buffers
[i
],
1110 bs_buf_size
, PIPE_USAGE_STAGING
)) {
1111 RVID_ERR("Can't allocated bitstream buffers.\n");
1115 rvid_clear_buffer(context
, &dec
->msg_fb_it_buffers
[i
]);
1116 rvid_clear_buffer(context
, &dec
->bs_buffers
[i
]);
1119 dpb_size
= calc_dpb_size(dec
);
1121 if (!rvid_create_buffer(dec
->screen
, &dec
->dpb
, dpb_size
, PIPE_USAGE_DEFAULT
)) {
1122 RVID_ERR("Can't allocated dpb.\n");
1125 rvid_clear_buffer(context
, &dec
->dpb
);
1128 dec
->reg
.data0
= RUVD_GPCOM_VCPU_DATA0
;
1129 dec
->reg
.data1
= RUVD_GPCOM_VCPU_DATA1
;
1130 dec
->reg
.cmd
= RUVD_GPCOM_VCPU_CMD
;
1131 dec
->reg
.cntl
= RUVD_ENGINE_CNTL
;
1133 map_msg_fb_it_buf(dec
);
1134 dec
->msg
->size
= sizeof(*dec
->msg
);
1135 dec
->msg
->msg_type
= RUVD_MSG_CREATE
;
1136 dec
->msg
->stream_handle
= dec
->stream_handle
;
1137 dec
->msg
->body
.create
.stream_type
= dec
->stream_type
;
1138 dec
->msg
->body
.create
.width_in_samples
= dec
->base
.width
;
1139 dec
->msg
->body
.create
.height_in_samples
= dec
->base
.height
;
1140 dec
->msg
->body
.create
.dpb_size
= dpb_size
;
1151 if (dec
->cs
) dec
->ws
->cs_destroy(dec
->cs
);
1153 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
1154 rvid_destroy_buffer(&dec
->msg_fb_it_buffers
[i
]);
1155 rvid_destroy_buffer(&dec
->bs_buffers
[i
]);
1158 rvid_destroy_buffer(&dec
->dpb
);
1159 rvid_destroy_buffer(&dec
->ctx
);
1160 rvid_destroy_buffer(&dec
->sessionctx
);
1167 /* calculate top/bottom offset */
1168 static unsigned texture_offset(struct radeon_surf
*surface
, unsigned layer
)
1170 return surface
->u
.legacy
.level
[0].offset
+
1171 layer
* (uint64_t)surface
->u
.legacy
.level
[0].slice_size_dw
* 4;
1174 /* hw encode the aspect of macro tiles */
/*
 * Visible cases translate an aspect of 1, 2, 4 or 8 into 0, 1, 2 or 3 and
 * return the translated value.
 * NOTE(review): one line between the switch header and "case 1" is not
 * visible in this view, so the handling of other inputs cannot be
 * determined from here.
 */
1175 static unsigned macro_tile_aspect(unsigned macro_tile_aspect
)
1177 switch (macro_tile_aspect
) {
1179 case 1: macro_tile_aspect
= 0; break;
1180 case 2: macro_tile_aspect
= 1; break;
1181 case 4: macro_tile_aspect
= 2; break;
1182 case 8: macro_tile_aspect
= 3; break;
1184 return macro_tile_aspect
;
1187 /* hw encode the bank width and height */
/*
 * Visible cases translate a bank width/height of 1, 2, 4 or 8 into
 * 0, 1, 2 or 3.
 * NOTE(review): the switch header, any default label and the return
 * statement are not visible in this view.
 */
1188 static unsigned bank_wh(unsigned bankwh
)
1192 case 1: bankwh
= 0; break;
1193 case 2: bankwh
= 1; break;
1194 case 4: bankwh
= 2; break;
1195 case 8: bankwh
= 3; break;
1201 * fill decoding target field from the luma and chroma surfaces
1203 void ruvd_set_dt_surfaces(struct ruvd_msg
*msg
, struct radeon_surf
*luma
,
1204 struct radeon_surf
*chroma
)
1206 msg
->body
.decode
.dt_pitch
= luma
->u
.legacy
.level
[0].nblk_x
* luma
->blk_w
;
1207 switch (luma
->u
.legacy
.level
[0].mode
) {
1208 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
1209 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_LINEAR
;
1210 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_LINEAR
;
1212 case RADEON_SURF_MODE_1D
:
1213 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
1214 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_1D_THIN
;
1216 case RADEON_SURF_MODE_2D
:
1217 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
1218 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_2D_THIN
;
1225 msg
->body
.decode
.dt_luma_top_offset
= texture_offset(luma
, 0);
1227 msg
->body
.decode
.dt_chroma_top_offset
= texture_offset(chroma
, 0);
1228 if (msg
->body
.decode
.dt_field_mode
) {
1229 msg
->body
.decode
.dt_luma_bottom_offset
= texture_offset(luma
, 1);
1231 msg
->body
.decode
.dt_chroma_bottom_offset
= texture_offset(chroma
, 1);
1233 msg
->body
.decode
.dt_luma_bottom_offset
= msg
->body
.decode
.dt_luma_top_offset
;
1234 msg
->body
.decode
.dt_chroma_bottom_offset
= msg
->body
.decode
.dt_chroma_top_offset
;
1238 assert(luma
->u
.legacy
.bankw
== chroma
->u
.legacy
.bankw
);
1239 assert(luma
->u
.legacy
.bankh
== chroma
->u
.legacy
.bankh
);
1240 assert(luma
->u
.legacy
.mtilea
== chroma
->u
.legacy
.mtilea
);
1243 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_BANK_WIDTH(bank_wh(luma
->u
.legacy
.bankw
));
1244 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_BANK_HEIGHT(bank_wh(luma
->u
.legacy
.bankh
));
1245 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma
->u
.legacy
.mtilea
));