/**************************************************************************
 *
 * Copyright 2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
31 #include "pipe/p_video_codec.h"
33 #include "util/u_memory.h"
34 #include "util/u_video.h"
36 #include "vl/vl_mpeg12_decoder.h"
38 #include "r600_pipe_common.h"
39 #include "radeon_video.h"
40 #include "radeon_vcn_dec.h"
/*
 * Layout of one message/feedback/IT buffer: the feedback area starts at
 * FB_BUFFER_OFFSET and the IT scaling table follows FB_BUFFER_SIZE bytes
 * later (see map_msg_fb_it_buf()).
 */
#define FB_BUFFER_OFFSET		0x1000
#define FB_BUFFER_SIZE			2048
#define IT_SCALING_TABLE_SIZE		992
#define RDECODE_SESSION_CONTEXT_SIZE	(128 * 1024)

/* register offsets used to talk to the VCPU (see set_reg()/send_cmd()) */
#define RDECODE_GPCOM_VCPU_CMD		0x2070c
#define RDECODE_GPCOM_VCPU_DATA0	0x20710
#define RDECODE_GPCOM_VCPU_DATA1	0x20714
#define RDECODE_ENGINE_CNTL		0x20718

/*
 * NOTE(review): NUM_BUFFERS sizes the buffer rings in struct radeon_decoder
 * but its definition is not present in the damaged text; 4 is assumed here.
 * Confirm against the pristine file before merging.
 */
#ifndef NUM_BUFFERS
#define NUM_BUFFERS			4
#endif
#define NUM_MPEG2_REFS			6
#define NUM_H264_REFS			17
#define NUM_VC1_REFS			5
57 struct radeon_decoder
{
58 struct pipe_video_codec base
;
60 unsigned stream_handle
;
62 unsigned frame_number
;
64 struct pipe_screen
*screen
;
65 struct radeon_winsys
*ws
;
66 struct radeon_winsys_cs
*cs
;
73 struct rvid_buffer msg_fb_it_buffers
[NUM_BUFFERS
];
74 struct rvid_buffer bs_buffers
[NUM_BUFFERS
];
75 struct rvid_buffer dpb
;
76 struct rvid_buffer ctx
;
77 struct rvid_buffer sessionctx
;
83 static rvcn_dec_message_avc_t
get_h264_msg(struct radeon_decoder
*dec
,
84 struct pipe_h264_picture_desc
*pic
)
86 rvcn_dec_message_avc_t result
;
88 memset(&result
, 0, sizeof(result
));
89 switch (pic
->base
.profile
) {
90 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE
:
91 result
.profile
= RDECODE_H264_PROFILE_BASELINE
;
94 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN
:
95 result
.profile
= RDECODE_H264_PROFILE_MAIN
;
98 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
:
99 result
.profile
= RDECODE_H264_PROFILE_HIGH
;
107 result
.level
= dec
->base
.level
;
109 result
.sps_info_flags
= 0;
110 result
.sps_info_flags
|= pic
->pps
->sps
->direct_8x8_inference_flag
<< 0;
111 result
.sps_info_flags
|= pic
->pps
->sps
->mb_adaptive_frame_field_flag
<< 1;
112 result
.sps_info_flags
|= pic
->pps
->sps
->frame_mbs_only_flag
<< 2;
113 result
.sps_info_flags
|= pic
->pps
->sps
->delta_pic_order_always_zero_flag
<< 3;
114 result
.sps_info_flags
|= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT
;
116 result
.bit_depth_luma_minus8
= pic
->pps
->sps
->bit_depth_luma_minus8
;
117 result
.bit_depth_chroma_minus8
= pic
->pps
->sps
->bit_depth_chroma_minus8
;
118 result
.log2_max_frame_num_minus4
= pic
->pps
->sps
->log2_max_frame_num_minus4
;
119 result
.pic_order_cnt_type
= pic
->pps
->sps
->pic_order_cnt_type
;
120 result
.log2_max_pic_order_cnt_lsb_minus4
=
121 pic
->pps
->sps
->log2_max_pic_order_cnt_lsb_minus4
;
123 switch (dec
->base
.chroma_format
) {
124 case PIPE_VIDEO_CHROMA_FORMAT_NONE
:
126 case PIPE_VIDEO_CHROMA_FORMAT_400
:
127 result
.chroma_format
= 0;
129 case PIPE_VIDEO_CHROMA_FORMAT_420
:
130 result
.chroma_format
= 1;
132 case PIPE_VIDEO_CHROMA_FORMAT_422
:
133 result
.chroma_format
= 2;
135 case PIPE_VIDEO_CHROMA_FORMAT_444
:
136 result
.chroma_format
= 3;
140 result
.pps_info_flags
= 0;
141 result
.pps_info_flags
|= pic
->pps
->transform_8x8_mode_flag
<< 0;
142 result
.pps_info_flags
|= pic
->pps
->redundant_pic_cnt_present_flag
<< 1;
143 result
.pps_info_flags
|= pic
->pps
->constrained_intra_pred_flag
<< 2;
144 result
.pps_info_flags
|= pic
->pps
->deblocking_filter_control_present_flag
<< 3;
145 result
.pps_info_flags
|= pic
->pps
->weighted_bipred_idc
<< 4;
146 result
.pps_info_flags
|= pic
->pps
->weighted_pred_flag
<< 6;
147 result
.pps_info_flags
|= pic
->pps
->bottom_field_pic_order_in_frame_present_flag
<< 7;
148 result
.pps_info_flags
|= pic
->pps
->entropy_coding_mode_flag
<< 8;
150 result
.num_slice_groups_minus1
= pic
->pps
->num_slice_groups_minus1
;
151 result
.slice_group_map_type
= pic
->pps
->slice_group_map_type
;
152 result
.slice_group_change_rate_minus1
= pic
->pps
->slice_group_change_rate_minus1
;
153 result
.pic_init_qp_minus26
= pic
->pps
->pic_init_qp_minus26
;
154 result
.chroma_qp_index_offset
= pic
->pps
->chroma_qp_index_offset
;
155 result
.second_chroma_qp_index_offset
= pic
->pps
->second_chroma_qp_index_offset
;
157 memcpy(result
.scaling_list_4x4
, pic
->pps
->ScalingList4x4
, 6*16);
158 memcpy(result
.scaling_list_8x8
, pic
->pps
->ScalingList8x8
, 2*64);
160 memcpy(dec
->it
, result
.scaling_list_4x4
, 6*16);
161 memcpy((dec
->it
+ 96), result
.scaling_list_8x8
, 2*64);
163 result
.num_ref_frames
= pic
->num_ref_frames
;
165 result
.num_ref_idx_l0_active_minus1
= pic
->num_ref_idx_l0_active_minus1
;
166 result
.num_ref_idx_l1_active_minus1
= pic
->num_ref_idx_l1_active_minus1
;
168 result
.frame_num
= pic
->frame_num
;
169 memcpy(result
.frame_num_list
, pic
->frame_num_list
, 4*16);
170 result
.curr_field_order_cnt_list
[0] = pic
->field_order_cnt
[0];
171 result
.curr_field_order_cnt_list
[1] = pic
->field_order_cnt
[1];
172 memcpy(result
.field_order_cnt_list
, pic
->field_order_cnt_list
, 4*16*2);
174 result
.decoded_pic_idx
= pic
->frame_num
;
/*
 * Destructor callback handed to vl_video_buffer_set_associated_data().
 * The associated data is an integer stored in the pointer value, so there
 * is nothing to release.
 */
static void radeon_dec_destroy_associated_data(void *data)
{
	/* NOOP, since we only use an intptr */
	(void)data;
}
184 static rvcn_dec_message_hevc_t
get_h265_msg(struct radeon_decoder
*dec
,
185 struct pipe_video_buffer
*target
,
186 struct pipe_h265_picture_desc
*pic
)
188 rvcn_dec_message_hevc_t result
;
191 memset(&result
, 0, sizeof(result
));
192 result
.sps_info_flags
= 0;
193 result
.sps_info_flags
|= pic
->pps
->sps
->scaling_list_enabled_flag
<< 0;
194 result
.sps_info_flags
|= pic
->pps
->sps
->amp_enabled_flag
<< 1;
195 result
.sps_info_flags
|= pic
->pps
->sps
->sample_adaptive_offset_enabled_flag
<< 2;
196 result
.sps_info_flags
|= pic
->pps
->sps
->pcm_enabled_flag
<< 3;
197 result
.sps_info_flags
|= pic
->pps
->sps
->pcm_loop_filter_disabled_flag
<< 4;
198 result
.sps_info_flags
|= pic
->pps
->sps
->long_term_ref_pics_present_flag
<< 5;
199 result
.sps_info_flags
|= pic
->pps
->sps
->sps_temporal_mvp_enabled_flag
<< 6;
200 result
.sps_info_flags
|= pic
->pps
->sps
->strong_intra_smoothing_enabled_flag
<< 7;
201 result
.sps_info_flags
|= pic
->pps
->sps
->separate_colour_plane_flag
<< 8;
202 if (((struct r600_common_screen
*)dec
->screen
)->family
== CHIP_CARRIZO
)
203 result
.sps_info_flags
|= 1 << 9;
204 if (pic
->UseRefPicList
== true)
205 result
.sps_info_flags
|= 1 << 10;
207 result
.chroma_format
= pic
->pps
->sps
->chroma_format_idc
;
208 result
.bit_depth_luma_minus8
= pic
->pps
->sps
->bit_depth_luma_minus8
;
209 result
.bit_depth_chroma_minus8
= pic
->pps
->sps
->bit_depth_chroma_minus8
;
210 result
.log2_max_pic_order_cnt_lsb_minus4
= pic
->pps
->sps
->log2_max_pic_order_cnt_lsb_minus4
;
211 result
.sps_max_dec_pic_buffering_minus1
= pic
->pps
->sps
->sps_max_dec_pic_buffering_minus1
;
212 result
.log2_min_luma_coding_block_size_minus3
=
213 pic
->pps
->sps
->log2_min_luma_coding_block_size_minus3
;
214 result
.log2_diff_max_min_luma_coding_block_size
=
215 pic
->pps
->sps
->log2_diff_max_min_luma_coding_block_size
;
216 result
.log2_min_transform_block_size_minus2
=
217 pic
->pps
->sps
->log2_min_transform_block_size_minus2
;
218 result
.log2_diff_max_min_transform_block_size
=
219 pic
->pps
->sps
->log2_diff_max_min_transform_block_size
;
220 result
.max_transform_hierarchy_depth_inter
=
221 pic
->pps
->sps
->max_transform_hierarchy_depth_inter
;
222 result
.max_transform_hierarchy_depth_intra
=
223 pic
->pps
->sps
->max_transform_hierarchy_depth_intra
;
224 result
.pcm_sample_bit_depth_luma_minus1
= pic
->pps
->sps
->pcm_sample_bit_depth_luma_minus1
;
225 result
.pcm_sample_bit_depth_chroma_minus1
=
226 pic
->pps
->sps
->pcm_sample_bit_depth_chroma_minus1
;
227 result
.log2_min_pcm_luma_coding_block_size_minus3
=
228 pic
->pps
->sps
->log2_min_pcm_luma_coding_block_size_minus3
;
229 result
.log2_diff_max_min_pcm_luma_coding_block_size
=
230 pic
->pps
->sps
->log2_diff_max_min_pcm_luma_coding_block_size
;
231 result
.num_short_term_ref_pic_sets
= pic
->pps
->sps
->num_short_term_ref_pic_sets
;
233 result
.pps_info_flags
= 0;
234 result
.pps_info_flags
|= pic
->pps
->dependent_slice_segments_enabled_flag
<< 0;
235 result
.pps_info_flags
|= pic
->pps
->output_flag_present_flag
<< 1;
236 result
.pps_info_flags
|= pic
->pps
->sign_data_hiding_enabled_flag
<< 2;
237 result
.pps_info_flags
|= pic
->pps
->cabac_init_present_flag
<< 3;
238 result
.pps_info_flags
|= pic
->pps
->constrained_intra_pred_flag
<< 4;
239 result
.pps_info_flags
|= pic
->pps
->transform_skip_enabled_flag
<< 5;
240 result
.pps_info_flags
|= pic
->pps
->cu_qp_delta_enabled_flag
<< 6;
241 result
.pps_info_flags
|= pic
->pps
->pps_slice_chroma_qp_offsets_present_flag
<< 7;
242 result
.pps_info_flags
|= pic
->pps
->weighted_pred_flag
<< 8;
243 result
.pps_info_flags
|= pic
->pps
->weighted_bipred_flag
<< 9;
244 result
.pps_info_flags
|= pic
->pps
->transquant_bypass_enabled_flag
<< 10;
245 result
.pps_info_flags
|= pic
->pps
->tiles_enabled_flag
<< 11;
246 result
.pps_info_flags
|= pic
->pps
->entropy_coding_sync_enabled_flag
<< 12;
247 result
.pps_info_flags
|= pic
->pps
->uniform_spacing_flag
<< 13;
248 result
.pps_info_flags
|= pic
->pps
->loop_filter_across_tiles_enabled_flag
<< 14;
249 result
.pps_info_flags
|= pic
->pps
->pps_loop_filter_across_slices_enabled_flag
<< 15;
250 result
.pps_info_flags
|= pic
->pps
->deblocking_filter_override_enabled_flag
<< 16;
251 result
.pps_info_flags
|= pic
->pps
->pps_deblocking_filter_disabled_flag
<< 17;
252 result
.pps_info_flags
|= pic
->pps
->lists_modification_present_flag
<< 18;
253 result
.pps_info_flags
|= pic
->pps
->slice_segment_header_extension_present_flag
<< 19;
255 result
.num_extra_slice_header_bits
= pic
->pps
->num_extra_slice_header_bits
;
256 result
.num_long_term_ref_pic_sps
= pic
->pps
->sps
->num_long_term_ref_pics_sps
;
257 result
.num_ref_idx_l0_default_active_minus1
= pic
->pps
->num_ref_idx_l0_default_active_minus1
;
258 result
.num_ref_idx_l1_default_active_minus1
= pic
->pps
->num_ref_idx_l1_default_active_minus1
;
259 result
.pps_cb_qp_offset
= pic
->pps
->pps_cb_qp_offset
;
260 result
.pps_cr_qp_offset
= pic
->pps
->pps_cr_qp_offset
;
261 result
.pps_beta_offset_div2
= pic
->pps
->pps_beta_offset_div2
;
262 result
.pps_tc_offset_div2
= pic
->pps
->pps_tc_offset_div2
;
263 result
.diff_cu_qp_delta_depth
= pic
->pps
->diff_cu_qp_delta_depth
;
264 result
.num_tile_columns_minus1
= pic
->pps
->num_tile_columns_minus1
;
265 result
.num_tile_rows_minus1
= pic
->pps
->num_tile_rows_minus1
;
266 result
.log2_parallel_merge_level_minus2
= pic
->pps
->log2_parallel_merge_level_minus2
;
267 result
.init_qp_minus26
= pic
->pps
->init_qp_minus26
;
269 for (i
= 0; i
< 19; ++i
)
270 result
.column_width_minus1
[i
] = pic
->pps
->column_width_minus1
[i
];
272 for (i
= 0; i
< 21; ++i
)
273 result
.row_height_minus1
[i
] = pic
->pps
->row_height_minus1
[i
];
275 result
.num_delta_pocs_ref_rps_idx
= pic
->NumDeltaPocsOfRefRpsIdx
;
276 result
.curr_idx
= pic
->CurrPicOrderCntVal
;
277 result
.curr_poc
= pic
->CurrPicOrderCntVal
;
279 vl_video_buffer_set_associated_data(target
, &dec
->base
,
280 (void *)(uintptr_t)pic
->CurrPicOrderCntVal
,
281 &radeon_dec_destroy_associated_data
);
283 for (i
= 0; i
< 16; ++i
) {
284 struct pipe_video_buffer
*ref
= pic
->ref
[i
];
285 uintptr_t ref_pic
= 0;
287 result
.poc_list
[i
] = pic
->PicOrderCntVal
[i
];
290 ref_pic
= (uintptr_t)vl_video_buffer_get_associated_data(ref
, &dec
->base
);
293 result
.ref_pic_list
[i
] = ref_pic
;
296 for (i
= 0; i
< 8; ++i
) {
297 result
.ref_pic_set_st_curr_before
[i
] = 0xFF;
298 result
.ref_pic_set_st_curr_after
[i
] = 0xFF;
299 result
.ref_pic_set_lt_curr
[i
] = 0xFF;
302 for (i
= 0; i
< pic
->NumPocStCurrBefore
; ++i
)
303 result
.ref_pic_set_st_curr_before
[i
] = pic
->RefPicSetStCurrBefore
[i
];
305 for (i
= 0; i
< pic
->NumPocStCurrAfter
; ++i
)
306 result
.ref_pic_set_st_curr_after
[i
] = pic
->RefPicSetStCurrAfter
[i
];
308 for (i
= 0; i
< pic
->NumPocLtCurr
; ++i
)
309 result
.ref_pic_set_lt_curr
[i
] = pic
->RefPicSetLtCurr
[i
];
311 for (i
= 0; i
< 6; ++i
)
312 result
.ucScalingListDCCoefSizeID2
[i
] = pic
->pps
->sps
->ScalingListDCCoeff16x16
[i
];
314 for (i
= 0; i
< 2; ++i
)
315 result
.ucScalingListDCCoefSizeID3
[i
] = pic
->pps
->sps
->ScalingListDCCoeff32x32
[i
];
317 memcpy(dec
->it
, pic
->pps
->sps
->ScalingList4x4
, 6 * 16);
318 memcpy(dec
->it
+ 96, pic
->pps
->sps
->ScalingList8x8
, 6 * 64);
319 memcpy(dec
->it
+ 480, pic
->pps
->sps
->ScalingList16x16
, 6 * 64);
320 memcpy(dec
->it
+ 864, pic
->pps
->sps
->ScalingList32x32
, 2 * 64);
322 for (i
= 0 ; i
< 2 ; i
++) {
323 for (int j
= 0 ; j
< 15 ; j
++)
324 result
.direct_reflist
[i
][j
] = pic
->RefPicList
[i
][j
];
327 if (pic
->base
.profile
== PIPE_VIDEO_PROFILE_HEVC_MAIN_10
) {
328 if (target
->buffer_format
== PIPE_FORMAT_P016
) {
329 result
.p010_mode
= 1;
332 result
.p010_mode
= 0;
333 result
.luma_10to8
= 5;
334 result
.chroma_10to8
= 5;
335 result
.hevc_reserved
[0] = 4; /* sclr_luma10to8 */
336 result
.hevc_reserved
[1] = 4; /* sclr_chroma10to8 */
343 static unsigned calc_ctx_size_h265_main(struct radeon_decoder
*dec
)
345 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
346 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
348 unsigned max_references
= dec
->base
.max_references
+ 1;
350 if (dec
->base
.width
* dec
->base
.height
>= 4096*2000)
351 max_references
= MAX2(max_references
, 8);
353 max_references
= MAX2(max_references
, 17);
355 width
= align (width
, 16);
356 height
= align (height
, 16);
357 return ((width
+ 255) / 16) * ((height
+ 255) / 16) * 16 * max_references
+ 52 * 1024;
360 static unsigned calc_ctx_size_h265_main10(struct radeon_decoder
*dec
, struct pipe_h265_picture_desc
*pic
)
362 unsigned block_size
, log2_ctb_size
, width_in_ctb
, height_in_ctb
, num_16x16_block_per_ctb
;
363 unsigned context_buffer_size_per_ctb_row
, cm_buffer_size
, max_mb_address
, db_left_tile_pxl_size
;
364 unsigned db_left_tile_ctx_size
= 4096 / 16 * (32 + 16 * 4);
366 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
367 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
368 unsigned coeff_10bit
= (pic
->pps
->sps
->bit_depth_luma_minus8
||
369 pic
->pps
->sps
->bit_depth_chroma_minus8
) ? 2 : 1;
371 unsigned max_references
= dec
->base
.max_references
+ 1;
373 if (dec
->base
.width
* dec
->base
.height
>= 4096*2000)
374 max_references
= MAX2(max_references
, 8);
376 max_references
= MAX2(max_references
, 17);
378 block_size
= (1 << (pic
->pps
->sps
->log2_min_luma_coding_block_size_minus3
+ 3));
379 log2_ctb_size
= block_size
+ pic
->pps
->sps
->log2_diff_max_min_luma_coding_block_size
;
381 width_in_ctb
= (width
+ ((1 << log2_ctb_size
) - 1)) >> log2_ctb_size
;
382 height_in_ctb
= (height
+ ((1 << log2_ctb_size
) - 1)) >> log2_ctb_size
;
384 num_16x16_block_per_ctb
= ((1 << log2_ctb_size
) >> 4) * ((1 << log2_ctb_size
) >> 4);
385 context_buffer_size_per_ctb_row
= align(width_in_ctb
* num_16x16_block_per_ctb
* 16, 256);
386 max_mb_address
= (unsigned) ceil(height
* 8 / 2048.0);
388 cm_buffer_size
= max_references
* context_buffer_size_per_ctb_row
* height_in_ctb
;
389 db_left_tile_pxl_size
= coeff_10bit
* (max_mb_address
* 2 * 2048 + 1024);
391 return cm_buffer_size
+ db_left_tile_ctx_size
+ db_left_tile_pxl_size
;
394 static rvcn_dec_message_vc1_t
get_vc1_msg(struct pipe_vc1_picture_desc
*pic
)
396 rvcn_dec_message_vc1_t result
;
398 memset(&result
, 0, sizeof(result
));
399 switch(pic
->base
.profile
) {
400 case PIPE_VIDEO_PROFILE_VC1_SIMPLE
:
401 result
.profile
= RDECODE_VC1_PROFILE_SIMPLE
;
405 case PIPE_VIDEO_PROFILE_VC1_MAIN
:
406 result
.profile
= RDECODE_VC1_PROFILE_MAIN
;
410 case PIPE_VIDEO_PROFILE_VC1_ADVANCED
:
411 result
.profile
= RDECODE_VC1_PROFILE_ADVANCED
;
419 result
.sps_info_flags
|= pic
->postprocflag
<< 7;
420 result
.sps_info_flags
|= pic
->pulldown
<< 6;
421 result
.sps_info_flags
|= pic
->interlace
<< 5;
422 result
.sps_info_flags
|= pic
->tfcntrflag
<< 4;
423 result
.sps_info_flags
|= pic
->finterpflag
<< 3;
424 result
.sps_info_flags
|= pic
->psf
<< 1;
426 result
.pps_info_flags
|= pic
->range_mapy_flag
<< 31;
427 result
.pps_info_flags
|= pic
->range_mapy
<< 28;
428 result
.pps_info_flags
|= pic
->range_mapuv_flag
<< 27;
429 result
.pps_info_flags
|= pic
->range_mapuv
<< 24;
430 result
.pps_info_flags
|= pic
->multires
<< 21;
431 result
.pps_info_flags
|= pic
->maxbframes
<< 16;
432 result
.pps_info_flags
|= pic
->overlap
<< 11;
433 result
.pps_info_flags
|= pic
->quantizer
<< 9;
434 result
.pps_info_flags
|= pic
->panscan_flag
<< 7;
435 result
.pps_info_flags
|= pic
->refdist_flag
<< 6;
436 result
.pps_info_flags
|= pic
->vstransform
<< 0;
438 if (pic
->base
.profile
!= PIPE_VIDEO_PROFILE_VC1_SIMPLE
) {
439 result
.pps_info_flags
|= pic
->syncmarker
<< 20;
440 result
.pps_info_flags
|= pic
->rangered
<< 19;
441 result
.pps_info_flags
|= pic
->loopfilter
<< 5;
442 result
.pps_info_flags
|= pic
->fastuvmc
<< 4;
443 result
.pps_info_flags
|= pic
->extended_mv
<< 3;
444 result
.pps_info_flags
|= pic
->extended_dmv
<< 8;
445 result
.pps_info_flags
|= pic
->dquant
<< 1;
448 result
.chroma_format
= 1;
453 static uint32_t get_ref_pic_idx(struct radeon_decoder
*dec
, struct pipe_video_buffer
*ref
)
455 uint32_t min
= MAX2(dec
->frame_number
, NUM_MPEG2_REFS
) - NUM_MPEG2_REFS
;
456 uint32_t max
= MAX2(dec
->frame_number
, 1) - 1;
459 /* seems to be the most sane fallback */
463 /* get the frame number from the associated data */
464 frame
= (uintptr_t)vl_video_buffer_get_associated_data(ref
, &dec
->base
);
466 /* limit the frame number to a valid range */
467 return MAX2(MIN2(frame
, max
), min
);
470 static rvcn_dec_message_mpeg2_vld_t
get_mpeg2_msg(struct radeon_decoder
*dec
,
471 struct pipe_mpeg12_picture_desc
*pic
)
473 const int *zscan
= pic
->alternate_scan
? vl_zscan_alternate
: vl_zscan_normal
;
474 rvcn_dec_message_mpeg2_vld_t result
;
477 memset(&result
, 0, sizeof(result
));
478 result
.decoded_pic_idx
= dec
->frame_number
;
480 result
.forward_ref_pic_idx
= get_ref_pic_idx(dec
, pic
->ref
[0]);
481 result
.backward_ref_pic_idx
= get_ref_pic_idx(dec
, pic
->ref
[1]);
483 result
.load_intra_quantiser_matrix
= 1;
484 result
.load_nonintra_quantiser_matrix
= 1;
486 for (i
= 0; i
< 64; ++i
) {
487 result
.intra_quantiser_matrix
[i
] = pic
->intra_matrix
[zscan
[i
]];
488 result
.nonintra_quantiser_matrix
[i
] = pic
->non_intra_matrix
[zscan
[i
]];
491 result
.profile_and_level_indication
= 0;
492 result
.chroma_format
= 0x1;
494 result
.picture_coding_type
= pic
->picture_coding_type
;
495 result
.f_code
[0][0] = pic
->f_code
[0][0] + 1;
496 result
.f_code
[0][1] = pic
->f_code
[0][1] + 1;
497 result
.f_code
[1][0] = pic
->f_code
[1][0] + 1;
498 result
.f_code
[1][1] = pic
->f_code
[1][1] + 1;
499 result
.intra_dc_precision
= pic
->intra_dc_precision
;
500 result
.pic_structure
= pic
->picture_structure
;
501 result
.top_field_first
= pic
->top_field_first
;
502 result
.frame_pred_frame_dct
= pic
->frame_pred_frame_dct
;
503 result
.concealment_motion_vectors
= pic
->concealment_motion_vectors
;
504 result
.q_scale_type
= pic
->q_scale_type
;
505 result
.intra_vlc_format
= pic
->intra_vlc_format
;
506 result
.alternate_scan
= pic
->alternate_scan
;
511 static rvcn_dec_message_mpeg4_asp_vld_t
get_mpeg4_msg(struct radeon_decoder
*dec
,
512 struct pipe_mpeg4_picture_desc
*pic
)
514 rvcn_dec_message_mpeg4_asp_vld_t result
;
517 memset(&result
, 0, sizeof(result
));
518 result
.decoded_pic_idx
= dec
->frame_number
;
520 result
.forward_ref_pic_idx
= get_ref_pic_idx(dec
, pic
->ref
[0]);
521 result
.backward_ref_pic_idx
= get_ref_pic_idx(dec
, pic
->ref
[1]);
523 result
.variant_type
= 0;
524 result
.profile_and_level_indication
= 0xF0;
526 result
.video_object_layer_verid
= 0x5;
527 result
.video_object_layer_shape
= 0x0;
529 result
.video_object_layer_width
= dec
->base
.width
;
530 result
.video_object_layer_height
= dec
->base
.height
;
532 result
.vop_time_increment_resolution
= pic
->vop_time_increment_resolution
;
534 result
.short_video_header
|= pic
->short_video_header
<< 0;
535 result
.interlaced
|= pic
->interlaced
<< 2;
536 result
.load_intra_quant_mat
|= 1 << 3;
537 result
.load_nonintra_quant_mat
|= 1 << 4;
538 result
.quarter_sample
|= pic
->quarter_sample
<< 5;
539 result
.complexity_estimation_disable
|= 1 << 6;
540 result
.resync_marker_disable
|= pic
->resync_marker_disable
<< 7;
541 result
.newpred_enable
|= 0 << 10; //
542 result
.reduced_resolution_vop_enable
|= 0 << 11;
544 result
.quant_type
= pic
->quant_type
;
546 for (i
= 0; i
< 64; ++i
) {
547 result
.intra_quant_mat
[i
] = pic
->intra_matrix
[vl_zscan_normal
[i
]];
548 result
.nonintra_quant_mat
[i
] = pic
->non_intra_matrix
[vl_zscan_normal
[i
]];
554 static void rvcn_dec_message_create(struct radeon_decoder
*dec
)
556 rvcn_dec_message_header_t
*header
= dec
->msg
;
557 rvcn_dec_message_create_t
*create
= dec
->msg
+ sizeof(rvcn_dec_message_header_t
);
558 unsigned sizes
= sizeof(rvcn_dec_message_header_t
) + sizeof(rvcn_dec_message_create_t
);
560 memset(dec
->msg
, 0, sizes
);
561 header
->header_size
= sizeof(rvcn_dec_message_header_t
);
562 header
->total_size
= sizes
;
563 header
->num_buffers
= 1;
564 header
->msg_type
= RDECODE_MSG_CREATE
;
565 header
->stream_handle
= dec
->stream_handle
;
566 header
->status_report_feedback_number
= 0;
568 header
->index
[0].message_id
= RDECODE_MESSAGE_CREATE
;
569 header
->index
[0].offset
= sizeof(rvcn_dec_message_header_t
);
570 header
->index
[0].size
= sizeof(rvcn_dec_message_create_t
);
571 header
->index
[0].filled
= 0;
573 create
->stream_type
= dec
->stream_type
;
574 create
->session_flags
= 0;
575 create
->width_in_samples
= dec
->base
.width
;
576 create
->height_in_samples
= dec
->base
.height
;
579 static struct pb_buffer
*rvcn_dec_message_decode(struct radeon_decoder
*dec
,
580 struct pipe_video_buffer
*target
,
581 struct pipe_picture_desc
*picture
)
583 struct r600_texture
*luma
= (struct r600_texture
*)
584 ((struct vl_video_buffer
*)target
)->resources
[0];
585 struct r600_texture
*chroma
= (struct r600_texture
*)
586 ((struct vl_video_buffer
*)target
)->resources
[1];
587 rvcn_dec_message_header_t
*header
;
588 rvcn_dec_message_index_t
*index
;
589 rvcn_dec_message_decode_t
*decode
;
590 unsigned sizes
= 0, offset_decode
, offset_codec
;
594 sizes
+= sizeof(rvcn_dec_message_header_t
);
595 index
= (void*)header
+ sizeof(rvcn_dec_message_header_t
);
596 sizes
+= sizeof(rvcn_dec_message_index_t
);
597 offset_decode
= sizes
;
598 decode
= (void*)index
+ sizeof(rvcn_dec_message_index_t
);
599 sizes
+= sizeof(rvcn_dec_message_decode_t
);
600 offset_codec
= sizes
;
601 codec
= (void*)decode
+ sizeof(rvcn_dec_message_decode_t
);
603 memset(dec
->msg
, 0, sizes
);
604 header
->header_size
= sizeof(rvcn_dec_message_header_t
);
605 header
->total_size
= sizes
;
606 header
->num_buffers
= 2;
607 header
->msg_type
= RDECODE_MSG_DECODE
;
608 header
->stream_handle
= dec
->stream_handle
;
609 header
->status_report_feedback_number
= dec
->frame_number
;
611 header
->index
[0].message_id
= RDECODE_MESSAGE_DECODE
;
612 header
->index
[0].offset
= offset_decode
;
613 header
->index
[0].size
= sizeof(rvcn_dec_message_decode_t
);
614 header
->index
[0].filled
= 0;
616 index
->offset
= offset_codec
;
617 index
->size
= sizeof(rvcn_dec_message_avc_t
);
620 decode
->stream_type
= dec
->stream_type
;;
621 decode
->decode_flags
= 0x1;
622 decode
->width_in_samples
= dec
->base
.width
;;
623 decode
->height_in_samples
= dec
->base
.height
;;
625 decode
->bsd_size
= align(dec
->bs_size
, 128);
626 decode
->dpb_size
= dec
->dpb
.res
->buf
->size
;
628 ((struct r600_resource
*)((struct vl_video_buffer
*)target
)->resources
[0])->buf
->size
+
629 ((struct r600_resource
*)((struct vl_video_buffer
*)target
)->resources
[1])->buf
->size
;
631 decode
->sct_size
= 0;
632 decode
->sc_coeff_size
= 0;
634 decode
->sw_ctxt_size
= RDECODE_SESSION_CONTEXT_SIZE
;
635 decode
->db_pitch
= align(dec
->base
.width
, 32);
636 decode
->db_surf_tile_config
= 0;
638 decode
->dt_pitch
= luma
->surface
.u
.gfx9
.surf_pitch
* luma
->surface
.blk_w
;
639 decode
->dt_uv_pitch
= decode
->dt_pitch
/ 2;
641 decode
->dt_tiling_mode
= 0;
642 decode
->dt_swizzle_mode
= RDECODE_SW_MODE_LINEAR
;
643 decode
->dt_array_mode
= RDECODE_ARRAY_MODE_LINEAR
;
644 decode
->dt_field_mode
= ((struct vl_video_buffer
*)target
)->base
.interlaced
;
645 decode
->dt_surf_tile_config
= 0;
646 decode
->dt_uv_surf_tile_config
= 0;
648 decode
->dt_luma_top_offset
= luma
->surface
.u
.gfx9
.surf_offset
;
649 decode
->dt_chroma_top_offset
= chroma
->surface
.u
.gfx9
.surf_offset
;
650 if (decode
->dt_field_mode
) {
651 decode
->dt_luma_bottom_offset
= luma
->surface
.u
.gfx9
.surf_offset
+
652 luma
->surface
.u
.gfx9
.surf_slice_size
;
653 decode
->dt_chroma_bottom_offset
= chroma
->surface
.u
.gfx9
.surf_offset
+
654 chroma
->surface
.u
.gfx9
.surf_slice_size
;
656 decode
->dt_luma_bottom_offset
= decode
->dt_luma_top_offset
;
657 decode
->dt_chroma_bottom_offset
= decode
->dt_chroma_top_offset
;
660 switch (u_reduce_video_profile(picture
->profile
)) {
661 case PIPE_VIDEO_FORMAT_MPEG4_AVC
: {
662 rvcn_dec_message_avc_t avc
=
663 get_h264_msg(dec
, (struct pipe_h264_picture_desc
*)picture
);
664 memcpy(codec
, (void*)&avc
, sizeof(rvcn_dec_message_avc_t
));
665 index
->message_id
= RDECODE_MESSAGE_AVC
;
668 case PIPE_VIDEO_FORMAT_HEVC
: {
669 rvcn_dec_message_hevc_t hevc
=
670 get_h265_msg(dec
, target
, (struct pipe_h265_picture_desc
*)picture
);
672 memcpy(codec
, (void*)&hevc
, sizeof(rvcn_dec_message_hevc_t
));
673 index
->message_id
= RDECODE_MESSAGE_HEVC
;
674 if (dec
->ctx
.res
== NULL
) {
676 if (dec
->base
.profile
== PIPE_VIDEO_PROFILE_HEVC_MAIN_10
)
677 ctx_size
= calc_ctx_size_h265_main10(dec
,
678 (struct pipe_h265_picture_desc
*)picture
);
680 ctx_size
= calc_ctx_size_h265_main(dec
);
681 if (!si_vid_create_buffer(dec
->screen
, &dec
->ctx
, ctx_size
, PIPE_USAGE_DEFAULT
))
682 RVID_ERR("Can't allocated context buffer.\n");
683 si_vid_clear_buffer(dec
->base
.context
, &dec
->ctx
);
687 case PIPE_VIDEO_FORMAT_VC1
: {
688 rvcn_dec_message_vc1_t vc1
= get_vc1_msg((struct pipe_vc1_picture_desc
*)picture
);
690 memcpy(codec
, (void*)&vc1
, sizeof(rvcn_dec_message_vc1_t
));
691 if ((picture
->profile
== PIPE_VIDEO_PROFILE_VC1_SIMPLE
) ||
692 (picture
->profile
== PIPE_VIDEO_PROFILE_VC1_MAIN
)) {
693 decode
->width_in_samples
= align(decode
->width_in_samples
, 16) / 16;
694 decode
->height_in_samples
= align(decode
->height_in_samples
, 16) / 16;
696 index
->message_id
= RDECODE_MESSAGE_VC1
;
700 case PIPE_VIDEO_FORMAT_MPEG12
: {
701 rvcn_dec_message_mpeg2_vld_t mpeg2
=
702 get_mpeg2_msg(dec
, (struct pipe_mpeg12_picture_desc
*)picture
);
704 memcpy(codec
, (void*)&mpeg2
, sizeof(rvcn_dec_message_mpeg2_vld_t
));
705 index
->message_id
= RDECODE_MESSAGE_MPEG2_VLD
;
708 case PIPE_VIDEO_FORMAT_MPEG4
: {
709 rvcn_dec_message_mpeg4_asp_vld_t mpeg4
=
710 get_mpeg4_msg(dec
, (struct pipe_mpeg4_picture_desc
*)picture
);
712 memcpy(codec
, (void*)&mpeg4
, sizeof(rvcn_dec_message_mpeg4_asp_vld_t
));
713 index
->message_id
= RDECODE_MESSAGE_MPEG4_ASP_VLD
;
722 decode
->hw_ctxt_size
= dec
->ctx
.res
->buf
->size
;
724 return luma
->resource
.buf
;
727 static void rvcn_dec_message_destroy(struct radeon_decoder
*dec
)
729 rvcn_dec_message_header_t
*header
= dec
->msg
;
731 memset(dec
->msg
, 0, sizeof(rvcn_dec_message_header_t
));
732 header
->header_size
= sizeof(rvcn_dec_message_header_t
);
733 header
->total_size
= sizeof(rvcn_dec_message_header_t
) -
734 sizeof(rvcn_dec_message_index_t
);
735 header
->num_buffers
= 0;
736 header
->msg_type
= RDECODE_MSG_DESTROY
;
737 header
->stream_handle
= dec
->stream_handle
;
738 header
->status_report_feedback_number
= 0;
741 static void rvcn_dec_message_feedback(struct radeon_decoder
*dec
)
743 rvcn_dec_feedback_header_t
*header
= (void*)dec
->fb
;
745 header
->header_size
= sizeof(rvcn_dec_feedback_header_t
);
746 header
->total_size
= sizeof(rvcn_dec_feedback_header_t
);
747 header
->num_buffers
= 0;
750 /* flush IB to the hardware */
751 static int flush(struct radeon_decoder
*dec
, unsigned flags
)
753 return dec
->ws
->cs_flush(dec
->cs
, flags
, NULL
);
756 /* add a new set register command to the IB */
757 static void set_reg(struct radeon_decoder
*dec
, unsigned reg
, uint32_t val
)
759 radeon_emit(dec
->cs
, RDECODE_PKT0(reg
>> 2, 0));
760 radeon_emit(dec
->cs
, val
);
763 /* send a command to the VCPU through the GPCOM registers */
764 static void send_cmd(struct radeon_decoder
*dec
, unsigned cmd
,
765 struct pb_buffer
* buf
, uint32_t off
,
766 enum radeon_bo_usage usage
, enum radeon_bo_domain domain
)
770 dec
->ws
->cs_add_buffer(dec
->cs
, buf
, usage
| RADEON_USAGE_SYNCHRONIZED
,
771 domain
, RADEON_PRIO_UVD
);
772 addr
= dec
->ws
->buffer_get_virtual_address(buf
);
775 set_reg(dec
, RDECODE_GPCOM_VCPU_DATA0
, addr
);
776 set_reg(dec
, RDECODE_GPCOM_VCPU_DATA1
, addr
>> 32);
777 set_reg(dec
, RDECODE_GPCOM_VCPU_CMD
, cmd
<< 1);
780 /* do the codec needs an IT buffer ?*/
781 static bool have_it(struct radeon_decoder
*dec
)
783 return dec
->stream_type
== RDECODE_CODEC_H264_PERF
||
784 dec
->stream_type
== RDECODE_CODEC_H265
;
787 /* map the next available message/feedback/itscaling buffer */
788 static void map_msg_fb_it_buf(struct radeon_decoder
*dec
)
790 struct rvid_buffer
* buf
;
793 /* grab the current message/feedback buffer */
794 buf
= &dec
->msg_fb_it_buffers
[dec
->cur_buffer
];
796 /* and map it for CPU access */
797 ptr
= dec
->ws
->buffer_map(buf
->res
->buf
, dec
->cs
, PIPE_TRANSFER_WRITE
);
799 /* calc buffer offsets */
802 dec
->fb
= (uint32_t *)(ptr
+ FB_BUFFER_OFFSET
);
804 dec
->it
= (uint8_t *)(ptr
+ FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
);
807 /* unmap and send a message command to the VCPU */
808 static void send_msg_buf(struct radeon_decoder
*dec
)
810 struct rvid_buffer
* buf
;
812 /* ignore the request if message/feedback buffer isn't mapped */
813 if (!dec
->msg
|| !dec
->fb
)
816 /* grab the current message buffer */
817 buf
= &dec
->msg_fb_it_buffers
[dec
->cur_buffer
];
819 /* unmap the buffer */
820 dec
->ws
->buffer_unmap(buf
->res
->buf
);
825 if (dec
->sessionctx
.res
)
826 send_cmd(dec
, RDECODE_CMD_SESSION_CONTEXT_BUFFER
,
827 dec
->sessionctx
.res
->buf
, 0, RADEON_USAGE_READWRITE
,
830 /* and send it to the hardware */
831 send_cmd(dec
, RDECODE_CMD_MSG_BUFFER
, buf
->res
->buf
, 0,
832 RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
835 /* cycle to the next set of buffers */
836 static void next_buffer(struct radeon_decoder
*dec
)
839 dec
->cur_buffer
%= NUM_BUFFERS
;
/*
 * Computes ctx_size, the byte size used for the context buffer of the
 * RDECODE_CODEC_H264_PERF stream type:
 *   ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256)
 * where max_references starts at dec->base.max_references + 1 and is raised
 * to MIN2(NUM_H264_REFS, num_dpb_buffer) when that is larger.
 *
 * NOTE(review): the case labels, break statements, whatever sits on original
 * lines 880-882, and the return statement are not visible in this view (the
 * embedded line numbers skip them), so which level selects which numerator
 * cannot be told from here. The numerators look like the H.264 Annex A
 * MaxDpbMbs level limits -- confirm.
 * NOTE(review): fs_in_mb is used as a divisor; a zero width or height would
 * make it zero -- confirm callers never pass such dimensions.
 */
842 static unsigned calc_ctx_size_h264_perf(struct radeon_decoder
*dec
)
844 unsigned width_in_mb
, height_in_mb
, ctx_size
;
/* round the stream dimensions up to whole macroblocks */
845 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
846 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
/* one more than the number of references requested by the caller */
848 unsigned max_references
= dec
->base
.max_references
+ 1;
850 // picture width & height in 16 pixel units
851 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
/* the macroblock row count is rounded up to a multiple of 2 */
852 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
/* frame size in macroblocks */
854 unsigned fs_in_mb
= width_in_mb
* height_in_mb
;
855 unsigned num_dpb_buffer
;
/* per-level budget divided by the frame size in macroblocks */
856 switch(dec
->base
.level
) {
858 num_dpb_buffer
= 8100 / fs_in_mb
;
861 num_dpb_buffer
= 18000 / fs_in_mb
;
864 num_dpb_buffer
= 20480 / fs_in_mb
;
867 num_dpb_buffer
= 32768 / fs_in_mb
;
870 num_dpb_buffer
= 34816 / fs_in_mb
;
873 num_dpb_buffer
= 110400 / fs_in_mb
;
876 num_dpb_buffer
= 184320 / fs_in_mb
;
879 num_dpb_buffer
= 184320 / fs_in_mb
;
/* cap the level-derived count at NUM_H264_REFS, but never go below the
 * count derived from the caller's max_references */
883 max_references
= MAX2(MIN2(NUM_H264_REFS
, num_dpb_buffer
), max_references
);
/* 192 bytes per macroblock, each per-reference slice aligned to 256 bytes */
884 ctx_size
= max_references
* align(width_in_mb
* height_in_mb
* 192, 256);
889 /* calculate size of reference picture buffer */
/*
 * Computes dpb_size from the decoder's width, height, profile, level and
 * max_references. The formula depends on the codec family returned by
 * u_reduce_video_profile(dec->base.profile); each visible case is annotated
 * below.
 *
 * NOTE(review): many short original lines are not visible in this view (the
 * embedded line numbers skip them): the H.264 level case labels, break
 * statements, what look like else branches at 950 and 957, the default label
 * and the return statement. Comments below describe only the visible lines.
 */
890 static unsigned calc_dpb_size(struct radeon_decoder
*dec
)
892 unsigned width_in_mb
, height_in_mb
, image_size
, dpb_size
;
894 // always align them to MB size for dpb calculation
895 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
896 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
898 // always one more for currently decoded picture
899 unsigned max_references
= dec
->base
.max_references
+ 1;
901 // aligned size of a single frame
/* (width aligned to 32) * height, plus half of that again, rounded up to
 * a multiple of 1024 */
902 image_size
= align(width
, 32) * height
;
903 image_size
+= image_size
/ 2;
904 image_size
= align(image_size
, 1024);
906 // picture width & height in 16 pixel units
907 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
908 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
910 switch (u_reduce_video_profile(dec
->base
.profile
)) {
/* H.264: reference count is derived from the level, then one image_size
 * per reference */
911 case PIPE_VIDEO_FORMAT_MPEG4_AVC
: {
912 unsigned fs_in_mb
= width_in_mb
* height_in_mb
;
913 unsigned num_dpb_buffer
;
/* NOTE(review): fs_in_mb is a divisor below; zero dimensions would make it
 * zero -- confirm callers never pass them. The case labels selecting each
 * numerator are not visible in this view. */
915 switch(dec
->base
.level
) {
917 num_dpb_buffer
= 8100 / fs_in_mb
;
920 num_dpb_buffer
= 18000 / fs_in_mb
;
923 num_dpb_buffer
= 20480 / fs_in_mb
;
926 num_dpb_buffer
= 32768 / fs_in_mb
;
929 num_dpb_buffer
= 34816 / fs_in_mb
;
932 num_dpb_buffer
= 110400 / fs_in_mb
;
935 num_dpb_buffer
= 184320 / fs_in_mb
;
938 num_dpb_buffer
= 184320 / fs_in_mb
;
/* cap the level-derived count at NUM_H264_REFS, but never go below the
 * count derived from the caller's max_references */
942 max_references
= MAX2(MIN2(NUM_H264_REFS
, num_dpb_buffer
), max_references
);
943 dpb_size
= image_size
* max_references
;
/* HEVC: minimum reference count depends on the picture area, frame size
 * factor depends on the profile */
947 case PIPE_VIDEO_FORMAT_HEVC
:
/* NOTE(review): original line 950 is not visible; presumably an else that
 * separates the two assignments below -- confirm. */
948 if (dec
->base
.width
* dec
->base
.height
>= 4096*2000)
949 max_references
= MAX2(max_references
, 8);
951 max_references
= MAX2(max_references
, 17);
953 width
= align (width
, 16);
954 height
= align (height
, 16);
/* Main 10 uses a 9/4 factor per frame, the other visible formula uses 3/2;
 * each per-frame size is aligned to 256 bytes.
 * NOTE(review): original line 957 is not visible; presumably an else --
 * confirm. */
955 if (dec
->base
.profile
== PIPE_VIDEO_PROFILE_HEVC_MAIN_10
)
956 dpb_size
= align((align(width
, 32) * height
* 9) / 4, 256) * max_references
;
958 dpb_size
= align((align(width
, 32) * height
* 3) / 2, 256) * max_references
;
961 case PIPE_VIDEO_FORMAT_VC1
:
962 // the firmware seems to always assume a minimum of ref frames
963 max_references
= MAX2(NUM_VC1_REFS
, max_references
);
965 // reference picture buffer
966 dpb_size
= image_size
* max_references
;
/* further per-macroblock and per-macroblock-row areas are appended; the
 * original one-line comments naming each area (968, 971, 974, 977) are not
 * visible in this view */
969 dpb_size
+= width_in_mb
* height_in_mb
* 128;
972 dpb_size
+= width_in_mb
* 64;
975 dpb_size
+= width_in_mb
* 128;
978 dpb_size
+= align(MAX2(width_in_mb
, height_in_mb
) * 7 * 16, 64);
981 case PIPE_VIDEO_FORMAT_MPEG12
:
982 // reference picture buffer, must be big enough for all frames
983 dpb_size
= image_size
* NUM_MPEG2_REFS
;
986 case PIPE_VIDEO_FORMAT_MPEG4
:
987 // reference picture buffer
988 dpb_size
= image_size
* max_references
;
/* two more areas sized by the macroblock count are appended; the original
 * one-line comments naming them (990, 993) are not visible in this view */
991 dpb_size
+= width_in_mb
* height_in_mb
* 64;
994 dpb_size
+= align(width_in_mb
* height_in_mb
* 32, 64);
/* never go below 30 MiB */
996 dpb_size
= MAX2(dpb_size
, 30 * 1024 * 1024);
/* NOTE(review): the label for the fallback below (presumably default:) is
 * not visible in this view. */
1000 // something is missing here
1003 // at least use a sane default value
1004 dpb_size
= 32 * 1024 * 1024;
/*
 * Tears the decoder down: maps the current message buffer, lets
 * rvcn_dec_message_destroy() fill it, destroys the command stream, then
 * destroys every per-frame message/feedback/IT and bitstream buffer followed
 * by the dpb, ctx and sessionctx buffers.
 *
 * NOTE(review): the comment delimiters around the header line below and
 * original lines 1016-1019, 1022-1025 and 1036 onward are not visible in this
 * view (declaration of i, whatever submits the destroy message, and any final
 * release of dec) -- confirm against the full file.
 */
1011 * destroy this video decoder
1013 static void radeon_dec_destroy(struct pipe_video_codec
*decoder
)
/* the pipe_video_codec handed in is treated as the radeon_decoder itself */
1015 struct radeon_decoder
*dec
= (struct radeon_decoder
*)decoder
;
/* build the destroy message in a freshly mapped message buffer */
1020 map_msg_fb_it_buf(dec
);
1021 rvcn_dec_message_destroy(dec
);
1026 dec
->ws
->cs_destroy(dec
->cs
);
/* release both buffers of every buffer set */
1028 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
1029 si_vid_destroy_buffer(&dec
->msg_fb_it_buffers
[i
]);
1030 si_vid_destroy_buffer(&dec
->bs_buffers
[i
]);
1033 si_vid_destroy_buffer(&dec
->dpb
);
1034 si_vid_destroy_buffer(&dec
->ctx
);
1035 si_vid_destroy_buffer(&dec
->sessionctx
);
/*
 * Starts a new frame: increments dec->frame_number, attaches the new value to
 * the target video buffer as associated data (with
 * radeon_dec_destroy_associated_data as its destructor), and maps the current
 * bitstream buffer with PIPE_TRANSFER_WRITE into dec->bs_ptr.
 * The picture parameter is not used by any visible line.
 *
 * NOTE(review): the comment delimiters around the header line below and
 * original lines 1048-1051 and 1055-1056 are not visible in this view (e.g.
 * the declaration of frame and any reset of the bitstream size) -- confirm
 * against the full file.
 */
1041 * start decoding of a new frame
1043 static void radeon_dec_begin_frame(struct pipe_video_codec
*decoder
,
1044 struct pipe_video_buffer
*target
,
1045 struct pipe_picture_desc
*picture
)
1047 struct radeon_decoder
*dec
= (struct radeon_decoder
*)decoder
;
/* the frame number is passed as the associated-data pointer value itself */
1052 frame
= ++dec
->frame_number
;
1053 vl_video_buffer_set_associated_data(target
, decoder
, (void *)frame
,
1054 &radeon_dec_destroy_associated_data
);
/* NOTE(review): no check of the buffer_map() result is visible here. */
1057 dec
->bs_ptr
= dec
->ws
->buffer_map(
1058 dec
->bs_buffers
[dec
->cur_buffer
].res
->buf
,
1059 dec
->cs
, PIPE_TRANSFER_WRITE
);
/*
 * Macroblock-level decode entry point. The only visible body line is the
 * "not supported (yet)" comment.
 *
 * NOTE(review): the comment delimiters around the header line below and
 * original lines 1070 and 1072-1075 are not visible in this view, so whether
 * the body contains any statement cannot be told from here.
 */
1063 * decode a macroblock
1065 static void radeon_dec_decode_macroblock(struct pipe_video_codec
*decoder
,
1066 struct pipe_video_buffer
*target
,
1067 struct pipe_picture_desc
*picture
,
1068 const struct pipe_macroblock
*macroblocks
,
1069 unsigned num_macroblocks
)
1071 /* not supported (yet) */
/*
 * Appends num_buffers bitstream chunks (buffers[i] with length sizes[i]) to
 * the current bitstream buffer at dec->bs_ptr, tracking the running total in
 * dec->bs_size. When a chunk would not fit, the buffer is unmapped, resized
 * to the required size with si_vid_resize_buffer(), mapped again, and the
 * write pointer is moved past the data already stored.
 * The target and picture parameters are not used by any visible line.
 *
 * NOTE(review): the comment delimiters around the header line below and
 * original lines 1086-1092, 1101-1103, 1106-1108 and 1110-1111 are not visible
 * in this view (declaration of i, any early exit, and what follows a failed
 * resize or a failed re-map) -- confirm against the full file.
 */
1076 * decode a bitstream
1078 static void radeon_dec_decode_bitstream(struct pipe_video_codec
*decoder
,
1079 struct pipe_video_buffer
*target
,
1080 struct pipe_picture_desc
*picture
,
1081 unsigned num_buffers
,
1082 const void * const *buffers
,
1083 const unsigned *sizes
)
1085 struct radeon_decoder
*dec
= (struct radeon_decoder
*)decoder
;
1093 for (i
= 0; i
< num_buffers
; ++i
) {
1094 struct rvid_buffer
*buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
/* total size once this chunk has been appended
 * NOTE(review): the sum is stored in an unsigned; no overflow check is
 * visible -- confirm the sizes are bounded by the caller. */
1095 unsigned new_size
= dec
->bs_size
+ sizes
[i
];
/* grow the bitstream buffer when the chunk does not fit */
1097 if (new_size
> buf
->res
->buf
->size
) {
1098 dec
->ws
->buffer_unmap(buf
->res
->buf
);
1099 if (!si_vid_resize_buffer(dec
->screen
, dec
->cs
, buf
, new_size
)) {
1100 RVID_ERR("Can't resize bitstream buffer!");
/* map the resized buffer again */
1104 dec
->bs_ptr
= dec
->ws
->buffer_map(buf
->res
->buf
, dec
->cs
,
1105 PIPE_TRANSFER_WRITE
);
/* skip over the bytes already written before the resize */
1109 dec
->bs_ptr
+= dec
->bs_size
;
/* copy the chunk and advance both the size counter and the write pointer */
1112 memcpy(dec
->bs_ptr
, buffers
[i
], sizes
[i
]);
1113 dec
->bs_size
+= sizes
[i
];
1114 dec
->bs_ptr
+= sizes
[i
];
/*
 * Finishes the current frame: zero-fills the bitstream from dec->bs_ptr up to
 * the next multiple of 128 bytes of dec->bs_size, unmaps the bitstream buffer,
 * maps the message buffer, builds the decode and feedback messages, then
 * issues the buffer commands (DPB, context, bitstream, decoding target,
 * feedback, IT scaling table), writes 1 to RDECODE_ENGINE_CNTL and flushes
 * with RADEON_FLUSH_ASYNC.
 *
 * NOTE(review): the comment delimiters around the header line below and
 * original lines 1128-1133, 1143-1144, 1147, 1156, 1160 and 1162 onward are
 * not visible in this view (e.g. early exits, a check of dt, conditions
 * guarding individual send_cmd() calls, and anything after the flush) --
 * confirm against the full file.
 */
1119 * end decoding of the current frame
1121 static void radeon_dec_end_frame(struct pipe_video_codec
*decoder
,
1122 struct pipe_video_buffer
*target
,
1123 struct pipe_picture_desc
*picture
)
1125 struct radeon_decoder
*dec
= (struct radeon_decoder
*)decoder
;
1126 struct pb_buffer
*dt
;
1127 struct rvid_buffer
*msg_fb_it_buf
, *bs_buf
;
/* both buffers of the current buffer set */
1134 msg_fb_it_buf
= &dec
->msg_fb_it_buffers
[dec
->cur_buffer
];
1135 bs_buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
/* zero the gap between the end of the data and the next 128-byte boundary
 * NOTE(review): no visible check that the buffer has room for this padding --
 * confirm. */
1137 memset(dec
->bs_ptr
, 0, align(dec
->bs_size
, 128) - dec
->bs_size
);
1138 dec
->ws
->buffer_unmap(bs_buf
->res
->buf
);
/* build the decode message; rvcn_dec_message_decode() yields the buffer that
 * is later submitted as the decoding target */
1140 map_msg_fb_it_buf(dec
);
1141 dt
= rvcn_dec_message_decode(dec
, target
, picture
);
1142 rvcn_dec_message_feedback(dec
);
/* reference picture buffer: read/write, VRAM */
1145 send_cmd(dec
, RDECODE_CMD_DPB_BUFFER
, dec
->dpb
.res
->buf
, 0,
1146 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
/* context buffer: read/write, VRAM
 * NOTE(review): original line 1147 is not visible; presumably it guards this
 * call, since dec->ctx is only created for one stream type elsewhere in the
 * file -- confirm. */
1148 send_cmd(dec
, RDECODE_CMD_CONTEXT_BUFFER
, dec
->ctx
.res
->buf
, 0,
1149 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
/* bitstream: read-only, GTT */
1150 send_cmd(dec
, RDECODE_CMD_BITSTREAM_BUFFER
, bs_buf
->res
->buf
,
1151 0, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
/* decoding target: write-only, VRAM */
1152 send_cmd(dec
, RDECODE_CMD_DECODING_TARGET_BUFFER
, dt
, 0,
1153 RADEON_USAGE_WRITE
, RADEON_DOMAIN_VRAM
);
/* feedback area inside the message buffer, at FB_BUFFER_OFFSET */
1154 send_cmd(dec
, RDECODE_CMD_FEEDBACK_BUFFER
, msg_fb_it_buf
->res
->buf
,
1155 FB_BUFFER_OFFSET
, RADEON_USAGE_WRITE
, RADEON_DOMAIN_GTT
);
/* IT scaling table area, directly after the feedback area
 * NOTE(review): original line 1156 is not visible; presumably a condition on
 * whether the codec uses an IT buffer -- confirm. */
1157 send_cmd(dec
, RDECODE_CMD_IT_SCALING_TABLE_BUFFER
, msg_fb_it_buf
->res
->buf
,
1158 FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
1159 set_reg(dec
, RDECODE_ENGINE_CNTL
, 1);
1161 flush(dec
, RADEON_FLUSH_ASYNC
);
/*
 * Flush entry point of the codec interface. Only the signature is visible in
 * this view.
 *
 * NOTE(review): the embedded original line numbers jump from 1168 to 1173,
 * which leaves room for little more than the braces, so the body appears to
 * be empty -- confirm against the full file.
 */
1166 * flush any outstanding command buffers to the hardware
1168 static void radeon_dec_flush(struct pipe_video_codec
*decoder
)
1173 * create a HW decoder
1175 struct pipe_video_codec
*radeon_create_decoder(struct pipe_context
*context
,
1176 const struct pipe_video_codec
*templ
)
1178 struct radeon_winsys
* ws
= ((struct r600_common_context
*)context
)->ws
;
1179 struct r600_common_context
*rctx
= (struct r600_common_context
*)context
;
1180 unsigned width
= templ
->width
, height
= templ
->height
;
1181 unsigned dpb_size
, bs_buf_size
, stream_type
= 0;
1182 struct radeon_decoder
*dec
;
1185 switch(u_reduce_video_profile(templ
->profile
)) {
1186 case PIPE_VIDEO_FORMAT_MPEG12
:
1187 if (templ
->entrypoint
> PIPE_VIDEO_ENTRYPOINT_BITSTREAM
)
1188 return vl_create_mpeg12_decoder(context
, templ
);
1189 stream_type
= RDECODE_CODEC_MPEG2_VLD
;
1191 case PIPE_VIDEO_FORMAT_MPEG4
:
1192 width
= align(width
, VL_MACROBLOCK_WIDTH
);
1193 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
1194 stream_type
= RDECODE_CODEC_MPEG4
;
1196 case PIPE_VIDEO_FORMAT_VC1
:
1197 stream_type
= RDECODE_CODEC_VC1
;
1199 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
1200 width
= align(width
, VL_MACROBLOCK_WIDTH
);
1201 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
1202 stream_type
= RDECODE_CODEC_H264_PERF
;
1204 case PIPE_VIDEO_FORMAT_HEVC
:
1205 stream_type
= RDECODE_CODEC_H265
;
1212 dec
= CALLOC_STRUCT(radeon_decoder
);
1218 dec
->base
.context
= context
;
1219 dec
->base
.width
= width
;
1220 dec
->base
.height
= height
;
1222 dec
->base
.destroy
= radeon_dec_destroy
;
1223 dec
->base
.begin_frame
= radeon_dec_begin_frame
;
1224 dec
->base
.decode_macroblock
= radeon_dec_decode_macroblock
;
1225 dec
->base
.decode_bitstream
= radeon_dec_decode_bitstream
;
1226 dec
->base
.end_frame
= radeon_dec_end_frame
;
1227 dec
->base
.flush
= radeon_dec_flush
;
1229 dec
->stream_type
= stream_type
;
1230 dec
->stream_handle
= si_vid_alloc_stream_handle();
1231 dec
->screen
= context
->screen
;
1233 dec
->cs
= ws
->cs_create(rctx
->ctx
, RING_VCN_DEC
, NULL
, NULL
);
1235 RVID_ERR("Can't get command submission context.\n");
1239 bs_buf_size
= width
* height
* (512 / (16 * 16));
1240 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
1241 unsigned msg_fb_it_size
= FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
;
1243 msg_fb_it_size
+= IT_SCALING_TABLE_SIZE
;
1244 /* use vram to improve performance, workaround an unknown bug */
1245 if (!si_vid_create_buffer(dec
->screen
, &dec
->msg_fb_it_buffers
[i
],
1246 msg_fb_it_size
, PIPE_USAGE_DEFAULT
)) {
1247 RVID_ERR("Can't allocated message buffers.\n");
1251 if (!si_vid_create_buffer(dec
->screen
, &dec
->bs_buffers
[i
],
1252 bs_buf_size
, PIPE_USAGE_STAGING
)) {
1253 RVID_ERR("Can't allocated bitstream buffers.\n");
1257 si_vid_clear_buffer(context
, &dec
->msg_fb_it_buffers
[i
]);
1258 si_vid_clear_buffer(context
, &dec
->bs_buffers
[i
]);
1261 dpb_size
= calc_dpb_size(dec
);
1263 if (!si_vid_create_buffer(dec
->screen
, &dec
->dpb
, dpb_size
, PIPE_USAGE_DEFAULT
)) {
1264 RVID_ERR("Can't allocated dpb.\n");
1268 si_vid_clear_buffer(context
, &dec
->dpb
);
1270 if (dec
->stream_type
== RDECODE_CODEC_H264_PERF
) {
1271 unsigned ctx_size
= calc_ctx_size_h264_perf(dec
);
1272 if (!si_vid_create_buffer(dec
->screen
, &dec
->ctx
, ctx_size
, PIPE_USAGE_DEFAULT
)) {
1273 RVID_ERR("Can't allocated context buffer.\n");
1276 si_vid_clear_buffer(context
, &dec
->ctx
);
1279 if (!si_vid_create_buffer(dec
->screen
, &dec
->sessionctx
,
1280 RDECODE_SESSION_CONTEXT_SIZE
,
1281 PIPE_USAGE_DEFAULT
)) {
1282 RVID_ERR("Can't allocated session ctx.\n");
1285 si_vid_clear_buffer(context
, &dec
->sessionctx
);
1287 map_msg_fb_it_buf(dec
);
1288 rvcn_dec_message_create(dec
);
1299 if (dec
->cs
) dec
->ws
->cs_destroy(dec
->cs
);
1301 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
1302 si_vid_destroy_buffer(&dec
->msg_fb_it_buffers
[i
]);
1303 si_vid_destroy_buffer(&dec
->bs_buffers
[i
]);
1306 si_vid_destroy_buffer(&dec
->dpb
);
1307 si_vid_destroy_buffer(&dec
->ctx
);
1308 si_vid_destroy_buffer(&dec
->sessionctx
);