1 /**************************************************************************
3 * Copyright 2017 Advanced Micro Devices, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "radeon_vcn_dec.h"
30 #include "pipe/p_video_codec.h"
31 #include "radeon_video.h"
32 #include "radeonsi/si_pipe.h"
33 #include "util/u_memory.h"
34 #include "util/u_video.h"
35 #include "vl/vl_mpeg12_decoder.h"
36 #include "vl/vl_probs_table.h"
/* Buffer offsets and sizes, in bytes. */
#define FB_BUFFER_OFFSET             0x1000
#define FB_BUFFER_SIZE               2048
#define IT_SCALING_TABLE_SIZE        992
#define VP9_PROBS_TABLE_SIZE         (RDECODE_VP9_PROBS_DATA_SIZE + 256)
#define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024)

/* Register offsets used for VCN 1.x (per the RDECODE_VCN1_ prefix). */
#define RDECODE_VCN1_GPCOM_VCPU_CMD   0x2070c
#define RDECODE_VCN1_GPCOM_VCPU_DATA0 0x20710
#define RDECODE_VCN1_GPCOM_VCPU_DATA1 0x20714
#define RDECODE_VCN1_ENGINE_CNTL      0x20718

/* Register offsets used for VCN 2.0; written as dword index << 2. */
#define RDECODE_VCN2_GPCOM_VCPU_CMD   (0x503 << 2)
#define RDECODE_VCN2_GPCOM_VCPU_DATA0 (0x504 << 2)
#define RDECODE_VCN2_GPCOM_VCPU_DATA1 (0x505 << 2)
#define RDECODE_VCN2_ENGINE_CNTL      (0x506 << 2)

/* Register offsets used for VCN 2.5. */
#define RDECODE_VCN2_5_GPCOM_VCPU_CMD   0x3c
#define RDECODE_VCN2_5_GPCOM_VCPU_DATA0 0x40
#define RDECODE_VCN2_5_GPCOM_VCPU_DATA1 0x44
#define RDECODE_VCN2_5_ENGINE_CNTL      0x9b4

/* Reference-frame counts per codec. */
#define NUM_MPEG2_REFS 6
#define NUM_H264_REFS  17
#define NUM_VC1_REFS   5
#define NUM_VP9_REFS   8
67 static rvcn_dec_message_avc_t
get_h264_msg(struct radeon_decoder
*dec
,
68 struct pipe_h264_picture_desc
*pic
)
70 rvcn_dec_message_avc_t result
;
72 memset(&result
, 0, sizeof(result
));
73 switch (pic
->base
.profile
) {
74 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE
:
75 case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE
:
76 result
.profile
= RDECODE_H264_PROFILE_BASELINE
;
79 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN
:
80 result
.profile
= RDECODE_H264_PROFILE_MAIN
;
83 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
:
84 result
.profile
= RDECODE_H264_PROFILE_HIGH
;
92 result
.level
= dec
->base
.level
;
94 result
.sps_info_flags
= 0;
95 result
.sps_info_flags
|= pic
->pps
->sps
->direct_8x8_inference_flag
<< 0;
96 result
.sps_info_flags
|= pic
->pps
->sps
->mb_adaptive_frame_field_flag
<< 1;
97 result
.sps_info_flags
|= pic
->pps
->sps
->frame_mbs_only_flag
<< 2;
98 result
.sps_info_flags
|= pic
->pps
->sps
->delta_pic_order_always_zero_flag
<< 3;
99 result
.sps_info_flags
|= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT
;
101 result
.bit_depth_luma_minus8
= pic
->pps
->sps
->bit_depth_luma_minus8
;
102 result
.bit_depth_chroma_minus8
= pic
->pps
->sps
->bit_depth_chroma_minus8
;
103 result
.log2_max_frame_num_minus4
= pic
->pps
->sps
->log2_max_frame_num_minus4
;
104 result
.pic_order_cnt_type
= pic
->pps
->sps
->pic_order_cnt_type
;
105 result
.log2_max_pic_order_cnt_lsb_minus4
= pic
->pps
->sps
->log2_max_pic_order_cnt_lsb_minus4
;
107 switch (dec
->base
.chroma_format
) {
108 case PIPE_VIDEO_CHROMA_FORMAT_NONE
:
110 case PIPE_VIDEO_CHROMA_FORMAT_400
:
111 result
.chroma_format
= 0;
113 case PIPE_VIDEO_CHROMA_FORMAT_420
:
114 result
.chroma_format
= 1;
116 case PIPE_VIDEO_CHROMA_FORMAT_422
:
117 result
.chroma_format
= 2;
119 case PIPE_VIDEO_CHROMA_FORMAT_444
:
120 result
.chroma_format
= 3;
124 result
.pps_info_flags
= 0;
125 result
.pps_info_flags
|= pic
->pps
->transform_8x8_mode_flag
<< 0;
126 result
.pps_info_flags
|= pic
->pps
->redundant_pic_cnt_present_flag
<< 1;
127 result
.pps_info_flags
|= pic
->pps
->constrained_intra_pred_flag
<< 2;
128 result
.pps_info_flags
|= pic
->pps
->deblocking_filter_control_present_flag
<< 3;
129 result
.pps_info_flags
|= pic
->pps
->weighted_bipred_idc
<< 4;
130 result
.pps_info_flags
|= pic
->pps
->weighted_pred_flag
<< 6;
131 result
.pps_info_flags
|= pic
->pps
->bottom_field_pic_order_in_frame_present_flag
<< 7;
132 result
.pps_info_flags
|= pic
->pps
->entropy_coding_mode_flag
<< 8;
134 result
.num_slice_groups_minus1
= pic
->pps
->num_slice_groups_minus1
;
135 result
.slice_group_map_type
= pic
->pps
->slice_group_map_type
;
136 result
.slice_group_change_rate_minus1
= pic
->pps
->slice_group_change_rate_minus1
;
137 result
.pic_init_qp_minus26
= pic
->pps
->pic_init_qp_minus26
;
138 result
.chroma_qp_index_offset
= pic
->pps
->chroma_qp_index_offset
;
139 result
.second_chroma_qp_index_offset
= pic
->pps
->second_chroma_qp_index_offset
;
141 memcpy(result
.scaling_list_4x4
, pic
->pps
->ScalingList4x4
, 6 * 16);
142 memcpy(result
.scaling_list_8x8
, pic
->pps
->ScalingList8x8
, 2 * 64);
144 memcpy(dec
->it
, result
.scaling_list_4x4
, 6 * 16);
145 memcpy((dec
->it
+ 96), result
.scaling_list_8x8
, 2 * 64);
147 result
.num_ref_frames
= pic
->num_ref_frames
;
149 result
.num_ref_idx_l0_active_minus1
= pic
->num_ref_idx_l0_active_minus1
;
150 result
.num_ref_idx_l1_active_minus1
= pic
->num_ref_idx_l1_active_minus1
;
152 result
.frame_num
= pic
->frame_num
;
153 memcpy(result
.frame_num_list
, pic
->frame_num_list
, 4 * 16);
154 result
.curr_field_order_cnt_list
[0] = pic
->field_order_cnt
[0];
155 result
.curr_field_order_cnt_list
[1] = pic
->field_order_cnt
[1];
156 memcpy(result
.field_order_cnt_list
, pic
->field_order_cnt_list
, 4 * 16 * 2);
158 result
.decoded_pic_idx
= pic
->frame_num
;
/**
 * Destructor callback passed to vl_video_buffer_set_associated_data().
 *
 * \param data  the associated value; ignored
 */
static void radeon_dec_destroy_associated_data(void *data)
{
   /* NOOP, since we only use an intptr */
   (void)data;
}
168 static rvcn_dec_message_hevc_t
get_h265_msg(struct radeon_decoder
*dec
,
169 struct pipe_video_buffer
*target
,
170 struct pipe_h265_picture_desc
*pic
)
172 rvcn_dec_message_hevc_t result
;
175 memset(&result
, 0, sizeof(result
));
176 result
.sps_info_flags
= 0;
177 result
.sps_info_flags
|= pic
->pps
->sps
->scaling_list_enabled_flag
<< 0;
178 result
.sps_info_flags
|= pic
->pps
->sps
->amp_enabled_flag
<< 1;
179 result
.sps_info_flags
|= pic
->pps
->sps
->sample_adaptive_offset_enabled_flag
<< 2;
180 result
.sps_info_flags
|= pic
->pps
->sps
->pcm_enabled_flag
<< 3;
181 result
.sps_info_flags
|= pic
->pps
->sps
->pcm_loop_filter_disabled_flag
<< 4;
182 result
.sps_info_flags
|= pic
->pps
->sps
->long_term_ref_pics_present_flag
<< 5;
183 result
.sps_info_flags
|= pic
->pps
->sps
->sps_temporal_mvp_enabled_flag
<< 6;
184 result
.sps_info_flags
|= pic
->pps
->sps
->strong_intra_smoothing_enabled_flag
<< 7;
185 result
.sps_info_flags
|= pic
->pps
->sps
->separate_colour_plane_flag
<< 8;
186 if (((struct si_screen
*)dec
->screen
)->info
.family
== CHIP_CARRIZO
)
187 result
.sps_info_flags
|= 1 << 9;
188 if (pic
->UseRefPicList
== true)
189 result
.sps_info_flags
|= 1 << 10;
191 result
.chroma_format
= pic
->pps
->sps
->chroma_format_idc
;
192 result
.bit_depth_luma_minus8
= pic
->pps
->sps
->bit_depth_luma_minus8
;
193 result
.bit_depth_chroma_minus8
= pic
->pps
->sps
->bit_depth_chroma_minus8
;
194 result
.log2_max_pic_order_cnt_lsb_minus4
= pic
->pps
->sps
->log2_max_pic_order_cnt_lsb_minus4
;
195 result
.sps_max_dec_pic_buffering_minus1
= pic
->pps
->sps
->sps_max_dec_pic_buffering_minus1
;
196 result
.log2_min_luma_coding_block_size_minus3
=
197 pic
->pps
->sps
->log2_min_luma_coding_block_size_minus3
;
198 result
.log2_diff_max_min_luma_coding_block_size
=
199 pic
->pps
->sps
->log2_diff_max_min_luma_coding_block_size
;
200 result
.log2_min_transform_block_size_minus2
=
201 pic
->pps
->sps
->log2_min_transform_block_size_minus2
;
202 result
.log2_diff_max_min_transform_block_size
=
203 pic
->pps
->sps
->log2_diff_max_min_transform_block_size
;
204 result
.max_transform_hierarchy_depth_inter
= pic
->pps
->sps
->max_transform_hierarchy_depth_inter
;
205 result
.max_transform_hierarchy_depth_intra
= pic
->pps
->sps
->max_transform_hierarchy_depth_intra
;
206 result
.pcm_sample_bit_depth_luma_minus1
= pic
->pps
->sps
->pcm_sample_bit_depth_luma_minus1
;
207 result
.pcm_sample_bit_depth_chroma_minus1
= pic
->pps
->sps
->pcm_sample_bit_depth_chroma_minus1
;
208 result
.log2_min_pcm_luma_coding_block_size_minus3
=
209 pic
->pps
->sps
->log2_min_pcm_luma_coding_block_size_minus3
;
210 result
.log2_diff_max_min_pcm_luma_coding_block_size
=
211 pic
->pps
->sps
->log2_diff_max_min_pcm_luma_coding_block_size
;
212 result
.num_short_term_ref_pic_sets
= pic
->pps
->sps
->num_short_term_ref_pic_sets
;
214 result
.pps_info_flags
= 0;
215 result
.pps_info_flags
|= pic
->pps
->dependent_slice_segments_enabled_flag
<< 0;
216 result
.pps_info_flags
|= pic
->pps
->output_flag_present_flag
<< 1;
217 result
.pps_info_flags
|= pic
->pps
->sign_data_hiding_enabled_flag
<< 2;
218 result
.pps_info_flags
|= pic
->pps
->cabac_init_present_flag
<< 3;
219 result
.pps_info_flags
|= pic
->pps
->constrained_intra_pred_flag
<< 4;
220 result
.pps_info_flags
|= pic
->pps
->transform_skip_enabled_flag
<< 5;
221 result
.pps_info_flags
|= pic
->pps
->cu_qp_delta_enabled_flag
<< 6;
222 result
.pps_info_flags
|= pic
->pps
->pps_slice_chroma_qp_offsets_present_flag
<< 7;
223 result
.pps_info_flags
|= pic
->pps
->weighted_pred_flag
<< 8;
224 result
.pps_info_flags
|= pic
->pps
->weighted_bipred_flag
<< 9;
225 result
.pps_info_flags
|= pic
->pps
->transquant_bypass_enabled_flag
<< 10;
226 result
.pps_info_flags
|= pic
->pps
->tiles_enabled_flag
<< 11;
227 result
.pps_info_flags
|= pic
->pps
->entropy_coding_sync_enabled_flag
<< 12;
228 result
.pps_info_flags
|= pic
->pps
->uniform_spacing_flag
<< 13;
229 result
.pps_info_flags
|= pic
->pps
->loop_filter_across_tiles_enabled_flag
<< 14;
230 result
.pps_info_flags
|= pic
->pps
->pps_loop_filter_across_slices_enabled_flag
<< 15;
231 result
.pps_info_flags
|= pic
->pps
->deblocking_filter_override_enabled_flag
<< 16;
232 result
.pps_info_flags
|= pic
->pps
->pps_deblocking_filter_disabled_flag
<< 17;
233 result
.pps_info_flags
|= pic
->pps
->lists_modification_present_flag
<< 18;
234 result
.pps_info_flags
|= pic
->pps
->slice_segment_header_extension_present_flag
<< 19;
236 result
.num_extra_slice_header_bits
= pic
->pps
->num_extra_slice_header_bits
;
237 result
.num_long_term_ref_pic_sps
= pic
->pps
->sps
->num_long_term_ref_pics_sps
;
238 result
.num_ref_idx_l0_default_active_minus1
= pic
->pps
->num_ref_idx_l0_default_active_minus1
;
239 result
.num_ref_idx_l1_default_active_minus1
= pic
->pps
->num_ref_idx_l1_default_active_minus1
;
240 result
.pps_cb_qp_offset
= pic
->pps
->pps_cb_qp_offset
;
241 result
.pps_cr_qp_offset
= pic
->pps
->pps_cr_qp_offset
;
242 result
.pps_beta_offset_div2
= pic
->pps
->pps_beta_offset_div2
;
243 result
.pps_tc_offset_div2
= pic
->pps
->pps_tc_offset_div2
;
244 result
.diff_cu_qp_delta_depth
= pic
->pps
->diff_cu_qp_delta_depth
;
245 result
.num_tile_columns_minus1
= pic
->pps
->num_tile_columns_minus1
;
246 result
.num_tile_rows_minus1
= pic
->pps
->num_tile_rows_minus1
;
247 result
.log2_parallel_merge_level_minus2
= pic
->pps
->log2_parallel_merge_level_minus2
;
248 result
.init_qp_minus26
= pic
->pps
->init_qp_minus26
;
250 for (i
= 0; i
< 19; ++i
)
251 result
.column_width_minus1
[i
] = pic
->pps
->column_width_minus1
[i
];
253 for (i
= 0; i
< 21; ++i
)
254 result
.row_height_minus1
[i
] = pic
->pps
->row_height_minus1
[i
];
256 result
.num_delta_pocs_ref_rps_idx
= pic
->NumDeltaPocsOfRefRpsIdx
;
257 result
.curr_poc
= pic
->CurrPicOrderCntVal
;
259 for (i
= 0; i
< ARRAY_SIZE(dec
->render_pic_list
); i
++) {
261 (pic
->ref
[j
] != NULL
) && (j
< ARRAY_SIZE(dec
->render_pic_list
));
263 if (dec
->render_pic_list
[i
] == pic
->ref
[j
])
265 if (j
== ARRAY_SIZE(dec
->render_pic_list
) - 1)
266 dec
->render_pic_list
[i
] = NULL
;
267 else if (pic
->ref
[j
+ 1] == NULL
)
268 dec
->render_pic_list
[i
] = NULL
;
271 for (i
= 0; i
< ARRAY_SIZE(dec
->render_pic_list
); i
++) {
272 if (dec
->render_pic_list
[i
] == NULL
) {
273 dec
->render_pic_list
[i
] = target
;
279 vl_video_buffer_set_associated_data(target
, &dec
->base
, (void *)(uintptr_t)result
.curr_idx
,
280 &radeon_dec_destroy_associated_data
);
282 for (i
= 0; i
< 16; ++i
) {
283 struct pipe_video_buffer
*ref
= pic
->ref
[i
];
284 uintptr_t ref_pic
= 0;
286 result
.poc_list
[i
] = pic
->PicOrderCntVal
[i
];
289 ref_pic
= (uintptr_t)vl_video_buffer_get_associated_data(ref
, &dec
->base
);
292 result
.ref_pic_list
[i
] = ref_pic
;
295 for (i
= 0; i
< 8; ++i
) {
296 result
.ref_pic_set_st_curr_before
[i
] = 0xFF;
297 result
.ref_pic_set_st_curr_after
[i
] = 0xFF;
298 result
.ref_pic_set_lt_curr
[i
] = 0xFF;
301 for (i
= 0; i
< pic
->NumPocStCurrBefore
; ++i
)
302 result
.ref_pic_set_st_curr_before
[i
] = pic
->RefPicSetStCurrBefore
[i
];
304 for (i
= 0; i
< pic
->NumPocStCurrAfter
; ++i
)
305 result
.ref_pic_set_st_curr_after
[i
] = pic
->RefPicSetStCurrAfter
[i
];
307 for (i
= 0; i
< pic
->NumPocLtCurr
; ++i
)
308 result
.ref_pic_set_lt_curr
[i
] = pic
->RefPicSetLtCurr
[i
];
310 for (i
= 0; i
< 6; ++i
)
311 result
.ucScalingListDCCoefSizeID2
[i
] = pic
->pps
->sps
->ScalingListDCCoeff16x16
[i
];
313 for (i
= 0; i
< 2; ++i
)
314 result
.ucScalingListDCCoefSizeID3
[i
] = pic
->pps
->sps
->ScalingListDCCoeff32x32
[i
];
316 memcpy(dec
->it
, pic
->pps
->sps
->ScalingList4x4
, 6 * 16);
317 memcpy(dec
->it
+ 96, pic
->pps
->sps
->ScalingList8x8
, 6 * 64);
318 memcpy(dec
->it
+ 480, pic
->pps
->sps
->ScalingList16x16
, 6 * 64);
319 memcpy(dec
->it
+ 864, pic
->pps
->sps
->ScalingList32x32
, 2 * 64);
321 for (i
= 0; i
< 2; i
++) {
322 for (j
= 0; j
< 15; j
++)
323 result
.direct_reflist
[i
][j
] = pic
->RefPicList
[i
][j
];
326 if (pic
->base
.profile
== PIPE_VIDEO_PROFILE_HEVC_MAIN_10
) {
327 if (target
->buffer_format
== PIPE_FORMAT_P010
|| target
->buffer_format
== PIPE_FORMAT_P016
) {
328 result
.p010_mode
= 1;
331 result
.p010_mode
= 0;
332 result
.luma_10to8
= 5;
333 result
.chroma_10to8
= 5;
334 result
.hevc_reserved
[0] = 4; /* sclr_luma10to8 */
335 result
.hevc_reserved
[1] = 4; /* sclr_chroma10to8 */
342 static void fill_probs_table(void *ptr
)
344 rvcn_dec_vp9_probs_t
*probs
= (rvcn_dec_vp9_probs_t
*)ptr
;
346 memcpy(&probs
->coef_probs
[0], default_coef_probs_4x4
, sizeof(default_coef_probs_4x4
));
347 memcpy(&probs
->coef_probs
[1], default_coef_probs_8x8
, sizeof(default_coef_probs_8x8
));
348 memcpy(&probs
->coef_probs
[2], default_coef_probs_16x16
, sizeof(default_coef_probs_16x16
));
349 memcpy(&probs
->coef_probs
[3], default_coef_probs_32x32
, sizeof(default_coef_probs_32x32
));
350 memcpy(probs
->y_mode_prob
, default_if_y_probs
, sizeof(default_if_y_probs
));
351 memcpy(probs
->uv_mode_prob
, default_if_uv_probs
, sizeof(default_if_uv_probs
));
352 memcpy(probs
->single_ref_prob
, default_single_ref_p
, sizeof(default_single_ref_p
));
353 memcpy(probs
->switchable_interp_prob
, default_switchable_interp_prob
,
354 sizeof(default_switchable_interp_prob
));
355 memcpy(probs
->partition_prob
, default_partition_probs
, sizeof(default_partition_probs
));
356 memcpy(probs
->inter_mode_probs
, default_inter_mode_probs
, sizeof(default_inter_mode_probs
));
357 memcpy(probs
->mbskip_probs
, default_skip_probs
, sizeof(default_skip_probs
));
358 memcpy(probs
->intra_inter_prob
, default_intra_inter_p
, sizeof(default_intra_inter_p
));
359 memcpy(probs
->comp_inter_prob
, default_comp_inter_p
, sizeof(default_comp_inter_p
));
360 memcpy(probs
->comp_ref_prob
, default_comp_ref_p
, sizeof(default_comp_ref_p
));
361 memcpy(probs
->tx_probs_32x32
, default_tx_probs_32x32
, sizeof(default_tx_probs_32x32
));
362 memcpy(probs
->tx_probs_16x16
, default_tx_probs_16x16
, sizeof(default_tx_probs_16x16
));
363 memcpy(probs
->tx_probs_8x8
, default_tx_probs_8x8
, sizeof(default_tx_probs_8x8
));
364 memcpy(probs
->mv_joints
, default_nmv_joints
, sizeof(default_nmv_joints
));
365 memcpy(&probs
->mv_comps
[0], default_nmv_components
, sizeof(default_nmv_components
));
366 memset(&probs
->nmvc_mask
, 0, sizeof(rvcn_dec_vp9_nmv_ctx_mask_t
));
369 static rvcn_dec_message_vp9_t
get_vp9_msg(struct radeon_decoder
*dec
,
370 struct pipe_video_buffer
*target
,
371 struct pipe_vp9_picture_desc
*pic
)
373 rvcn_dec_message_vp9_t result
;
376 memset(&result
, 0, sizeof(result
));
379 rvcn_dec_vp9_probs_segment_t
*prbs
= (rvcn_dec_vp9_probs_segment_t
*)(dec
->probs
);
381 if (pic
->picture_parameter
.pic_fields
.segmentation_enabled
) {
382 for (i
= 0; i
< 8; ++i
) {
383 prbs
->seg
.feature_data
[i
] =
384 (pic
->slice_parameter
.seg_param
[i
].alt_quant
& 0xffff) |
385 ((pic
->slice_parameter
.seg_param
[i
].alt_lf
& 0xff) << 16) |
386 ((pic
->slice_parameter
.seg_param
[i
].segment_flags
.segment_reference
& 0xf) << 24);
387 prbs
->seg
.feature_mask
[i
] =
388 (pic
->slice_parameter
.seg_param
[i
].alt_quant_enabled
<< 0) |
389 (pic
->slice_parameter
.seg_param
[i
].alt_lf_enabled
<< 1) |
390 (pic
->slice_parameter
.seg_param
[i
].segment_flags
.segment_reference_enabled
<< 2) |
391 (pic
->slice_parameter
.seg_param
[i
].segment_flags
.segment_reference_skipped
<< 3);
394 for (i
= 0; i
< 7; ++i
)
395 prbs
->seg
.tree_probs
[i
] = pic
->picture_parameter
.mb_segment_tree_probs
[i
];
397 for (i
= 0; i
< 3; ++i
)
398 prbs
->seg
.pred_probs
[i
] = pic
->picture_parameter
.segment_pred_probs
[i
];
400 prbs
->seg
.abs_delta
= 0;
402 memset(&prbs
->seg
, 0, 256);
404 result
.frame_header_flags
= (pic
->picture_parameter
.pic_fields
.frame_type
405 << RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT
) &
406 RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK
;
408 result
.frame_header_flags
|= (pic
->picture_parameter
.pic_fields
.error_resilient_mode
409 << RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT
) &
410 RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK
;
412 result
.frame_header_flags
|= (pic
->picture_parameter
.pic_fields
.intra_only
413 << RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT
) &
414 RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK
;
416 result
.frame_header_flags
|= (pic
->picture_parameter
.pic_fields
.allow_high_precision_mv
417 << RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT
) &
418 RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK
;
420 result
.frame_header_flags
|= (pic
->picture_parameter
.pic_fields
.frame_parallel_decoding_mode
421 << RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT
) &
422 RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK
;
424 result
.frame_header_flags
|= (pic
->picture_parameter
.pic_fields
.refresh_frame_context
425 << RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT
) &
426 RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK
;
428 result
.frame_header_flags
|= (pic
->picture_parameter
.pic_fields
.segmentation_enabled
429 << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT
) &
430 RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK
;
432 result
.frame_header_flags
|= (pic
->picture_parameter
.pic_fields
.segmentation_update_map
433 << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT
) &
434 RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK
;
436 result
.frame_header_flags
|= (pic
->picture_parameter
.pic_fields
.segmentation_temporal_update
437 << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT
) &
438 RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK
;
440 result
.frame_header_flags
|= (pic
->picture_parameter
.mode_ref_delta_enabled
441 << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT
) &
442 RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK
;
444 result
.frame_header_flags
|= (pic
->picture_parameter
.mode_ref_delta_update
445 << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT
) &
446 RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK
;
448 result
.frame_header_flags
|=
449 ((dec
->show_frame
&& !pic
->picture_parameter
.pic_fields
.error_resilient_mode
)
450 << RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT
) &
451 RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK
;
452 dec
->show_frame
= pic
->picture_parameter
.pic_fields
.show_frame
;
454 result
.interp_filter
= pic
->picture_parameter
.pic_fields
.mcomp_filter_type
;
456 result
.frame_context_idx
= pic
->picture_parameter
.pic_fields
.frame_context_idx
;
457 result
.reset_frame_context
= pic
->picture_parameter
.pic_fields
.reset_frame_context
;
459 result
.filter_level
= pic
->picture_parameter
.filter_level
;
460 result
.sharpness_level
= pic
->picture_parameter
.sharpness_level
;
462 for (i
= 0; i
< 8; ++i
)
463 memcpy(result
.lf_adj_level
[i
], pic
->slice_parameter
.seg_param
[i
].filter_level
, 4 * 2);
465 if (pic
->picture_parameter
.pic_fields
.lossless_flag
) {
466 result
.base_qindex
= 0;
467 result
.y_dc_delta_q
= 0;
468 result
.uv_ac_delta_q
= 0;
469 result
.uv_dc_delta_q
= 0;
471 result
.base_qindex
= pic
->picture_parameter
.base_qindex
;
472 result
.y_dc_delta_q
= pic
->picture_parameter
.y_dc_delta_q
;
473 result
.uv_ac_delta_q
= pic
->picture_parameter
.uv_ac_delta_q
;
474 result
.uv_dc_delta_q
= pic
->picture_parameter
.uv_dc_delta_q
;
477 result
.log2_tile_cols
= pic
->picture_parameter
.log2_tile_columns
;
478 result
.log2_tile_rows
= pic
->picture_parameter
.log2_tile_rows
;
479 result
.chroma_format
= 1;
480 result
.bit_depth_luma_minus8
= result
.bit_depth_chroma_minus8
=
481 (pic
->picture_parameter
.bit_depth
- 8);
483 result
.vp9_frame_size
= align(dec
->bs_size
, 128);
484 result
.uncompressed_header_size
= pic
->picture_parameter
.frame_header_length_in_bytes
;
485 result
.compressed_header_size
= pic
->picture_parameter
.first_partition_size
;
487 assert(dec
->base
.max_references
+ 1 <= ARRAY_SIZE(dec
->render_pic_list
));
489 //clear the dec->render list if it is not used as a reference
490 for (i
= 0; i
< ARRAY_SIZE(dec
->render_pic_list
); i
++) {
491 if (dec
->render_pic_list
[i
]) {
493 if (dec
->render_pic_list
[i
] == pic
->ref
[j
])
497 dec
->render_pic_list
[i
] = NULL
;
501 for (i
= 0; i
< ARRAY_SIZE(dec
->render_pic_list
); ++i
) {
502 if (dec
->render_pic_list
[i
] && dec
->render_pic_list
[i
] == target
) {
503 if (target
->codec
!= NULL
){
504 result
.curr_pic_idx
=(uintptr_t)vl_video_buffer_get_associated_data(target
, &dec
->base
);
506 result
.curr_pic_idx
= i
;
507 vl_video_buffer_set_associated_data(target
, &dec
->base
, (void *)(uintptr_t)i
,
508 &radeon_dec_destroy_associated_data
);
511 } else if (!dec
->render_pic_list
[i
]) {
512 dec
->render_pic_list
[i
] = target
;
513 result
.curr_pic_idx
= i
;
514 vl_video_buffer_set_associated_data(target
, &dec
->base
, (void *)(uintptr_t)i
,
515 &radeon_dec_destroy_associated_data
);
520 for (i
= 0; i
< 8; i
++) {
521 result
.ref_frame_map
[i
] =
522 (pic
->ref
[i
]) ? (uintptr_t)vl_video_buffer_get_associated_data(pic
->ref
[i
], &dec
->base
)
526 result
.frame_refs
[0] = result
.ref_frame_map
[pic
->picture_parameter
.pic_fields
.last_ref_frame
];
527 result
.ref_frame_sign_bias
[0] = pic
->picture_parameter
.pic_fields
.last_ref_frame_sign_bias
;
528 result
.frame_refs
[1] = result
.ref_frame_map
[pic
->picture_parameter
.pic_fields
.golden_ref_frame
];
529 result
.ref_frame_sign_bias
[1] = pic
->picture_parameter
.pic_fields
.golden_ref_frame_sign_bias
;
530 result
.frame_refs
[2] = result
.ref_frame_map
[pic
->picture_parameter
.pic_fields
.alt_ref_frame
];
531 result
.ref_frame_sign_bias
[2] = pic
->picture_parameter
.pic_fields
.alt_ref_frame_sign_bias
;
533 if (pic
->base
.profile
== PIPE_VIDEO_PROFILE_VP9_PROFILE2
) {
534 if (target
->buffer_format
== PIPE_FORMAT_P010
|| target
->buffer_format
== PIPE_FORMAT_P016
) {
535 result
.p010_mode
= 1;
538 result
.p010_mode
= 0;
539 result
.luma_10to8
= 1;
540 result
.chroma_10to8
= 1;
547 static unsigned calc_ctx_size_h265_main(struct radeon_decoder
*dec
)
549 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
550 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
552 unsigned max_references
= dec
->base
.max_references
+ 1;
554 if (dec
->base
.width
* dec
->base
.height
>= 4096 * 2000)
555 max_references
= MAX2(max_references
, 8);
557 max_references
= MAX2(max_references
, 17);
559 width
= align(width
, 16);
560 height
= align(height
, 16);
561 return ((width
+ 255) / 16) * ((height
+ 255) / 16) * 16 * max_references
+ 52 * 1024;
564 static unsigned calc_ctx_size_h265_main10(struct radeon_decoder
*dec
,
565 struct pipe_h265_picture_desc
*pic
)
567 unsigned log2_ctb_size
, width_in_ctb
, height_in_ctb
, num_16x16_block_per_ctb
;
568 unsigned context_buffer_size_per_ctb_row
, cm_buffer_size
, max_mb_address
, db_left_tile_pxl_size
;
569 unsigned db_left_tile_ctx_size
= 4096 / 16 * (32 + 16 * 4);
571 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
572 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
573 unsigned coeff_10bit
=
574 (pic
->pps
->sps
->bit_depth_luma_minus8
|| pic
->pps
->sps
->bit_depth_chroma_minus8
) ? 2 : 1;
576 unsigned max_references
= dec
->base
.max_references
+ 1;
578 if (dec
->base
.width
* dec
->base
.height
>= 4096 * 2000)
579 max_references
= MAX2(max_references
, 8);
581 max_references
= MAX2(max_references
, 17);
583 log2_ctb_size
= pic
->pps
->sps
->log2_min_luma_coding_block_size_minus3
+ 3 +
584 pic
->pps
->sps
->log2_diff_max_min_luma_coding_block_size
;
586 width_in_ctb
= (width
+ ((1 << log2_ctb_size
) - 1)) >> log2_ctb_size
;
587 height_in_ctb
= (height
+ ((1 << log2_ctb_size
) - 1)) >> log2_ctb_size
;
589 num_16x16_block_per_ctb
= ((1 << log2_ctb_size
) >> 4) * ((1 << log2_ctb_size
) >> 4);
590 context_buffer_size_per_ctb_row
= align(width_in_ctb
* num_16x16_block_per_ctb
* 16, 256);
591 max_mb_address
= (unsigned)ceil(height
* 8 / 2048.0);
593 cm_buffer_size
= max_references
* context_buffer_size_per_ctb_row
* height_in_ctb
;
594 db_left_tile_pxl_size
= coeff_10bit
* (max_mb_address
* 2 * 2048 + 1024);
596 return cm_buffer_size
+ db_left_tile_ctx_size
+ db_left_tile_pxl_size
;
599 static rvcn_dec_message_vc1_t
get_vc1_msg(struct pipe_vc1_picture_desc
*pic
)
601 rvcn_dec_message_vc1_t result
;
603 memset(&result
, 0, sizeof(result
));
604 switch (pic
->base
.profile
) {
605 case PIPE_VIDEO_PROFILE_VC1_SIMPLE
:
606 result
.profile
= RDECODE_VC1_PROFILE_SIMPLE
;
610 case PIPE_VIDEO_PROFILE_VC1_MAIN
:
611 result
.profile
= RDECODE_VC1_PROFILE_MAIN
;
615 case PIPE_VIDEO_PROFILE_VC1_ADVANCED
:
616 result
.profile
= RDECODE_VC1_PROFILE_ADVANCED
;
624 result
.sps_info_flags
|= pic
->postprocflag
<< 7;
625 result
.sps_info_flags
|= pic
->pulldown
<< 6;
626 result
.sps_info_flags
|= pic
->interlace
<< 5;
627 result
.sps_info_flags
|= pic
->tfcntrflag
<< 4;
628 result
.sps_info_flags
|= pic
->finterpflag
<< 3;
629 result
.sps_info_flags
|= pic
->psf
<< 1;
631 result
.pps_info_flags
|= pic
->range_mapy_flag
<< 31;
632 result
.pps_info_flags
|= pic
->range_mapy
<< 28;
633 result
.pps_info_flags
|= pic
->range_mapuv_flag
<< 27;
634 result
.pps_info_flags
|= pic
->range_mapuv
<< 24;
635 result
.pps_info_flags
|= pic
->multires
<< 21;
636 result
.pps_info_flags
|= pic
->maxbframes
<< 16;
637 result
.pps_info_flags
|= pic
->overlap
<< 11;
638 result
.pps_info_flags
|= pic
->quantizer
<< 9;
639 result
.pps_info_flags
|= pic
->panscan_flag
<< 7;
640 result
.pps_info_flags
|= pic
->refdist_flag
<< 6;
641 result
.pps_info_flags
|= pic
->vstransform
<< 0;
643 if (pic
->base
.profile
!= PIPE_VIDEO_PROFILE_VC1_SIMPLE
) {
644 result
.pps_info_flags
|= pic
->syncmarker
<< 20;
645 result
.pps_info_flags
|= pic
->rangered
<< 19;
646 result
.pps_info_flags
|= pic
->loopfilter
<< 5;
647 result
.pps_info_flags
|= pic
->fastuvmc
<< 4;
648 result
.pps_info_flags
|= pic
->extended_mv
<< 3;
649 result
.pps_info_flags
|= pic
->extended_dmv
<< 8;
650 result
.pps_info_flags
|= pic
->dquant
<< 1;
653 result
.chroma_format
= 1;
658 static uint32_t get_ref_pic_idx(struct radeon_decoder
*dec
, struct pipe_video_buffer
*ref
)
660 uint32_t min
= MAX2(dec
->frame_number
, NUM_MPEG2_REFS
) - NUM_MPEG2_REFS
;
661 uint32_t max
= MAX2(dec
->frame_number
, 1) - 1;
664 /* seems to be the most sane fallback */
668 /* get the frame number from the associated data */
669 frame
= (uintptr_t)vl_video_buffer_get_associated_data(ref
, &dec
->base
);
671 /* limit the frame number to a valid range */
672 return MAX2(MIN2(frame
, max
), min
);
675 static rvcn_dec_message_mpeg2_vld_t
get_mpeg2_msg(struct radeon_decoder
*dec
,
676 struct pipe_mpeg12_picture_desc
*pic
)
678 const int *zscan
= pic
->alternate_scan
? vl_zscan_alternate
: vl_zscan_normal
;
679 rvcn_dec_message_mpeg2_vld_t result
;
682 memset(&result
, 0, sizeof(result
));
683 result
.decoded_pic_idx
= dec
->frame_number
;
685 result
.forward_ref_pic_idx
= get_ref_pic_idx(dec
, pic
->ref
[0]);
686 result
.backward_ref_pic_idx
= get_ref_pic_idx(dec
, pic
->ref
[1]);
688 if (pic
->intra_matrix
) {
689 result
.load_intra_quantiser_matrix
= 1;
690 for (i
= 0; i
< 64; ++i
) {
691 result
.intra_quantiser_matrix
[i
] = pic
->intra_matrix
[zscan
[i
]];
694 if (pic
->non_intra_matrix
) {
695 result
.load_nonintra_quantiser_matrix
= 1;
696 for (i
= 0; i
< 64; ++i
) {
697 result
.nonintra_quantiser_matrix
[i
] = pic
->non_intra_matrix
[zscan
[i
]];
701 result
.profile_and_level_indication
= 0;
702 result
.chroma_format
= 0x1;
704 result
.picture_coding_type
= pic
->picture_coding_type
;
705 result
.f_code
[0][0] = pic
->f_code
[0][0] + 1;
706 result
.f_code
[0][1] = pic
->f_code
[0][1] + 1;
707 result
.f_code
[1][0] = pic
->f_code
[1][0] + 1;
708 result
.f_code
[1][1] = pic
->f_code
[1][1] + 1;
709 result
.intra_dc_precision
= pic
->intra_dc_precision
;
710 result
.pic_structure
= pic
->picture_structure
;
711 result
.top_field_first
= pic
->top_field_first
;
712 result
.frame_pred_frame_dct
= pic
->frame_pred_frame_dct
;
713 result
.concealment_motion_vectors
= pic
->concealment_motion_vectors
;
714 result
.q_scale_type
= pic
->q_scale_type
;
715 result
.intra_vlc_format
= pic
->intra_vlc_format
;
716 result
.alternate_scan
= pic
->alternate_scan
;
721 static rvcn_dec_message_mpeg4_asp_vld_t
get_mpeg4_msg(struct radeon_decoder
*dec
,
722 struct pipe_mpeg4_picture_desc
*pic
)
724 rvcn_dec_message_mpeg4_asp_vld_t result
;
727 memset(&result
, 0, sizeof(result
));
728 result
.decoded_pic_idx
= dec
->frame_number
;
730 result
.forward_ref_pic_idx
= get_ref_pic_idx(dec
, pic
->ref
[0]);
731 result
.backward_ref_pic_idx
= get_ref_pic_idx(dec
, pic
->ref
[1]);
733 result
.variant_type
= 0;
734 result
.profile_and_level_indication
= 0xF0;
736 result
.video_object_layer_verid
= 0x5;
737 result
.video_object_layer_shape
= 0x0;
739 result
.video_object_layer_width
= dec
->base
.width
;
740 result
.video_object_layer_height
= dec
->base
.height
;
742 result
.vop_time_increment_resolution
= pic
->vop_time_increment_resolution
;
744 result
.short_video_header
= pic
->short_video_header
;
745 result
.interlaced
= pic
->interlaced
;
746 result
.load_intra_quant_mat
= 1;
747 result
.load_nonintra_quant_mat
= 1;
748 result
.quarter_sample
= pic
->quarter_sample
;
749 result
.complexity_estimation_disable
= 1;
750 result
.resync_marker_disable
= pic
->resync_marker_disable
;
751 result
.newpred_enable
= 0;
752 result
.reduced_resolution_vop_enable
= 0;
754 result
.quant_type
= pic
->quant_type
;
756 for (i
= 0; i
< 64; ++i
) {
757 result
.intra_quant_mat
[i
] = pic
->intra_matrix
[vl_zscan_normal
[i
]];
758 result
.nonintra_quant_mat
[i
] = pic
->non_intra_matrix
[vl_zscan_normal
[i
]];
764 static void rvcn_dec_message_create(struct radeon_decoder
*dec
)
766 rvcn_dec_message_header_t
*header
= dec
->msg
;
767 rvcn_dec_message_create_t
*create
= dec
->msg
+ sizeof(rvcn_dec_message_header_t
);
768 unsigned sizes
= sizeof(rvcn_dec_message_header_t
) + sizeof(rvcn_dec_message_create_t
);
770 memset(dec
->msg
, 0, sizes
);
771 header
->header_size
= sizeof(rvcn_dec_message_header_t
);
772 header
->total_size
= sizes
;
773 header
->num_buffers
= 1;
774 header
->msg_type
= RDECODE_MSG_CREATE
;
775 header
->stream_handle
= dec
->stream_handle
;
776 header
->status_report_feedback_number
= 0;
778 header
->index
[0].message_id
= RDECODE_MESSAGE_CREATE
;
779 header
->index
[0].offset
= sizeof(rvcn_dec_message_header_t
);
780 header
->index
[0].size
= sizeof(rvcn_dec_message_create_t
);
781 header
->index
[0].filled
= 0;
783 create
->stream_type
= dec
->stream_type
;
784 create
->session_flags
= 0;
785 create
->width_in_samples
= dec
->base
.width
;
786 create
->height_in_samples
= dec
->base
.height
;
789 static struct pb_buffer
*rvcn_dec_message_decode(struct radeon_decoder
*dec
,
790 struct pipe_video_buffer
*target
,
791 struct pipe_picture_desc
*picture
)
793 struct si_texture
*luma
= (struct si_texture
*)((struct vl_video_buffer
*)target
)->resources
[0];
794 struct si_texture
*chroma
=
795 (struct si_texture
*)((struct vl_video_buffer
*)target
)->resources
[1];
796 rvcn_dec_message_header_t
*header
;
797 rvcn_dec_message_index_t
*index
;
798 rvcn_dec_message_decode_t
*decode
;
799 unsigned sizes
= 0, offset_decode
, offset_codec
;
803 sizes
+= sizeof(rvcn_dec_message_header_t
);
804 index
= (void *)header
+ sizeof(rvcn_dec_message_header_t
);
805 sizes
+= sizeof(rvcn_dec_message_index_t
);
806 offset_decode
= sizes
;
807 decode
= (void *)index
+ sizeof(rvcn_dec_message_index_t
);
808 sizes
+= sizeof(rvcn_dec_message_decode_t
);
809 offset_codec
= sizes
;
810 codec
= (void *)decode
+ sizeof(rvcn_dec_message_decode_t
);
812 memset(dec
->msg
, 0, sizes
);
813 header
->header_size
= sizeof(rvcn_dec_message_header_t
);
814 header
->total_size
= sizes
;
815 header
->num_buffers
= 2;
816 header
->msg_type
= RDECODE_MSG_DECODE
;
817 header
->stream_handle
= dec
->stream_handle
;
818 header
->status_report_feedback_number
= dec
->frame_number
;
820 header
->index
[0].message_id
= RDECODE_MESSAGE_DECODE
;
821 header
->index
[0].offset
= offset_decode
;
822 header
->index
[0].size
= sizeof(rvcn_dec_message_decode_t
);
823 header
->index
[0].filled
= 0;
825 index
->offset
= offset_codec
;
826 index
->size
= sizeof(rvcn_dec_message_avc_t
);
829 decode
->stream_type
= dec
->stream_type
;
830 decode
->decode_flags
= 0;
831 decode
->width_in_samples
= dec
->base
.width
;
832 decode
->height_in_samples
= dec
->base
.height
;
834 decode
->bsd_size
= align(dec
->bs_size
, 128);
835 decode
->dpb_size
= dec
->dpb
.res
->buf
->size
;
836 decode
->dt_size
= si_resource(((struct vl_video_buffer
*)target
)->resources
[0])->buf
->size
+
837 si_resource(((struct vl_video_buffer
*)target
)->resources
[1])->buf
->size
;
839 decode
->sct_size
= 0;
840 decode
->sc_coeff_size
= 0;
842 decode
->sw_ctxt_size
= RDECODE_SESSION_CONTEXT_SIZE
;
843 decode
->db_pitch
= (((struct si_screen
*)dec
->screen
)->info
.family
>= CHIP_RENOIR
&&
844 dec
->base
.width
> 32 && dec
->stream_type
== RDECODE_CODEC_VP9
)
845 ? align(dec
->base
.width
, 64)
846 : align(dec
->base
.width
, 32);
847 if (((struct si_screen
*)dec
->screen
)->info
.family
>= CHIP_SIENNA_CICHLID
&&
848 dec
->stream_type
== RDECODE_CODEC_VP9
)
849 decode
->db_aligned_height
= align(dec
->base
.height
, 64);
851 decode
->db_surf_tile_config
= 0;
853 decode
->dt_pitch
= luma
->surface
.u
.gfx9
.surf_pitch
* luma
->surface
.blk_w
;
854 decode
->dt_uv_pitch
= decode
->dt_pitch
/ 2;
856 decode
->dt_tiling_mode
= 0;
857 decode
->dt_swizzle_mode
= RDECODE_SW_MODE_LINEAR
;
858 decode
->dt_array_mode
= RDECODE_ARRAY_MODE_LINEAR
;
859 decode
->dt_field_mode
= ((struct vl_video_buffer
*)target
)->base
.interlaced
;
860 decode
->dt_surf_tile_config
= 0;
861 decode
->dt_uv_surf_tile_config
= 0;
863 decode
->dt_luma_top_offset
= luma
->surface
.u
.gfx9
.surf_offset
;
864 decode
->dt_chroma_top_offset
= chroma
->surface
.u
.gfx9
.surf_offset
;
865 if (decode
->dt_field_mode
) {
866 decode
->dt_luma_bottom_offset
=
867 luma
->surface
.u
.gfx9
.surf_offset
+ luma
->surface
.u
.gfx9
.surf_slice_size
;
868 decode
->dt_chroma_bottom_offset
=
869 chroma
->surface
.u
.gfx9
.surf_offset
+ chroma
->surface
.u
.gfx9
.surf_slice_size
;
871 decode
->dt_luma_bottom_offset
= decode
->dt_luma_top_offset
;
872 decode
->dt_chroma_bottom_offset
= decode
->dt_chroma_top_offset
;
875 switch (u_reduce_video_profile(picture
->profile
)) {
876 case PIPE_VIDEO_FORMAT_MPEG4_AVC
: {
877 rvcn_dec_message_avc_t avc
= get_h264_msg(dec
, (struct pipe_h264_picture_desc
*)picture
);
878 memcpy(codec
, (void *)&avc
, sizeof(rvcn_dec_message_avc_t
));
879 index
->message_id
= RDECODE_MESSAGE_AVC
;
882 case PIPE_VIDEO_FORMAT_HEVC
: {
883 rvcn_dec_message_hevc_t hevc
=
884 get_h265_msg(dec
, target
, (struct pipe_h265_picture_desc
*)picture
);
886 memcpy(codec
, (void *)&hevc
, sizeof(rvcn_dec_message_hevc_t
));
887 index
->message_id
= RDECODE_MESSAGE_HEVC
;
888 if (dec
->ctx
.res
== NULL
) {
890 if (dec
->base
.profile
== PIPE_VIDEO_PROFILE_HEVC_MAIN_10
)
891 ctx_size
= calc_ctx_size_h265_main10(dec
, (struct pipe_h265_picture_desc
*)picture
);
893 ctx_size
= calc_ctx_size_h265_main(dec
);
894 if (!si_vid_create_buffer(dec
->screen
, &dec
->ctx
, ctx_size
, PIPE_USAGE_DEFAULT
))
895 RVID_ERR("Can't allocated context buffer.\n");
896 si_vid_clear_buffer(dec
->base
.context
, &dec
->ctx
);
900 case PIPE_VIDEO_FORMAT_VC1
: {
901 rvcn_dec_message_vc1_t vc1
= get_vc1_msg((struct pipe_vc1_picture_desc
*)picture
);
903 memcpy(codec
, (void *)&vc1
, sizeof(rvcn_dec_message_vc1_t
));
904 if ((picture
->profile
== PIPE_VIDEO_PROFILE_VC1_SIMPLE
) ||
905 (picture
->profile
== PIPE_VIDEO_PROFILE_VC1_MAIN
)) {
906 decode
->width_in_samples
= align(decode
->width_in_samples
, 16) / 16;
907 decode
->height_in_samples
= align(decode
->height_in_samples
, 16) / 16;
909 index
->message_id
= RDECODE_MESSAGE_VC1
;
912 case PIPE_VIDEO_FORMAT_MPEG12
: {
913 rvcn_dec_message_mpeg2_vld_t mpeg2
=
914 get_mpeg2_msg(dec
, (struct pipe_mpeg12_picture_desc
*)picture
);
916 memcpy(codec
, (void *)&mpeg2
, sizeof(rvcn_dec_message_mpeg2_vld_t
));
917 index
->message_id
= RDECODE_MESSAGE_MPEG2_VLD
;
920 case PIPE_VIDEO_FORMAT_MPEG4
: {
921 rvcn_dec_message_mpeg4_asp_vld_t mpeg4
=
922 get_mpeg4_msg(dec
, (struct pipe_mpeg4_picture_desc
*)picture
);
924 memcpy(codec
, (void *)&mpeg4
, sizeof(rvcn_dec_message_mpeg4_asp_vld_t
));
925 index
->message_id
= RDECODE_MESSAGE_MPEG4_ASP_VLD
;
928 case PIPE_VIDEO_FORMAT_VP9
: {
929 rvcn_dec_message_vp9_t vp9
=
930 get_vp9_msg(dec
, target
, (struct pipe_vp9_picture_desc
*)picture
);
932 memcpy(codec
, (void *)&vp9
, sizeof(rvcn_dec_message_vp9_t
));
933 index
->message_id
= RDECODE_MESSAGE_VP9
;
935 if (dec
->ctx
.res
== NULL
) {
939 /* default probability + probability data */
942 if (((struct si_screen
*)dec
->screen
)->info
.family
>= CHIP_RENOIR
) {
943 /* SRE collocated context data */
944 ctx_size
+= 32 * 2 * 128 * 68;
945 /* SMP collocated context data */
946 ctx_size
+= 9 * 64 * 2 * 128 * 68;
947 /* SDB left tile pixel */
948 ctx_size
+= 8 * 2 * 2 * 8192;
950 ctx_size
+= 32 * 2 * 64 * 64;
951 ctx_size
+= 9 * 64 * 2 * 64 * 64;
952 ctx_size
+= 8 * 2 * 4096;
955 if (dec
->base
.profile
== PIPE_VIDEO_PROFILE_VP9_PROFILE2
)
956 ctx_size
+= 8 * 2 * 4096;
958 if (!si_vid_create_buffer(dec
->screen
, &dec
->ctx
, ctx_size
, PIPE_USAGE_DEFAULT
))
959 RVID_ERR("Can't allocated context buffer.\n");
960 si_vid_clear_buffer(dec
->base
.context
, &dec
->ctx
);
962 /* ctx needs probs table */
963 ptr
= dec
->ws
->buffer_map(dec
->ctx
.res
->buf
, dec
->cs
,
964 PIPE_TRANSFER_WRITE
| RADEON_TRANSFER_TEMPORARY
);
965 fill_probs_table(ptr
);
966 dec
->ws
->buffer_unmap(dec
->ctx
.res
->buf
);
976 decode
->hw_ctxt_size
= dec
->ctx
.res
->buf
->size
;
978 return luma
->buffer
.buf
;
981 static void rvcn_dec_message_destroy(struct radeon_decoder
*dec
)
983 rvcn_dec_message_header_t
*header
= dec
->msg
;
985 memset(dec
->msg
, 0, sizeof(rvcn_dec_message_header_t
));
986 header
->header_size
= sizeof(rvcn_dec_message_header_t
);
987 header
->total_size
= sizeof(rvcn_dec_message_header_t
) - sizeof(rvcn_dec_message_index_t
);
988 header
->num_buffers
= 0;
989 header
->msg_type
= RDECODE_MSG_DESTROY
;
990 header
->stream_handle
= dec
->stream_handle
;
991 header
->status_report_feedback_number
= 0;
994 static void rvcn_dec_message_feedback(struct radeon_decoder
*dec
)
996 rvcn_dec_feedback_header_t
*header
= (void *)dec
->fb
;
998 header
->header_size
= sizeof(rvcn_dec_feedback_header_t
);
999 header
->total_size
= sizeof(rvcn_dec_feedback_header_t
);
1000 header
->num_buffers
= 0;
1003 /* flush IB to the hardware */
1004 static int flush(struct radeon_decoder
*dec
, unsigned flags
)
1006 return dec
->ws
->cs_flush(dec
->cs
, flags
, NULL
);
1009 /* add a new set register command to the IB */
1010 static void set_reg(struct radeon_decoder
*dec
, unsigned reg
, uint32_t val
)
1012 radeon_emit(dec
->cs
, RDECODE_PKT0(reg
>> 2, 0));
1013 radeon_emit(dec
->cs
, val
);
1016 /* send a command to the VCPU through the GPCOM registers */
1017 static void send_cmd(struct radeon_decoder
*dec
, unsigned cmd
, struct pb_buffer
*buf
, uint32_t off
,
1018 enum radeon_bo_usage usage
, enum radeon_bo_domain domain
)
1022 dec
->ws
->cs_add_buffer(dec
->cs
, buf
, usage
| RADEON_USAGE_SYNCHRONIZED
, domain
, 0);
1023 addr
= dec
->ws
->buffer_get_virtual_address(buf
);
1026 set_reg(dec
, dec
->reg
.data0
, addr
);
1027 set_reg(dec
, dec
->reg
.data1
, addr
>> 32);
1028 set_reg(dec
, dec
->reg
.cmd
, cmd
<< 1);
1031 /* do the codec needs an IT buffer ?*/
1032 static bool have_it(struct radeon_decoder
*dec
)
1034 return dec
->stream_type
== RDECODE_CODEC_H264_PERF
|| dec
->stream_type
== RDECODE_CODEC_H265
;
1037 /* do the codec needs an probs buffer? */
1038 static bool have_probs(struct radeon_decoder
*dec
)
1040 return dec
->stream_type
== RDECODE_CODEC_VP9
;
1043 /* map the next available message/feedback/itscaling buffer */
1044 static void map_msg_fb_it_probs_buf(struct radeon_decoder
*dec
)
1046 struct rvid_buffer
*buf
;
1049 /* grab the current message/feedback buffer */
1050 buf
= &dec
->msg_fb_it_probs_buffers
[dec
->cur_buffer
];
1052 /* and map it for CPU access */
1054 dec
->ws
->buffer_map(buf
->res
->buf
, dec
->cs
, PIPE_TRANSFER_WRITE
| RADEON_TRANSFER_TEMPORARY
);
1056 /* calc buffer offsets */
1059 dec
->fb
= (uint32_t *)(ptr
+ FB_BUFFER_OFFSET
);
1061 dec
->it
= (uint8_t *)(ptr
+ FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
);
1062 else if (have_probs(dec
))
1063 dec
->probs
= (uint8_t *)(ptr
+ FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
);
1066 /* unmap and send a message command to the VCPU */
1067 static void send_msg_buf(struct radeon_decoder
*dec
)
1069 struct rvid_buffer
*buf
;
1071 /* ignore the request if message/feedback buffer isn't mapped */
1072 if (!dec
->msg
|| !dec
->fb
)
1075 /* grab the current message buffer */
1076 buf
= &dec
->msg_fb_it_probs_buffers
[dec
->cur_buffer
];
1078 /* unmap the buffer */
1079 dec
->ws
->buffer_unmap(buf
->res
->buf
);
1085 if (dec
->sessionctx
.res
)
1086 send_cmd(dec
, RDECODE_CMD_SESSION_CONTEXT_BUFFER
, dec
->sessionctx
.res
->buf
, 0,
1087 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
1089 /* and send it to the hardware */
1090 send_cmd(dec
, RDECODE_CMD_MSG_BUFFER
, buf
->res
->buf
, 0, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
1093 /* cycle to the next set of buffers */
1094 static void next_buffer(struct radeon_decoder
*dec
)
1097 dec
->cur_buffer
%= NUM_BUFFERS
;
1100 static unsigned calc_ctx_size_h264_perf(struct radeon_decoder
*dec
)
1102 unsigned width_in_mb
, height_in_mb
, ctx_size
;
1103 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
1104 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
1106 unsigned max_references
= dec
->base
.max_references
+ 1;
1108 // picture width & height in 16 pixel units
1109 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
1110 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
1112 unsigned fs_in_mb
= width_in_mb
* height_in_mb
;
1113 unsigned num_dpb_buffer
;
1114 switch (dec
->base
.level
) {
1116 num_dpb_buffer
= 8100 / fs_in_mb
;
1119 num_dpb_buffer
= 18000 / fs_in_mb
;
1122 num_dpb_buffer
= 20480 / fs_in_mb
;
1125 num_dpb_buffer
= 32768 / fs_in_mb
;
1128 num_dpb_buffer
= 34816 / fs_in_mb
;
1131 num_dpb_buffer
= 110400 / fs_in_mb
;
1134 num_dpb_buffer
= 184320 / fs_in_mb
;
1137 num_dpb_buffer
= 184320 / fs_in_mb
;
1141 max_references
= MAX2(MIN2(NUM_H264_REFS
, num_dpb_buffer
), max_references
);
1142 ctx_size
= max_references
* align(width_in_mb
* height_in_mb
* 192, 256);
1147 /* calculate size of reference picture buffer */
1148 static unsigned calc_dpb_size(struct radeon_decoder
*dec
)
1150 unsigned width_in_mb
, height_in_mb
, image_size
, dpb_size
;
1152 // always align them to MB size for dpb calculation
1153 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
1154 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
1156 // always one more for currently decoded picture
1157 unsigned max_references
= dec
->base
.max_references
+ 1;
1159 // aligned size of a single frame
1160 image_size
= align(width
, 32) * height
;
1161 image_size
+= image_size
/ 2;
1162 image_size
= align(image_size
, 1024);
1164 // picture width & height in 16 pixel units
1165 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
1166 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
1168 switch (u_reduce_video_profile(dec
->base
.profile
)) {
1169 case PIPE_VIDEO_FORMAT_MPEG4_AVC
: {
1170 unsigned fs_in_mb
= width_in_mb
* height_in_mb
;
1171 unsigned num_dpb_buffer
;
1173 switch (dec
->base
.level
) {
1175 num_dpb_buffer
= 8100 / fs_in_mb
;
1178 num_dpb_buffer
= 18000 / fs_in_mb
;
1181 num_dpb_buffer
= 20480 / fs_in_mb
;
1184 num_dpb_buffer
= 32768 / fs_in_mb
;
1187 num_dpb_buffer
= 34816 / fs_in_mb
;
1190 num_dpb_buffer
= 110400 / fs_in_mb
;
1193 num_dpb_buffer
= 184320 / fs_in_mb
;
1196 num_dpb_buffer
= 184320 / fs_in_mb
;
1200 max_references
= MAX2(MIN2(NUM_H264_REFS
, num_dpb_buffer
), max_references
);
1201 dpb_size
= image_size
* max_references
;
1205 case PIPE_VIDEO_FORMAT_HEVC
:
1206 if (dec
->base
.width
* dec
->base
.height
>= 4096 * 2000)
1207 max_references
= MAX2(max_references
, 8);
1209 max_references
= MAX2(max_references
, 17);
1211 width
= align(width
, 16);
1212 height
= align(height
, 16);
1213 if (dec
->base
.profile
== PIPE_VIDEO_PROFILE_HEVC_MAIN_10
)
1214 dpb_size
= align((align(width
, 32) * height
* 9) / 4, 256) * max_references
;
1216 dpb_size
= align((align(width
, 32) * height
* 3) / 2, 256) * max_references
;
1219 case PIPE_VIDEO_FORMAT_VC1
:
1220 // the firmware seems to allways assume a minimum of ref frames
1221 max_references
= MAX2(NUM_VC1_REFS
, max_references
);
1223 // reference picture buffer
1224 dpb_size
= image_size
* max_references
;
1227 dpb_size
+= width_in_mb
* height_in_mb
* 128;
1229 // IT surface buffer
1230 dpb_size
+= width_in_mb
* 64;
1232 // DB surface buffer
1233 dpb_size
+= width_in_mb
* 128;
1236 dpb_size
+= align(MAX2(width_in_mb
, height_in_mb
) * 7 * 16, 64);
1239 case PIPE_VIDEO_FORMAT_MPEG12
:
1240 // reference picture buffer, must be big enough for all frames
1241 dpb_size
= image_size
* NUM_MPEG2_REFS
;
1244 case PIPE_VIDEO_FORMAT_MPEG4
:
1245 // reference picture buffer
1246 dpb_size
= image_size
* max_references
;
1249 dpb_size
+= width_in_mb
* height_in_mb
* 64;
1251 // IT surface buffer
1252 dpb_size
+= align(width_in_mb
* height_in_mb
* 32, 64);
1254 dpb_size
= MAX2(dpb_size
, 30 * 1024 * 1024);
1257 case PIPE_VIDEO_FORMAT_VP9
:
1258 max_references
= MAX2(max_references
, 9);
1260 dpb_size
= (((struct si_screen
*)dec
->screen
)->info
.family
>= CHIP_RENOIR
)
1261 ? (8192 * 4320 * 3 / 2) * max_references
1262 : (4096 * 3000 * 3 / 2) * max_references
;
1264 if (dec
->base
.profile
== PIPE_VIDEO_PROFILE_VP9_PROFILE2
)
1265 dpb_size
*= (3 / 2);
1268 case PIPE_VIDEO_FORMAT_JPEG
:
1273 // something is missing here
1276 // at least use a sane default value
1277 dpb_size
= 32 * 1024 * 1024;
1284 * destroy this video decoder
1286 static void radeon_dec_destroy(struct pipe_video_codec
*decoder
)
1288 struct radeon_decoder
*dec
= (struct radeon_decoder
*)decoder
;
1293 map_msg_fb_it_probs_buf(dec
);
1294 rvcn_dec_message_destroy(dec
);
1299 dec
->ws
->cs_destroy(dec
->cs
);
1301 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
1302 si_vid_destroy_buffer(&dec
->msg_fb_it_probs_buffers
[i
]);
1303 si_vid_destroy_buffer(&dec
->bs_buffers
[i
]);
1306 si_vid_destroy_buffer(&dec
->dpb
);
1307 si_vid_destroy_buffer(&dec
->ctx
);
1308 si_vid_destroy_buffer(&dec
->sessionctx
);
1314 * start decoding of a new frame
1316 static void radeon_dec_begin_frame(struct pipe_video_codec
*decoder
,
1317 struct pipe_video_buffer
*target
,
1318 struct pipe_picture_desc
*picture
)
1320 struct radeon_decoder
*dec
= (struct radeon_decoder
*)decoder
;
1325 frame
= ++dec
->frame_number
;
1326 if (dec
->stream_type
!= RDECODE_CODEC_VP9
)
1327 vl_video_buffer_set_associated_data(target
, decoder
, (void *)frame
,
1328 &radeon_dec_destroy_associated_data
);
1331 dec
->bs_ptr
= dec
->ws
->buffer_map(dec
->bs_buffers
[dec
->cur_buffer
].res
->buf
, dec
->cs
,
1332 PIPE_TRANSFER_WRITE
| RADEON_TRANSFER_TEMPORARY
);
/**
 * Decode a macroblock.
 *
 * Macroblock-level decoding is not implemented by this decoder; reaching this
 * entry point trips an assertion in debug builds and does nothing otherwise.
 */
static void radeon_dec_decode_macroblock(struct pipe_video_codec *decoder,
                                         struct pipe_video_buffer *target,
                                         struct pipe_picture_desc *picture,
                                         const struct pipe_macroblock *macroblocks,
                                         unsigned num_macroblocks)
{
   /* not supported (yet) */
   assert(0);
}
1349 * decode a bitstream
1351 static void radeon_dec_decode_bitstream(struct pipe_video_codec
*decoder
,
1352 struct pipe_video_buffer
*target
,
1353 struct pipe_picture_desc
*picture
, unsigned num_buffers
,
1354 const void *const *buffers
, const unsigned *sizes
)
1356 struct radeon_decoder
*dec
= (struct radeon_decoder
*)decoder
;
1364 for (i
= 0; i
< num_buffers
; ++i
) {
1365 struct rvid_buffer
*buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
1366 unsigned new_size
= dec
->bs_size
+ sizes
[i
];
1368 if (new_size
> buf
->res
->buf
->size
) {
1369 dec
->ws
->buffer_unmap(buf
->res
->buf
);
1370 if (!si_vid_resize_buffer(dec
->screen
, dec
->cs
, buf
, new_size
)) {
1371 RVID_ERR("Can't resize bitstream buffer!");
1375 dec
->bs_ptr
= dec
->ws
->buffer_map(buf
->res
->buf
, dec
->cs
,
1376 PIPE_TRANSFER_WRITE
| RADEON_TRANSFER_TEMPORARY
);
1380 dec
->bs_ptr
+= dec
->bs_size
;
1383 memcpy(dec
->bs_ptr
, buffers
[i
], sizes
[i
]);
1384 dec
->bs_size
+= sizes
[i
];
1385 dec
->bs_ptr
+= sizes
[i
];
1390 * send cmd for vcn dec
1392 void send_cmd_dec(struct radeon_decoder
*dec
, struct pipe_video_buffer
*target
,
1393 struct pipe_picture_desc
*picture
)
1395 struct pb_buffer
*dt
;
1396 struct rvid_buffer
*msg_fb_it_probs_buf
, *bs_buf
;
1398 msg_fb_it_probs_buf
= &dec
->msg_fb_it_probs_buffers
[dec
->cur_buffer
];
1399 bs_buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
1401 memset(dec
->bs_ptr
, 0, align(dec
->bs_size
, 128) - dec
->bs_size
);
1402 dec
->ws
->buffer_unmap(bs_buf
->res
->buf
);
1404 map_msg_fb_it_probs_buf(dec
);
1405 dt
= rvcn_dec_message_decode(dec
, target
, picture
);
1406 rvcn_dec_message_feedback(dec
);
1409 send_cmd(dec
, RDECODE_CMD_DPB_BUFFER
, dec
->dpb
.res
->buf
, 0, RADEON_USAGE_READWRITE
,
1410 RADEON_DOMAIN_VRAM
);
1412 send_cmd(dec
, RDECODE_CMD_CONTEXT_BUFFER
, dec
->ctx
.res
->buf
, 0, RADEON_USAGE_READWRITE
,
1413 RADEON_DOMAIN_VRAM
);
1414 send_cmd(dec
, RDECODE_CMD_BITSTREAM_BUFFER
, bs_buf
->res
->buf
, 0, RADEON_USAGE_READ
,
1416 send_cmd(dec
, RDECODE_CMD_DECODING_TARGET_BUFFER
, dt
, 0, RADEON_USAGE_WRITE
, RADEON_DOMAIN_VRAM
);
1417 send_cmd(dec
, RDECODE_CMD_FEEDBACK_BUFFER
, msg_fb_it_probs_buf
->res
->buf
, FB_BUFFER_OFFSET
,
1418 RADEON_USAGE_WRITE
, RADEON_DOMAIN_GTT
);
1420 send_cmd(dec
, RDECODE_CMD_IT_SCALING_TABLE_BUFFER
, msg_fb_it_probs_buf
->res
->buf
,
1421 FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
1422 else if (have_probs(dec
))
1423 send_cmd(dec
, RDECODE_CMD_PROB_TBL_BUFFER
, msg_fb_it_probs_buf
->res
->buf
,
1424 FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
1425 set_reg(dec
, dec
->reg
.cntl
, 1);
1429 * end decoding of the current frame
1431 static void radeon_dec_end_frame(struct pipe_video_codec
*decoder
, struct pipe_video_buffer
*target
,
1432 struct pipe_picture_desc
*picture
)
1434 struct radeon_decoder
*dec
= (struct radeon_decoder
*)decoder
;
1441 dec
->send_cmd(dec
, target
, picture
);
1442 flush(dec
, PIPE_FLUSH_ASYNC
);
/**
 * Flush any outstanding command buffers to the hardware.
 *
 * Intentionally a no-op: radeon_dec_end_frame already flushes the command
 * stream for every frame.
 */
static void radeon_dec_flush(struct pipe_video_codec *decoder)
{
   /* nothing to do */
}
1454 * create and HW decoder
1456 struct pipe_video_codec
*radeon_create_decoder(struct pipe_context
*context
,
1457 const struct pipe_video_codec
*templ
)
1459 struct si_context
*sctx
= (struct si_context
*)context
;
1460 struct radeon_winsys
*ws
= sctx
->ws
;
1461 unsigned width
= templ
->width
, height
= templ
->height
;
1462 unsigned dpb_size
, bs_buf_size
, stream_type
= 0, ring
= RING_VCN_DEC
;
1463 struct radeon_decoder
*dec
;
1466 switch (u_reduce_video_profile(templ
->profile
)) {
1467 case PIPE_VIDEO_FORMAT_MPEG12
:
1468 if (templ
->entrypoint
> PIPE_VIDEO_ENTRYPOINT_BITSTREAM
)
1469 return vl_create_mpeg12_decoder(context
, templ
);
1470 stream_type
= RDECODE_CODEC_MPEG2_VLD
;
1472 case PIPE_VIDEO_FORMAT_MPEG4
:
1473 width
= align(width
, VL_MACROBLOCK_WIDTH
);
1474 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
1475 stream_type
= RDECODE_CODEC_MPEG4
;
1477 case PIPE_VIDEO_FORMAT_VC1
:
1478 stream_type
= RDECODE_CODEC_VC1
;
1480 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
1481 width
= align(width
, VL_MACROBLOCK_WIDTH
);
1482 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
1483 stream_type
= RDECODE_CODEC_H264_PERF
;
1485 case PIPE_VIDEO_FORMAT_HEVC
:
1486 stream_type
= RDECODE_CODEC_H265
;
1488 case PIPE_VIDEO_FORMAT_VP9
:
1489 stream_type
= RDECODE_CODEC_VP9
;
1491 case PIPE_VIDEO_FORMAT_JPEG
:
1492 stream_type
= RDECODE_CODEC_JPEG
;
1493 ring
= RING_VCN_JPEG
;
1500 dec
= CALLOC_STRUCT(radeon_decoder
);
1506 dec
->base
.context
= context
;
1507 dec
->base
.width
= width
;
1508 dec
->base
.height
= height
;
1510 dec
->base
.destroy
= radeon_dec_destroy
;
1511 dec
->base
.begin_frame
= radeon_dec_begin_frame
;
1512 dec
->base
.decode_macroblock
= radeon_dec_decode_macroblock
;
1513 dec
->base
.decode_bitstream
= radeon_dec_decode_bitstream
;
1514 dec
->base
.end_frame
= radeon_dec_end_frame
;
1515 dec
->base
.flush
= radeon_dec_flush
;
1517 dec
->stream_type
= stream_type
;
1518 dec
->stream_handle
= si_vid_alloc_stream_handle();
1519 dec
->screen
= context
->screen
;
1521 dec
->cs
= ws
->cs_create(sctx
->ctx
, ring
, NULL
, NULL
, false);
1523 RVID_ERR("Can't get command submission context.\n");
1527 for (i
= 0; i
< ARRAY_SIZE(dec
->render_pic_list
); i
++)
1528 dec
->render_pic_list
[i
] = NULL
;
1529 bs_buf_size
= width
* height
* (512 / (16 * 16));
1530 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
1531 unsigned msg_fb_it_probs_size
= FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
;
1533 msg_fb_it_probs_size
+= IT_SCALING_TABLE_SIZE
;
1534 else if (have_probs(dec
))
1535 msg_fb_it_probs_size
+= VP9_PROBS_TABLE_SIZE
;
1536 /* use vram to improve performance, workaround an unknown bug */
1537 if (!si_vid_create_buffer(dec
->screen
, &dec
->msg_fb_it_probs_buffers
[i
], msg_fb_it_probs_size
,
1538 PIPE_USAGE_DEFAULT
)) {
1539 RVID_ERR("Can't allocated message buffers.\n");
1543 if (!si_vid_create_buffer(dec
->screen
, &dec
->bs_buffers
[i
], bs_buf_size
,
1544 PIPE_USAGE_STAGING
)) {
1545 RVID_ERR("Can't allocated bitstream buffers.\n");
1549 si_vid_clear_buffer(context
, &dec
->msg_fb_it_probs_buffers
[i
]);
1550 si_vid_clear_buffer(context
, &dec
->bs_buffers
[i
]);
1552 if (have_probs(dec
)) {
1553 struct rvid_buffer
*buf
;
1556 buf
= &dec
->msg_fb_it_probs_buffers
[i
];
1557 ptr
= dec
->ws
->buffer_map(buf
->res
->buf
, dec
->cs
,
1558 PIPE_TRANSFER_WRITE
| RADEON_TRANSFER_TEMPORARY
);
1559 ptr
+= FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
;
1560 fill_probs_table(ptr
);
1561 dec
->ws
->buffer_unmap(buf
->res
->buf
);
1565 dpb_size
= calc_dpb_size(dec
);
1567 if (!si_vid_create_buffer(dec
->screen
, &dec
->dpb
, dpb_size
, PIPE_USAGE_DEFAULT
)) {
1568 RVID_ERR("Can't allocated dpb.\n");
1571 si_vid_clear_buffer(context
, &dec
->dpb
);
1574 if (dec
->stream_type
== RDECODE_CODEC_H264_PERF
) {
1575 unsigned ctx_size
= calc_ctx_size_h264_perf(dec
);
1576 if (!si_vid_create_buffer(dec
->screen
, &dec
->ctx
, ctx_size
, PIPE_USAGE_DEFAULT
)) {
1577 RVID_ERR("Can't allocated context buffer.\n");
1580 si_vid_clear_buffer(context
, &dec
->ctx
);
1583 if (!si_vid_create_buffer(dec
->screen
, &dec
->sessionctx
, RDECODE_SESSION_CONTEXT_SIZE
,
1584 PIPE_USAGE_DEFAULT
)) {
1585 RVID_ERR("Can't allocated session ctx.\n");
1588 si_vid_clear_buffer(context
, &dec
->sessionctx
);
1590 switch (sctx
->family
) {
1593 dec
->reg
.data0
= RDECODE_VCN1_GPCOM_VCPU_DATA0
;
1594 dec
->reg
.data1
= RDECODE_VCN1_GPCOM_VCPU_DATA1
;
1595 dec
->reg
.cmd
= RDECODE_VCN1_GPCOM_VCPU_CMD
;
1596 dec
->reg
.cntl
= RDECODE_VCN1_ENGINE_CNTL
;
1597 dec
->jpg
.direct_reg
= false;
1603 dec
->reg
.data0
= RDECODE_VCN2_GPCOM_VCPU_DATA0
;
1604 dec
->reg
.data1
= RDECODE_VCN2_GPCOM_VCPU_DATA1
;
1605 dec
->reg
.cmd
= RDECODE_VCN2_GPCOM_VCPU_CMD
;
1606 dec
->reg
.cntl
= RDECODE_VCN2_ENGINE_CNTL
;
1607 dec
->jpg
.direct_reg
= true;
1610 case CHIP_SIENNA_CICHLID
:
1611 case CHIP_NAVY_FLOUNDER
:
1612 dec
->reg
.data0
= RDECODE_VCN2_5_GPCOM_VCPU_DATA0
;
1613 dec
->reg
.data1
= RDECODE_VCN2_5_GPCOM_VCPU_DATA1
;
1614 dec
->reg
.cmd
= RDECODE_VCN2_5_GPCOM_VCPU_CMD
;
1615 dec
->reg
.cntl
= RDECODE_VCN2_5_ENGINE_CNTL
;
1616 dec
->jpg
.direct_reg
= true;
1619 RVID_ERR("VCN is not supported.\n");
1623 map_msg_fb_it_probs_buf(dec
);
1624 rvcn_dec_message_create(dec
);
1632 if (stream_type
== RDECODE_CODEC_JPEG
)
1633 dec
->send_cmd
= send_cmd_jpeg
;
1635 dec
->send_cmd
= send_cmd_dec
;
1641 dec
->ws
->cs_destroy(dec
->cs
);
1643 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
1644 si_vid_destroy_buffer(&dec
->msg_fb_it_probs_buffers
[i
]);
1645 si_vid_destroy_buffer(&dec
->bs_buffers
[i
]);
1648 si_vid_destroy_buffer(&dec
->dpb
);
1649 si_vid_destroy_buffer(&dec
->ctx
);
1650 si_vid_destroy_buffer(&dec
->sessionctx
);