radeon/vcn: add decode message for vc1 codec
[mesa.git] / src / gallium / drivers / radeon / radeon_vcn_dec.c
1 /**************************************************************************
2 *
3 * Copyright 2017 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <assert.h>
29 #include <stdio.h>
30
31 #include "pipe/p_video_codec.h"
32
33 #include "util/u_memory.h"
34 #include "util/u_video.h"
35
36 #include "vl/vl_mpeg12_decoder.h"
37
38 #include "r600_pipe_common.h"
39 #include "radeon_video.h"
40 #include "radeon_vcn_dec.h"
41
/*
 * Layout of one message/feedback/IT buffer: the message sits at offset 0,
 * the feedback area starts at FB_BUFFER_OFFSET and the IT scaling table (for
 * codecs that need one) follows FB_BUFFER_SIZE bytes after that.
 */
#define FB_BUFFER_OFFSET		0x1000
#define FB_BUFFER_SIZE			2048
#define IT_SCALING_TABLE_SIZE		992
/* value reported to the firmware as the session (software) context size */
#define RDECODE_SESSION_CONTEXT_SIZE	(128 * 1024)

/* register offsets; the GPCOM ones are written by send_cmd() */
#define RDECODE_GPCOM_VCPU_CMD		0x2070c
#define RDECODE_GPCOM_VCPU_DATA0	0x20710
#define RDECODE_GPCOM_VCPU_DATA1	0x20714
#define RDECODE_ENGINE_CNTL		0x20718

/* number of buffer sets the decoder cycles through */
#define NUM_BUFFERS			4
/* reference frame counts used when sizing the decoded picture buffer */
#define NUM_MPEG2_REFS			6
#define NUM_H264_REFS			17
#define NUM_VC1_REFS			5
56
57 struct radeon_decoder {
58 struct pipe_video_codec base;
59
60 unsigned stream_handle;
61 unsigned stream_type;
62 unsigned frame_number;
63
64 struct pipe_screen *screen;
65 struct radeon_winsys *ws;
66 struct radeon_winsys_cs *cs;
67
68 void *msg;
69 uint32_t *fb;
70 uint8_t *it;
71 void *bs_ptr;
72
73 struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS];
74 struct rvid_buffer bs_buffers[NUM_BUFFERS];
75 struct rvid_buffer dpb;
76 struct rvid_buffer ctx;
77 struct rvid_buffer sessionctx;
78
79 unsigned bs_size;
80 unsigned cur_buffer;
81 };
82
83 static rvcn_dec_message_avc_t get_h264_msg(struct radeon_decoder *dec,
84 struct pipe_h264_picture_desc *pic)
85 {
86 rvcn_dec_message_avc_t result;
87
88 memset(&result, 0, sizeof(result));
89 switch (pic->base.profile) {
90 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
91 result.profile = RDECODE_H264_PROFILE_BASELINE;
92 break;
93
94 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
95 result.profile = RDECODE_H264_PROFILE_MAIN;
96 break;
97
98 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
99 result.profile = RDECODE_H264_PROFILE_HIGH;
100 break;
101
102 default:
103 assert(0);
104 break;
105 }
106
107 result.level = dec->base.level;
108
109 result.sps_info_flags = 0;
110 result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
111 result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;
112 result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
113 result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;
114
115 result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
116 result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
117 result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;
118 result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;
119 result.log2_max_pic_order_cnt_lsb_minus4 =
120 pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
121
122 switch (dec->base.chroma_format) {
123 case PIPE_VIDEO_CHROMA_FORMAT_NONE:
124 break;
125 case PIPE_VIDEO_CHROMA_FORMAT_400:
126 result.chroma_format = 0;
127 break;
128 case PIPE_VIDEO_CHROMA_FORMAT_420:
129 result.chroma_format = 1;
130 break;
131 case PIPE_VIDEO_CHROMA_FORMAT_422:
132 result.chroma_format = 2;
133 break;
134 case PIPE_VIDEO_CHROMA_FORMAT_444:
135 result.chroma_format = 3;
136 break;
137 }
138
139 result.pps_info_flags = 0;
140 result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;
141 result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;
142 result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;
143 result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;
144 result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;
145 result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;
146 result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;
147 result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;
148
149 result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;
150 result.slice_group_map_type = pic->pps->slice_group_map_type;
151 result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;
152 result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;
153 result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;
154 result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;
155
156 memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16);
157 memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64);
158
159 memcpy(dec->it, result.scaling_list_4x4, 6*16);
160 memcpy((dec->it + 96), result.scaling_list_8x8, 2*64);
161
162 result.num_ref_frames = pic->num_ref_frames;
163
164 result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
165 result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
166
167 result.frame_num = pic->frame_num;
168 memcpy(result.frame_num_list, pic->frame_num_list, 4*16);
169 result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
170 result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
171 memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2);
172
173 result.decoded_pic_idx = pic->frame_num;
174
175 return result;
176 }
177
/**
 * Destructor callback for the data attached to video buffers.
 *
 * Nothing to release: the "pointer" is only an integer stored via uintptr_t.
 */
static void radeon_dec_destroy_associated_data(void *data)
{
	(void)data;
}
182
183 static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec,
184 struct pipe_video_buffer *target,
185 struct pipe_h265_picture_desc *pic)
186 {
187 rvcn_dec_message_hevc_t result;
188 unsigned i;
189
190 memset(&result, 0, sizeof(result));
191 result.sps_info_flags = 0;
192 result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
193 result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
194 result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
195 result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
196 result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
197 result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
198 result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
199 result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
200 result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
201 if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO)
202 result.sps_info_flags |= 1 << 9;
203 if (pic->UseRefPicList == true)
204 result.sps_info_flags |= 1 << 10;
205
206 result.chroma_format = pic->pps->sps->chroma_format_idc;
207 result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
208 result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
209 result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
210 result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
211 result.log2_min_luma_coding_block_size_minus3 =
212 pic->pps->sps->log2_min_luma_coding_block_size_minus3;
213 result.log2_diff_max_min_luma_coding_block_size =
214 pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
215 result.log2_min_transform_block_size_minus2 =
216 pic->pps->sps->log2_min_transform_block_size_minus2;
217 result.log2_diff_max_min_transform_block_size =
218 pic->pps->sps->log2_diff_max_min_transform_block_size;
219 result.max_transform_hierarchy_depth_inter =
220 pic->pps->sps->max_transform_hierarchy_depth_inter;
221 result.max_transform_hierarchy_depth_intra =
222 pic->pps->sps->max_transform_hierarchy_depth_intra;
223 result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
224 result.pcm_sample_bit_depth_chroma_minus1 =
225 pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
226 result.log2_min_pcm_luma_coding_block_size_minus3 =
227 pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
228 result.log2_diff_max_min_pcm_luma_coding_block_size =
229 pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
230 result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
231
232 result.pps_info_flags = 0;
233 result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
234 result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
235 result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
236 result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
237 result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
238 result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
239 result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
240 result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
241 result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
242 result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
243 result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
244 result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
245 result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
246 result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
247 result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
248 result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
249 result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
250 result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
251 result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
252 result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
253
254 result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
255 result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
256 result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
257 result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
258 result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
259 result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
260 result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
261 result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
262 result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
263 result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
264 result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
265 result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
266 result.init_qp_minus26 = pic->pps->init_qp_minus26;
267
268 for (i = 0; i < 19; ++i)
269 result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
270
271 for (i = 0; i < 21; ++i)
272 result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
273
274 result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
275 result.curr_idx = pic->CurrPicOrderCntVal;
276 result.curr_poc = pic->CurrPicOrderCntVal;
277
278 vl_video_buffer_set_associated_data(target, &dec->base,
279 (void *)(uintptr_t)pic->CurrPicOrderCntVal,
280 &radeon_dec_destroy_associated_data);
281
282 for (i = 0; i < 16; ++i) {
283 struct pipe_video_buffer *ref = pic->ref[i];
284 uintptr_t ref_pic = 0;
285
286 result.poc_list[i] = pic->PicOrderCntVal[i];
287
288 if (ref)
289 ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
290 else
291 ref_pic = 0x7F;
292 result.ref_pic_list[i] = ref_pic;
293 }
294
295 for (i = 0; i < 8; ++i) {
296 result.ref_pic_set_st_curr_before[i] = 0xFF;
297 result.ref_pic_set_st_curr_after[i] = 0xFF;
298 result.ref_pic_set_lt_curr[i] = 0xFF;
299 }
300
301 for (i = 0; i < pic->NumPocStCurrBefore; ++i)
302 result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
303
304 for (i = 0; i < pic->NumPocStCurrAfter; ++i)
305 result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
306
307 for (i = 0; i < pic->NumPocLtCurr; ++i)
308 result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
309
310 for (i = 0; i < 6; ++i)
311 result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
312
313 for (i = 0; i < 2; ++i)
314 result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
315
316 memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16);
317 memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64);
318 memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
319 memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
320
321 for (i = 0 ; i < 2 ; i++) {
322 for (int j = 0 ; j < 15 ; j++)
323 result.direct_reflist[i][j] = pic->RefPicList[i][j];
324 }
325
326 if ((pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) &&
327 (target->buffer_format == PIPE_FORMAT_NV12)) {
328 result.p010_mode = 0;
329 result.luma_10to8 = 5;
330 result.chroma_10to8 = 5;
331 result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
332 result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
333 }
334
335 return result;
336 }
337
338 static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec)
339 {
340 unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
341 unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
342
343 unsigned max_references = dec->base.max_references + 1;
344
345 if (dec->base.width * dec->base.height >= 4096*2000)
346 max_references = MAX2(max_references, 8);
347 else
348 max_references = MAX2(max_references, 17);
349
350 width = align (width, 16);
351 height = align (height, 16);
352 return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
353 }
354
355 static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec, struct pipe_h265_picture_desc *pic)
356 {
357 unsigned block_size, log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
358 unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
359 unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
360
361 unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
362 unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
363 unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 ||
364 pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1;
365
366 unsigned max_references = dec->base.max_references + 1;
367
368 if (dec->base.width * dec->base.height >= 4096*2000)
369 max_references = MAX2(max_references, 8);
370 else
371 max_references = MAX2(max_references, 17);
372
373 block_size = (1 << (pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3));
374 log2_ctb_size = block_size + pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
375
376 width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
377 height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
378
379 num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
380 context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
381 max_mb_address = (unsigned) ceil(height * 8 / 2048.0);
382
383 cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
384 db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
385
386 return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
387 }
388
389 static rvcn_dec_message_vc1_t get_vc1_msg(struct pipe_vc1_picture_desc *pic)
390 {
391 rvcn_dec_message_vc1_t result;
392
393 memset(&result, 0, sizeof(result));
394 switch(pic->base.profile) {
395 case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
396 result.profile = RDECODE_VC1_PROFILE_SIMPLE;
397 result.level = 1;
398 break;
399
400 case PIPE_VIDEO_PROFILE_VC1_MAIN:
401 result.profile = RDECODE_VC1_PROFILE_MAIN;
402 result.level = 2;
403 break;
404
405 case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
406 result.profile = RDECODE_VC1_PROFILE_ADVANCED;
407 result.level = 4;
408 break;
409
410 default:
411 assert(0);
412 }
413
414 result.sps_info_flags |= pic->postprocflag << 7;
415 result.sps_info_flags |= pic->pulldown << 6;
416 result.sps_info_flags |= pic->interlace << 5;
417 result.sps_info_flags |= pic->tfcntrflag << 4;
418 result.sps_info_flags |= pic->finterpflag << 3;
419 result.sps_info_flags |= pic->psf << 1;
420
421 result.pps_info_flags |= pic->range_mapy_flag << 31;
422 result.pps_info_flags |= pic->range_mapy << 28;
423 result.pps_info_flags |= pic->range_mapuv_flag << 27;
424 result.pps_info_flags |= pic->range_mapuv << 24;
425 result.pps_info_flags |= pic->multires << 21;
426 result.pps_info_flags |= pic->maxbframes << 16;
427 result.pps_info_flags |= pic->overlap << 11;
428 result.pps_info_flags |= pic->quantizer << 9;
429 result.pps_info_flags |= pic->panscan_flag << 7;
430 result.pps_info_flags |= pic->refdist_flag << 6;
431 result.pps_info_flags |= pic->vstransform << 0;
432
433 if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {
434 result.pps_info_flags |= pic->syncmarker << 20;
435 result.pps_info_flags |= pic->rangered << 19;
436 result.pps_info_flags |= pic->loopfilter << 5;
437 result.pps_info_flags |= pic->fastuvmc << 4;
438 result.pps_info_flags |= pic->extended_mv << 3;
439 result.pps_info_flags |= pic->extended_dmv << 8;
440 result.pps_info_flags |= pic->dquant << 1;
441 }
442
443 result.chroma_format = 1;
444
445 return result;
446 }
447
448 static void rvcn_dec_message_create(struct radeon_decoder *dec)
449 {
450 rvcn_dec_message_header_t *header = dec->msg;
451 rvcn_dec_message_create_t *create = dec->msg + sizeof(rvcn_dec_message_header_t);
452 unsigned sizes = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
453
454 memset(dec->msg, 0, sizes);
455 header->header_size = sizeof(rvcn_dec_message_header_t);
456 header->total_size = sizes;
457 header->num_buffers = 1;
458 header->msg_type = RDECODE_MSG_CREATE;
459 header->stream_handle = dec->stream_handle;
460 header->status_report_feedback_number = 0;
461
462 header->index[0].message_id = RDECODE_MESSAGE_CREATE;
463 header->index[0].offset = sizeof(rvcn_dec_message_header_t);
464 header->index[0].size = sizeof(rvcn_dec_message_create_t);
465 header->index[0].filled = 0;
466
467 create->stream_type = dec->stream_type;
468 create->session_flags = 0;
469 create->width_in_samples = dec->base.width;
470 create->height_in_samples = dec->base.height;
471 }
472
473 static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
474 struct pipe_video_buffer *target,
475 struct pipe_picture_desc *picture)
476 {
477 struct r600_texture *luma = (struct r600_texture *)
478 ((struct vl_video_buffer *)target)->resources[0];
479 struct r600_texture *chroma = (struct r600_texture *)
480 ((struct vl_video_buffer *)target)->resources[1];
481 rvcn_dec_message_header_t *header;
482 rvcn_dec_message_index_t *index;
483 rvcn_dec_message_decode_t *decode;
484 unsigned sizes = 0, offset_decode, offset_codec;
485 void *codec;
486
487 header = dec->msg;
488 sizes += sizeof(rvcn_dec_message_header_t);
489 index = (void*)header + sizeof(rvcn_dec_message_header_t);
490 sizes += sizeof(rvcn_dec_message_index_t);
491 offset_decode = sizes;
492 decode = (void*)index + sizeof(rvcn_dec_message_index_t);
493 sizes += sizeof(rvcn_dec_message_decode_t);
494 offset_codec = sizes;
495 codec = (void*)decode + sizeof(rvcn_dec_message_decode_t);
496
497 memset(dec->msg, 0, sizes);
498 header->header_size = sizeof(rvcn_dec_message_header_t);
499 header->total_size = sizes;
500 header->num_buffers = 2;
501 header->msg_type = RDECODE_MSG_DECODE;
502 header->stream_handle = dec->stream_handle;
503 header->status_report_feedback_number = dec->frame_number;
504
505 header->index[0].message_id = RDECODE_MESSAGE_DECODE;
506 header->index[0].offset = offset_decode;
507 header->index[0].size = sizeof(rvcn_dec_message_decode_t);
508 header->index[0].filled = 0;
509
510 index->offset = offset_codec;
511 index->size = sizeof(rvcn_dec_message_avc_t);
512 index->filled = 0;
513
514 decode->stream_type = dec->stream_type;;
515 decode->decode_flags = 0x1;
516 decode->width_in_samples = dec->base.width;;
517 decode->height_in_samples = dec->base.height;;
518
519 decode->bsd_size = align(dec->bs_size, 128);
520 decode->dpb_size = dec->dpb.res->buf->size;
521 decode->dt_size =
522 ((struct r600_resource *)((struct vl_video_buffer *)target)->resources[0])->buf->size +
523 ((struct r600_resource *)((struct vl_video_buffer *)target)->resources[1])->buf->size;
524
525 decode->sct_size = 0;
526 decode->sc_coeff_size = 0;
527
528 decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE;
529 decode->db_pitch = align(dec->base.width, 32);
530 decode->db_surf_tile_config = 0;
531
532 decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.bpe;;
533 decode->dt_uv_pitch = decode->dt_pitch / 2;
534
535 decode->dt_tiling_mode = 0;
536 decode->dt_swizzle_mode = RDECODE_SW_MODE_LINEAR;
537 decode->dt_array_mode = RDECODE_ARRAY_MODE_LINEAR;
538 decode->dt_field_mode = ((struct vl_video_buffer *)target)->base.interlaced;
539 decode->dt_surf_tile_config = 0;
540 decode->dt_uv_surf_tile_config = 0;
541
542 decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
543 decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
544 if (decode->dt_field_mode) {
545 decode->dt_luma_bottom_offset = luma->surface.u.gfx9.surf_offset +
546 luma->surface.u.gfx9.surf_slice_size;
547 decode->dt_chroma_bottom_offset = chroma->surface.u.gfx9.surf_offset +
548 chroma->surface.u.gfx9.surf_slice_size;
549 } else {
550 decode->dt_luma_bottom_offset = decode->dt_luma_top_offset;
551 decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset;
552 }
553
554 switch (u_reduce_video_profile(picture->profile)) {
555 case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
556 rvcn_dec_message_avc_t avc =
557 get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
558 memcpy(codec, (void*)&avc, sizeof(rvcn_dec_message_avc_t));
559 index->message_id = RDECODE_MESSAGE_AVC;
560 break;
561 }
562 case PIPE_VIDEO_FORMAT_HEVC: {
563 rvcn_dec_message_hevc_t hevc =
564 get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture);
565
566 memcpy(codec, (void*)&hevc, sizeof(rvcn_dec_message_hevc_t));
567 index->message_id = RDECODE_MESSAGE_HEVC;
568 if (dec->ctx.res == NULL) {
569 unsigned ctx_size;
570 if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
571 ctx_size = calc_ctx_size_h265_main10(dec,
572 (struct pipe_h265_picture_desc*)picture);
573 else
574 ctx_size = calc_ctx_size_h265_main(dec);
575 if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
576 RVID_ERR("Can't allocated context buffer.\n");
577 rvid_clear_buffer(dec->base.context, &dec->ctx);
578 }
579 break;
580 }
581 case PIPE_VIDEO_FORMAT_VC1: {
582 rvcn_dec_message_vc1_t vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);
583
584 memcpy(codec, (void*)&vc1, sizeof(rvcn_dec_message_vc1_t));
585 if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||
586 (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {
587 decode->width_in_samples = align(decode->width_in_samples, 16) / 16;
588 decode->height_in_samples = align(decode->height_in_samples, 16) / 16;
589 }
590 index->message_id = RDECODE_MESSAGE_VC1;
591 break;
592
593 }
594 default:
595 assert(0);
596 return NULL;
597 }
598
599 if (dec->ctx.res)
600 decode->hw_ctxt_size = dec->ctx.res->buf->size;
601
602 return luma->resource.buf;
603 }
604
605 static void rvcn_dec_message_destroy(struct radeon_decoder *dec)
606 {
607 rvcn_dec_message_header_t *header = dec->msg;
608
609 memset(dec->msg, 0, sizeof(rvcn_dec_message_header_t));
610 header->header_size = sizeof(rvcn_dec_message_header_t);
611 header->total_size = sizeof(rvcn_dec_message_header_t) -
612 sizeof(rvcn_dec_message_index_t);
613 header->num_buffers = 0;
614 header->msg_type = RDECODE_MSG_DESTROY;
615 header->stream_handle = dec->stream_handle;
616 header->status_report_feedback_number = 0;
617 }
618
619 static void rvcn_dec_message_feedback(struct radeon_decoder *dec)
620 {
621 rvcn_dec_feedback_header_t *header = (void*)dec->fb;
622
623 header->header_size = sizeof(rvcn_dec_feedback_header_t);
624 header->total_size = sizeof(rvcn_dec_feedback_header_t);
625 header->num_buffers = 0;
626 }
627
628 /* flush IB to the hardware */
629 static int flush(struct radeon_decoder *dec, unsigned flags)
630 {
631 return dec->ws->cs_flush(dec->cs, flags, NULL);
632 }
633
634 /* add a new set register command to the IB */
635 static void set_reg(struct radeon_decoder *dec, unsigned reg, uint32_t val)
636 {
637 radeon_emit(dec->cs, RDECODE_PKT0(reg >> 2, 0));
638 radeon_emit(dec->cs, val);
639 }
640
641 /* send a command to the VCPU through the GPCOM registers */
642 static void send_cmd(struct radeon_decoder *dec, unsigned cmd,
643 struct pb_buffer* buf, uint32_t off,
644 enum radeon_bo_usage usage, enum radeon_bo_domain domain)
645 {
646 uint64_t addr;
647
648 dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
649 domain, RADEON_PRIO_UVD);
650 addr = dec->ws->buffer_get_virtual_address(buf);
651 addr = addr + off;
652
653 set_reg(dec, RDECODE_GPCOM_VCPU_DATA0, addr);
654 set_reg(dec, RDECODE_GPCOM_VCPU_DATA1, addr >> 32);
655 set_reg(dec, RDECODE_GPCOM_VCPU_CMD, cmd << 1);
656 }
657
658 /* do the codec needs an IT buffer ?*/
659 static bool have_it(struct radeon_decoder *dec)
660 {
661 return dec->stream_type == RDECODE_CODEC_H264_PERF ||
662 dec->stream_type == RDECODE_CODEC_H265;
663 }
664
665 /* map the next available message/feedback/itscaling buffer */
666 static void map_msg_fb_it_buf(struct radeon_decoder *dec)
667 {
668 struct rvid_buffer* buf;
669 uint8_t *ptr;
670
671 /* grab the current message/feedback buffer */
672 buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
673
674 /* and map it for CPU access */
675 ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
676
677 /* calc buffer offsets */
678 dec->msg = ptr;
679
680 dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
681 if (have_it(dec))
682 dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
683 }
684
685 /* unmap and send a message command to the VCPU */
686 static void send_msg_buf(struct radeon_decoder *dec)
687 {
688 struct rvid_buffer* buf;
689
690 /* ignore the request if message/feedback buffer isn't mapped */
691 if (!dec->msg || !dec->fb)
692 return;
693
694 /* grab the current message buffer */
695 buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
696
697 /* unmap the buffer */
698 dec->ws->buffer_unmap(buf->res->buf);
699 dec->msg = NULL;
700 dec->fb = NULL;
701 dec->it = NULL;
702
703 if (dec->sessionctx.res)
704 send_cmd(dec, RDECODE_CMD_SESSION_CONTEXT_BUFFER,
705 dec->sessionctx.res->buf, 0, RADEON_USAGE_READWRITE,
706 RADEON_DOMAIN_VRAM);
707
708 /* and send it to the hardware */
709 send_cmd(dec, RDECODE_CMD_MSG_BUFFER, buf->res->buf, 0,
710 RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
711 }
712
713 /* cycle to the next set of buffers */
714 static void next_buffer(struct radeon_decoder *dec)
715 {
716 ++dec->cur_buffer;
717 dec->cur_buffer %= NUM_BUFFERS;
718 }
719
720 static unsigned calc_ctx_size_h264_perf(struct radeon_decoder *dec)
721 {
722 unsigned width_in_mb, height_in_mb, ctx_size;
723 unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
724 unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
725
726 unsigned max_references = dec->base.max_references + 1;
727
728 // picture width & height in 16 pixel units
729 width_in_mb = width / VL_MACROBLOCK_WIDTH;
730 height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
731
732 unsigned fs_in_mb = width_in_mb * height_in_mb;
733 unsigned num_dpb_buffer;
734 switch(dec->base.level) {
735 case 30:
736 num_dpb_buffer = 8100 / fs_in_mb;
737 break;
738 case 31:
739 num_dpb_buffer = 18000 / fs_in_mb;
740 break;
741 case 32:
742 num_dpb_buffer = 20480 / fs_in_mb;
743 break;
744 case 41:
745 num_dpb_buffer = 32768 / fs_in_mb;
746 break;
747 case 42:
748 num_dpb_buffer = 34816 / fs_in_mb;
749 break;
750 case 50:
751 num_dpb_buffer = 110400 / fs_in_mb;
752 break;
753 case 51:
754 num_dpb_buffer = 184320 / fs_in_mb;
755 break;
756 default:
757 num_dpb_buffer = 184320 / fs_in_mb;
758 break;
759 }
760 num_dpb_buffer++;
761 max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
762 ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256);
763
764 return ctx_size;
765 }
766
767 /* calculate size of reference picture buffer */
768 static unsigned calc_dpb_size(struct radeon_decoder *dec)
769 {
770 unsigned width_in_mb, height_in_mb, image_size, dpb_size;
771
772 // always align them to MB size for dpb calculation
773 unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
774 unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
775
776 // always one more for currently decoded picture
777 unsigned max_references = dec->base.max_references + 1;
778
779 // aligned size of a single frame
780 image_size = align(width, 32) * height;
781 image_size += image_size / 2;
782 image_size = align(image_size, 1024);
783
784 // picture width & height in 16 pixel units
785 width_in_mb = width / VL_MACROBLOCK_WIDTH;
786 height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
787
788 switch (u_reduce_video_profile(dec->base.profile)) {
789 case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
790 unsigned fs_in_mb = width_in_mb * height_in_mb;
791 unsigned num_dpb_buffer;
792
793 switch(dec->base.level) {
794 case 30:
795 num_dpb_buffer = 8100 / fs_in_mb;
796 break;
797 case 31:
798 num_dpb_buffer = 18000 / fs_in_mb;
799 break;
800 case 32:
801 num_dpb_buffer = 20480 / fs_in_mb;
802 break;
803 case 41:
804 num_dpb_buffer = 32768 / fs_in_mb;
805 break;
806 case 42:
807 num_dpb_buffer = 34816 / fs_in_mb;
808 break;
809 case 50:
810 num_dpb_buffer = 110400 / fs_in_mb;
811 break;
812 case 51:
813 num_dpb_buffer = 184320 / fs_in_mb;
814 break;
815 default:
816 num_dpb_buffer = 184320 / fs_in_mb;
817 break;
818 }
819 num_dpb_buffer++;
820 max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
821 dpb_size = image_size * max_references;
822 break;
823 }
824
825 case PIPE_VIDEO_FORMAT_HEVC:
826 if (dec->base.width * dec->base.height >= 4096*2000)
827 max_references = MAX2(max_references, 8);
828 else
829 max_references = MAX2(max_references, 17);
830
831 width = align (width, 16);
832 height = align (height, 16);
833 if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
834 dpb_size = align((align(width, 32) * height * 9) / 4, 256) * max_references;
835 else
836 dpb_size = align((align(width, 32) * height * 3) / 2, 256) * max_references;
837 break;
838
839 case PIPE_VIDEO_FORMAT_VC1:
840 // the firmware seems to allways assume a minimum of ref frames
841 max_references = MAX2(NUM_VC1_REFS, max_references);
842
843 // reference picture buffer
844 dpb_size = image_size * max_references;
845
846 // CONTEXT_BUFFER
847 dpb_size += width_in_mb * height_in_mb * 128;
848
849 // IT surface buffer
850 dpb_size += width_in_mb * 64;
851
852 // DB surface buffer
853 dpb_size += width_in_mb * 128;
854
855 // BP
856 dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
857 break;
858
859 case PIPE_VIDEO_FORMAT_MPEG12:
860 // reference picture buffer, must be big enough for all frames
861 dpb_size = image_size * NUM_MPEG2_REFS;
862 break;
863
864 case PIPE_VIDEO_FORMAT_MPEG4:
865 // reference picture buffer
866 dpb_size = image_size * max_references;
867
868 // CM
869 dpb_size += width_in_mb * height_in_mb * 64;
870
871 // IT surface buffer
872 dpb_size += align(width_in_mb * height_in_mb * 32, 64);
873
874 dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
875 break;
876
877 default:
878 // something is missing here
879 assert(0);
880
881 // at least use a sane default value
882 dpb_size = 32 * 1024 * 1024;
883 break;
884 }
885 return dpb_size;
886 }
887
888 /**
889 * destroy this video decoder
890 */
891 static void radeon_dec_destroy(struct pipe_video_codec *decoder)
892 {
893 struct radeon_decoder *dec = (struct radeon_decoder*)decoder;
894 unsigned i;
895
896 assert(decoder);
897
898 map_msg_fb_it_buf(dec);
899 rvcn_dec_message_destroy(dec);
900 send_msg_buf(dec);
901
902 flush(dec, 0);
903
904 dec->ws->cs_destroy(dec->cs);
905
906 for (i = 0; i < NUM_BUFFERS; ++i) {
907 rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
908 rvid_destroy_buffer(&dec->bs_buffers[i]);
909 }
910
911 rvid_destroy_buffer(&dec->dpb);
912 rvid_destroy_buffer(&dec->ctx);
913 rvid_destroy_buffer(&dec->sessionctx);
914
915 FREE(dec);
916 }
917
918 /**
919 * start decoding of a new frame
920 */
921 static void radeon_dec_begin_frame(struct pipe_video_codec *decoder,
922 struct pipe_video_buffer *target,
923 struct pipe_picture_desc *picture)
924 {
925 struct radeon_decoder *dec = (struct radeon_decoder*)decoder;
926 uintptr_t frame;
927
928 assert(decoder);
929
930 frame = ++dec->frame_number;
931 vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
932 &radeon_dec_destroy_associated_data);
933
934 dec->bs_size = 0;
935 dec->bs_ptr = dec->ws->buffer_map(
936 dec->bs_buffers[dec->cur_buffer].res->buf,
937 dec->cs, PIPE_TRANSFER_WRITE);
938 }
939
/**
 * Macroblock-level decoding entry point.
 *
 * Not implemented by this decoder: the function only asserts, so it is a
 * no-op when assertions are compiled out.
 */
static void radeon_dec_decode_macroblock(struct pipe_video_codec *decoder,
					 struct pipe_video_buffer *target,
					 struct pipe_picture_desc *picture,
					 const struct pipe_macroblock *macroblocks,
					 unsigned num_macroblocks)
{
	/* every argument is intentionally ignored */
	(void)decoder;
	(void)target;
	(void)picture;
	(void)macroblocks;
	(void)num_macroblocks;

	/* not supported (yet) */
	assert(0);
}
952
953 /**
954 * decode a bitstream
955 */
956 static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder,
957 struct pipe_video_buffer *target,
958 struct pipe_picture_desc *picture,
959 unsigned num_buffers,
960 const void * const *buffers,
961 const unsigned *sizes)
962 {
963 struct radeon_decoder *dec = (struct radeon_decoder*)decoder;
964 unsigned i;
965
966 assert(decoder);
967
968 if (!dec->bs_ptr)
969 return;
970
971 for (i = 0; i < num_buffers; ++i) {
972 struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
973 unsigned new_size = dec->bs_size + sizes[i];
974
975 if (new_size > buf->res->buf->size) {
976 dec->ws->buffer_unmap(buf->res->buf);
977 if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
978 RVID_ERR("Can't resize bitstream buffer!");
979 return;
980 }
981
982 dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
983 PIPE_TRANSFER_WRITE);
984 if (!dec->bs_ptr)
985 return;
986
987 dec->bs_ptr += dec->bs_size;
988 }
989
990 memcpy(dec->bs_ptr, buffers[i], sizes[i]);
991 dec->bs_size += sizes[i];
992 dec->bs_ptr += sizes[i];
993 }
994 }
995
996 /**
997 * end decoding of the current frame
998 */
999 static void radeon_dec_end_frame(struct pipe_video_codec *decoder,
1000 struct pipe_video_buffer *target,
1001 struct pipe_picture_desc *picture)
1002 {
1003 struct radeon_decoder *dec = (struct radeon_decoder*)decoder;
1004 struct pb_buffer *dt;
1005 struct rvid_buffer *msg_fb_it_buf, *bs_buf;
1006
1007 assert(decoder);
1008
1009 if (!dec->bs_ptr)
1010 return;
1011
1012 msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
1013 bs_buf = &dec->bs_buffers[dec->cur_buffer];
1014
1015 memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size);
1016 dec->ws->buffer_unmap(bs_buf->res->buf);
1017
1018 map_msg_fb_it_buf(dec);
1019 dt = rvcn_dec_message_decode(dec, target, picture);
1020 rvcn_dec_message_feedback(dec);
1021 send_msg_buf(dec);
1022
1023 send_cmd(dec, RDECODE_CMD_DPB_BUFFER, dec->dpb.res->buf, 0,
1024 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
1025 if (dec->ctx.res)
1026 send_cmd(dec, RDECODE_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0,
1027 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
1028 send_cmd(dec, RDECODE_CMD_BITSTREAM_BUFFER, bs_buf->res->buf,
1029 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
1030 send_cmd(dec, RDECODE_CMD_DECODING_TARGET_BUFFER, dt, 0,
1031 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
1032 send_cmd(dec, RDECODE_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf,
1033 FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
1034 if (have_it(dec))
1035 send_cmd(dec, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf,
1036 FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
1037 set_reg(dec, RDECODE_ENGINE_CNTL, 1);
1038
1039 flush(dec, RADEON_FLUSH_ASYNC);
1040 next_buffer(dec);
1041 }
1042
/**
 * Flush hook of the video codec interface.
 *
 * Intentionally empty: radeon_dec_end_frame() already flushes the command
 * stream for every frame, so nothing is left to submit here.
 */
static void radeon_dec_flush(struct pipe_video_codec *decoder)
{
	/* nothing to do */
	(void)decoder;
}
1049
1050 /**
1051 * create and HW decoder
1052 */
1053 struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
1054 const struct pipe_video_codec *templ)
1055 {
1056 struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
1057 struct r600_common_context *rctx = (struct r600_common_context*)context;
1058 unsigned width = templ->width, height = templ->height;
1059 unsigned dpb_size, bs_buf_size, stream_type = 0;
1060 struct radeon_decoder *dec;
1061 int r, i;
1062
1063 switch(u_reduce_video_profile(templ->profile)) {
1064 case PIPE_VIDEO_FORMAT_MPEG12:
1065 if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
1066 return vl_create_mpeg12_decoder(context, templ);
1067 stream_type = RDECODE_CODEC_MPEG2_VLD;
1068 break;
1069 case PIPE_VIDEO_FORMAT_MPEG4:
1070 width = align(width, VL_MACROBLOCK_WIDTH);
1071 height = align(height, VL_MACROBLOCK_HEIGHT);
1072 stream_type = RDECODE_CODEC_MPEG4;
1073 break;
1074 case PIPE_VIDEO_FORMAT_VC1:
1075 stream_type = RDECODE_CODEC_VC1;
1076 break;
1077 case PIPE_VIDEO_FORMAT_MPEG4_AVC:
1078 width = align(width, VL_MACROBLOCK_WIDTH);
1079 height = align(height, VL_MACROBLOCK_HEIGHT);
1080 stream_type = RDECODE_CODEC_H264_PERF;
1081 break;
1082 case PIPE_VIDEO_FORMAT_HEVC:
1083 stream_type = RDECODE_CODEC_H265;
1084 break;
1085 default:
1086 assert(0);
1087 break;
1088 }
1089
1090 dec = CALLOC_STRUCT(radeon_decoder);
1091
1092 if (!dec)
1093 return NULL;
1094
1095 dec->base = *templ;
1096 dec->base.context = context;
1097 dec->base.width = width;
1098 dec->base.height = height;
1099
1100 dec->base.destroy = radeon_dec_destroy;
1101 dec->base.begin_frame = radeon_dec_begin_frame;
1102 dec->base.decode_macroblock = radeon_dec_decode_macroblock;
1103 dec->base.decode_bitstream = radeon_dec_decode_bitstream;
1104 dec->base.end_frame = radeon_dec_end_frame;
1105 dec->base.flush = radeon_dec_flush;
1106
1107 dec->stream_type = stream_type;
1108 dec->stream_handle = rvid_alloc_stream_handle();
1109 dec->screen = context->screen;
1110 dec->ws = ws;
1111 dec->cs = ws->cs_create(rctx->ctx, RING_VCN_DEC, NULL, NULL);
1112 if (!dec->cs) {
1113 RVID_ERR("Can't get command submission context.\n");
1114 goto error;
1115 }
1116
1117 bs_buf_size = width * height * (512 / (16 * 16));
1118 for (i = 0; i < NUM_BUFFERS; ++i) {
1119 unsigned msg_fb_it_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
1120 if (have_it(dec))
1121 msg_fb_it_size += IT_SCALING_TABLE_SIZE;
1122 if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
1123 msg_fb_it_size, PIPE_USAGE_STAGING)) {
1124 RVID_ERR("Can't allocated message buffers.\n");
1125 goto error;
1126 }
1127
1128 if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i],
1129 bs_buf_size, PIPE_USAGE_STAGING)) {
1130 RVID_ERR("Can't allocated bitstream buffers.\n");
1131 goto error;
1132 }
1133
1134 rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
1135 rvid_clear_buffer(context, &dec->bs_buffers[i]);
1136 }
1137
1138 dpb_size = calc_dpb_size(dec);
1139
1140 if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
1141 RVID_ERR("Can't allocated dpb.\n");
1142 goto error;
1143 }
1144
1145 rvid_clear_buffer(context, &dec->dpb);
1146
1147 if (dec->stream_type == RDECODE_CODEC_H264_PERF) {
1148 unsigned ctx_size = calc_ctx_size_h264_perf(dec);
1149 if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
1150 RVID_ERR("Can't allocated context buffer.\n");
1151 goto error;
1152 }
1153 rvid_clear_buffer(context, &dec->ctx);
1154 }
1155
1156 if (!rvid_create_buffer(dec->screen, &dec->sessionctx,
1157 RDECODE_SESSION_CONTEXT_SIZE,
1158 PIPE_USAGE_DEFAULT)) {
1159 RVID_ERR("Can't allocated session ctx.\n");
1160 goto error;
1161 }
1162 rvid_clear_buffer(context, &dec->sessionctx);
1163
1164 map_msg_fb_it_buf(dec);
1165 rvcn_dec_message_create(dec);
1166 send_msg_buf(dec);
1167 r = flush(dec, 0);
1168 if (r)
1169 goto error;
1170
1171 next_buffer(dec);
1172
1173 return &dec->base;
1174
1175 error:
1176 if (dec->cs) dec->ws->cs_destroy(dec->cs);
1177
1178 for (i = 0; i < NUM_BUFFERS; ++i) {
1179 rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
1180 rvid_destroy_buffer(&dec->bs_buffers[i]);
1181 }
1182
1183 rvid_destroy_buffer(&dec->dpb);
1184 rvid_destroy_buffer(&dec->ctx);
1185 rvid_destroy_buffer(&dec->sessionctx);
1186
1187 FREE(dec);
1188
1189 return NULL;
1190 }