radeon/vcn: add decode message for hevc codec
author Leo Liu <leo.liu@amd.com>
Wed, 3 May 2017 13:40:44 +0000 (09:40 -0400)
committer Leo Liu <leo.liu@amd.com>
Thu, 25 May 2017 15:40:20 +0000 (11:40 -0400)
Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
src/gallium/drivers/radeon/radeon_vcn_dec.c

index c99e77013c0610267d0b3d5d8e454453d3349741..447a2009896adc233c0321a731e20c50a25ba75b 100644 (file)
@@ -180,6 +180,212 @@ static void radeon_dec_destroy_associated_data(void *data)
        /* NOOP, since we only use an intptr */
 }
 
+/*
+ * Build the HEVC decode message from an H.265 picture description.
+ *
+ * dec:    decoder; dec->it receives the scaling list tables and dec->base is
+ *         the key used for the per-buffer associated data.
+ * target: buffer being decoded into; it gets tagged with the current
+ *         picture order count.
+ * pic:    H.265 picture description supplying the SPS/PPS values and the
+ *         reference picture information.
+ *
+ * Returns the filled-in message by value; fields not assigned here stay zero.
+ */
+static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec,
+                                       struct pipe_video_buffer *target,
+                                       struct pipe_h265_picture_desc *pic)
+{
+       rvcn_dec_message_hevc_t result;
+       unsigned i;
+
+       /* start from an all-zero message */
+       memset(&result, 0, sizeof(result));
+       /* SPS flags, one per bit, occupying bits 0-8 */
+       result.sps_info_flags = 0;
+       result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
+       result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
+       result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
+       result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
+       result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
+       result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
+       result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
+       result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
+       result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
+       /* bit 9 is set only when the screen's chip family is CHIP_CARRIZO */
+       if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO)
+               result.sps_info_flags |= 1 << 9;
+       /* bit 10 mirrors pic->UseRefPicList */
+       if (pic->UseRefPicList == true)
+               result.sps_info_flags |= 1 << 10;
+
+       /* plain SPS values, copied one to one */
+       result.chroma_format = pic->pps->sps->chroma_format_idc;
+       result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
+       result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
+       result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
+       result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
+       result.log2_min_luma_coding_block_size_minus3 =
+               pic->pps->sps->log2_min_luma_coding_block_size_minus3;
+       result.log2_diff_max_min_luma_coding_block_size =
+               pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
+       result.log2_min_transform_block_size_minus2 =
+               pic->pps->sps->log2_min_transform_block_size_minus2;
+       result.log2_diff_max_min_transform_block_size =
+               pic->pps->sps->log2_diff_max_min_transform_block_size;
+       result.max_transform_hierarchy_depth_inter =
+               pic->pps->sps->max_transform_hierarchy_depth_inter;
+       result.max_transform_hierarchy_depth_intra =
+               pic->pps->sps->max_transform_hierarchy_depth_intra;
+       result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
+       result.pcm_sample_bit_depth_chroma_minus1 =
+               pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
+       result.log2_min_pcm_luma_coding_block_size_minus3 =
+               pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
+       result.log2_diff_max_min_pcm_luma_coding_block_size =
+               pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
+       result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
+
+       /* PPS flags, one per bit, occupying bits 0-19 */
+       result.pps_info_flags = 0;
+       result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
+       result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
+       result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
+       result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
+       result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
+       result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
+       result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
+       result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
+       result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
+       result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
+       result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
+       result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
+       result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
+       result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
+       result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
+       result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
+       result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
+       result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
+       result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
+       result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
+
+       /* plain PPS values; note num_long_term_ref_pic_sps is read from the SPS */
+       result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
+       result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
+       result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
+       result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
+       result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
+       result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
+       result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
+       result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
+       result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
+       result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
+       result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
+       result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
+       result.init_qp_minus26 = pic->pps->init_qp_minus26;
+
+       /* tile geometry: 19 column widths and 21 row heights are copied */
+       for (i = 0; i < 19; ++i)
+               result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
+
+       for (i = 0; i < 21; ++i)
+               result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
+
+       result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
+       /* curr_idx and curr_poc are both set to the current picture order count */
+       result.curr_idx = pic->CurrPicOrderCntVal;
+       result.curr_poc = pic->CurrPicOrderCntVal;
+
+       /*
+        * Store the current POC as the target's associated data; the loop
+        * below reads the same associated data back from reference buffers.
+        */
+       vl_video_buffer_set_associated_data(target, &dec->base,
+                                           (void *)(uintptr_t)pic->CurrPicOrderCntVal,
+                                           &radeon_dec_destroy_associated_data);
+
+       /*
+        * For each of the 16 reference slots copy the POC and record the value
+        * stored on the reference buffer; slots without a buffer get 0x7F.
+        */
+       for (i = 0; i < 16; ++i) {
+               struct pipe_video_buffer *ref = pic->ref[i];
+               uintptr_t ref_pic = 0;
+
+               result.poc_list[i] = pic->PicOrderCntVal[i];
+
+               if (ref)
+                       ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
+               else
+                       ref_pic = 0x7F;
+               result.ref_pic_list[i] = ref_pic;
+       }
+
+       /* default the first 8 entries of each reference picture set to 0xFF */
+       for (i = 0; i < 8; ++i) {
+               result.ref_pic_set_st_curr_before[i] = 0xFF;
+               result.ref_pic_set_st_curr_after[i] = 0xFF;
+               result.ref_pic_set_lt_curr[i] = 0xFF;
+       }
+
+       /*
+        * Overwrite the leading entries with the sets from the picture
+        * description.
+        * NOTE(review): the Num* counts are not checked against the 8 entries
+        * initialised above - confirm the caller guarantees the bound.
+        */
+       for (i = 0; i < pic->NumPocStCurrBefore; ++i)
+               result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
+
+       for (i = 0; i < pic->NumPocStCurrAfter; ++i)
+               result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
+
+       for (i = 0; i < pic->NumPocLtCurr; ++i)
+               result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
+
+       /* DC coefficients of the 16x16 (6 entries) and 32x32 (2 entries) lists */
+       for (i = 0; i < 6; ++i)
+               result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
+
+       for (i = 0; i < 2; ++i)
+               result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
+
+       /*
+        * Pack the scaling lists back to back into dec->it: 4x4 at offset 0
+        * (96 bytes), 8x8 at 96 (384 bytes), 16x16 at 480 (384 bytes) and
+        * 32x32 at 864 (128 bytes).
+        * NOTE(review): assumes dec->it is a byte pointer so the offsets are
+        * in bytes - confirm against its declaration.
+        */
+       memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16);
+       memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64);
+       memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
+       memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
+
+       /* copy 15 entries from each of the two reference picture lists */
+       for (i = 0 ; i < 2 ; i++) {
+               for (int j = 0 ; j < 15 ; j++)
+                       result.direct_reflist[i][j] = pic->RefPicList[i][j];
+       }
+
+       /*
+        * Main10 profile decoded into an NV12 target: fill in the 10-to-8
+        * fields. The meaning of the constants is not visible here;
+        * presumably they select a 10-bit to 8-bit conversion mode.
+        */
+       if ((pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) &&
+               (target->buffer_format == PIPE_FORMAT_NV12)) {
+               result.p010_mode = 0;
+               result.luma_10to8 = 5;
+               result.chroma_10to8 = 5;
+               result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
+               result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
+       }
+
+       return result;
+}
+
+/*
+ * Size of the decoder context buffer for HEVC streams that do not use the
+ * Main10 profile.
+ *
+ * The size depends on the stream dimensions rounded up to the macroblock
+ * size and to 16, and on the number of references, which is raised to at
+ * least 8 for streams of 4096*2000 pixels or more and to at least 17
+ * otherwise.
+ */
+static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec)
+{
+       unsigned aligned_w = align(align(dec->base.width, VL_MACROBLOCK_WIDTH), 16);
+       unsigned aligned_h = align(align(dec->base.height, VL_MACROBLOCK_HEIGHT), 16);
+       unsigned num_refs = dec->base.max_references + 1;
+       unsigned blocks_w, blocks_h;
+
+       /* large streams get a lower floor on the reference count */
+       if (dec->base.width * dec->base.height < 4096*2000)
+               num_refs = MAX2(num_refs, 17);
+       else
+               num_refs = MAX2(num_refs, 8);
+
+       blocks_w = (aligned_w + 255) / 16;
+       blocks_h = (aligned_h + 255) / 16;
+
+       return blocks_w * blocks_h * 16 * num_refs + 52 * 1024;
+}
+
+/*
+ * Size of the decoder context buffer for HEVC Main10 streams.
+ *
+ * dec: decoder supplying the stream dimensions and maximum reference count.
+ * pic: picture description supplying the SPS bit depths and coding block
+ *      sizes.
+ *
+ * The result is the sum of three parts: the per-CTB context for every
+ * reference, a fixed left-tile context area, and a left-tile pixel area that
+ * is doubled when either bit depth exceeds 8 bits.
+ */
+static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec, struct pipe_h265_picture_desc *pic)
+{
+       unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
+       unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
+       unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
+
+       unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
+       unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
+       /* any bit depth above 8 doubles the left-tile pixel area */
+       unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 ||
+                       pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1;
+
+       unsigned max_references = dec->base.max_references + 1;
+
+       /* large streams get a lower floor on the reference count */
+       if (dec->base.width * dec->base.height >= 4096*2000)
+               max_references = MAX2(max_references, 8);
+       else
+               max_references = MAX2(max_references, 17);
+
+       /*
+        * CtbLog2SizeY = MinCbLog2SizeY + log2_diff_max_min_luma_coding_block_size.
+        * Both terms must be log2 values: adding the linear block size
+        * (1 << MinCbLog2SizeY) instead yields a bogus shift count that
+        * under-sizes the buffer and can exceed the width of an int.
+        */
+       log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 +
+               pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
+
+       /* picture size in CTBs, rounded up */
+       width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
+       height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
+
+       num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
+       context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
+       max_mb_address = (unsigned) ceil(height * 8 / 2048.0);
+
+       cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
+       db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
+
+       return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
+}
+
 static void rvcn_dec_message_create(struct radeon_decoder *dec)
 {
        rvcn_dec_message_header_t *header = dec->msg;
@@ -294,6 +500,25 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
                index->message_id = RDECODE_MESSAGE_AVC;
                break;
        }
+       case PIPE_VIDEO_FORMAT_HEVC: {
+               rvcn_dec_message_hevc_t hevc =
+                       get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture);
+
+               memcpy(codec, (void*)&hevc, sizeof(rvcn_dec_message_hevc_t));
+               index->message_id = RDECODE_MESSAGE_HEVC;
+               if (dec->ctx.res == NULL) {
+                       unsigned ctx_size;
+                       if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
+                               ctx_size = calc_ctx_size_h265_main10(dec,
+                                       (struct pipe_h265_picture_desc*)picture);
+                       else
+                               ctx_size = calc_ctx_size_h265_main(dec);
+                       if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
+                               RVID_ERR("Can't allocated context buffer.\n");
+                       rvid_clear_buffer(dec->base.context, &dec->ctx);
+               }
+               break;
+       }
        default:
                assert(0);
                return NULL;