X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeon%2Fradeon_uvd.c;h=93fe147b5ebd7fa6f2223b837ba3cfc586c7808e;hb=e9c76eeeaa673331fec6056a4baa30095de42f5e;hp=154ef8e4038c469fe62a1772ad5ed0dc7e8f057e;hpb=9c353ea29370251f853cd75a0d70975f1d9c7fbc;p=mesa.git diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 154ef8e4038..93fe147b5eb 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -37,7 +37,7 @@ #include #include -#include "pipe/p_video_decoder.h" +#include "pipe/p_video_codec.h" #include "util/u_memory.h" #include "util/u_video.h" @@ -45,191 +45,144 @@ #include "vl/vl_defines.h" #include "vl/vl_mpeg12_decoder.h" -#include "../../winsys/radeon/drm/radeon_winsys.h" +#include "r600_pipe_common.h" +#include "radeon_video.h" #include "radeon_uvd.h" -#define RUVD_ERR(fmt, args...) \ - fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args) - #define NUM_BUFFERS 4 #define NUM_MPEG2_REFS 6 #define NUM_H264_REFS 17 +#define NUM_VC1_REFS 5 -/* UVD buffer representation */ -struct ruvd_buffer -{ - struct pb_buffer* buf; - struct radeon_winsys_cs_handle* cs_handle; -}; +#define FB_BUFFER_OFFSET 0x1000 +#define FB_BUFFER_SIZE 2048 +#define FB_BUFFER_SIZE_TONGA (2048 * 64) +#define IT_SCALING_TABLE_SIZE 992 +#define UVD_SESSION_CONTEXT_SIZE (128 * 1024) /* UVD decoder representation */ struct ruvd_decoder { - struct pipe_video_decoder base; + struct pipe_video_codec base; ruvd_set_dtb set_dtb; unsigned stream_handle; + unsigned stream_type; unsigned frame_number; + struct pipe_screen *screen; struct radeon_winsys* ws; struct radeon_winsys_cs* cs; unsigned cur_buffer; - struct ruvd_buffer msg_fb_buffers[NUM_BUFFERS]; - struct ruvd_buffer bs_buffers[NUM_BUFFERS]; + struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS]; + struct ruvd_msg *msg; + uint32_t *fb; + unsigned fb_size; + uint8_t *it; + + struct rvid_buffer bs_buffers[NUM_BUFFERS]; void* bs_ptr; unsigned bs_size; - struct ruvd_buffer dpb; + struct rvid_buffer dpb; + bool use_legacy; + struct rvid_buffer ctx; + struct rvid_buffer sessionctx; }; -/* generate an UVD stream handle */ -static unsigned alloc_stream_handle() -{ - static unsigned counter = 0; - unsigned stream_handle = 0; - unsigned pid = getpid(); - int i; - - for (i = 0; i < 32; ++i) - stream_handle |= ((pid >> i) & 1) << (31 - i); - - stream_handle ^= ++counter; - return stream_handle; -} - /* flush IB to the hardware */ -static void flush(struct ruvd_decoder *dec) +static int flush(struct ruvd_decoder *dec, unsigned flags) { - uint32_t *pm4 = dec->cs->buf; - - // align IB - while(dec->cs->cdw % 16) - pm4[dec->cs->cdw++] = RUVD_PKT2(); - - dec->ws->cs_flush(dec->cs, 0, 0); + return dec->ws->cs_flush(dec->cs, flags, NULL); } /* add a new set register command to the IB */ static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val) { - uint32_t *pm4 = dec->cs->buf; - pm4[dec->cs->cdw++] = RUVD_PKT0(reg >> 2, 0); - pm4[dec->cs->cdw++] = val; + radeon_emit(dec->cs, RUVD_PKT0(reg >> 2, 0)); + radeon_emit(dec->cs, val); } /* send a command to the VCPU through the GPCOM registers */ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, - struct radeon_winsys_cs_handle* cs_buf, uint32_t off, + struct pb_buffer* buf, uint32_t off, enum radeon_bo_usage usage, enum radeon_bo_domain domain) { int reloc_idx; - reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain); - set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off); - 
set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4); + reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, + domain, + RADEON_PRIO_UVD); + if (!dec->use_legacy) { + uint64_t addr; + addr = dec->ws->buffer_get_virtual_address(buf); + addr = addr + off; + set_reg(dec, RUVD_GPCOM_VCPU_DATA0, addr); + set_reg(dec, RUVD_GPCOM_VCPU_DATA1, addr >> 32); + } else { + off += dec->ws->buffer_get_reloc_offset(buf); + set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off); + set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4); + } set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1); } -/* send a message command to the VCPU */ -static void send_msg(struct ruvd_decoder *dec, struct ruvd_msg *msg) +/* do the codec needs an IT buffer ?*/ +static bool have_it(struct ruvd_decoder *dec) { - struct ruvd_buffer* buf; - void *ptr; - - /* grap a message buffer */ - buf = &dec->msg_fb_buffers[dec->cur_buffer]; - - /* copy the message into it */ - ptr = dec->ws->buffer_map(buf->cs_handle, dec->cs, PIPE_TRANSFER_WRITE); - if (!ptr) - return; - - memcpy(ptr, msg, sizeof(*msg)); - memset(ptr + sizeof(*msg), 0, buf->buf->size - sizeof(*msg)); - dec->ws->buffer_unmap(buf->cs_handle); - - /* and send it to the hardware */ - send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->cs_handle, 0, - RADEON_USAGE_READ, RADEON_DOMAIN_VRAM); + return dec->stream_type == RUVD_CODEC_H264_PERF || + dec->stream_type == RUVD_CODEC_H265; } -/* create a buffer in the winsys */ -static bool create_buffer(struct ruvd_decoder *dec, - struct ruvd_buffer *buffer, - unsigned size) +/* map the next available message/feedback/itscaling buffer */ +static void map_msg_fb_it_buf(struct ruvd_decoder *dec) { - buffer->buf = dec->ws->buffer_create(dec->ws, size, 4096, false, - RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM); - if (!buffer->buf) - return false; + struct rvid_buffer* buf; + uint8_t *ptr; - buffer->cs_handle = dec->ws->buffer_get_cs_handle(buffer->buf); - if (!buffer->cs_handle) - return false; + /* grab the current message/feedback buffer */ + buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; - return true; -} + /* and map it for CPU access */ + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); -/* destroy a buffer */ -static void destroy_buffer(struct ruvd_buffer *buffer) -{ - pb_reference(&buffer->buf, NULL); - buffer->cs_handle = NULL; + /* calc buffer offsets */ + dec->msg = (struct ruvd_msg *)ptr; + dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); + if (have_it(dec)) + dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size); } -/* reallocate a buffer, preserving its content */ -static bool resize_buffer(struct ruvd_decoder *dec, - struct ruvd_buffer *new_buf, - unsigned new_size) +/* unmap and send a message command to the VCPU */ +static void send_msg_buf(struct ruvd_decoder *dec) { - unsigned bytes = MIN2(new_buf->buf->size, new_size); - struct ruvd_buffer old_buf = *new_buf; - void *src = NULL, *dst = NULL; + struct rvid_buffer* buf; - if (!create_buffer(dec, new_buf, new_size)) - goto error; + /* ignore the request if message/feedback buffer isn't mapped */ + if (!dec->msg || !dec->fb) + return; - src = dec->ws->buffer_map(old_buf.cs_handle, dec->cs, PIPE_TRANSFER_READ); - if (!src) - goto error; + /* grab the current message buffer */ + buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; - dst = dec->ws->buffer_map(new_buf->cs_handle, dec->cs, PIPE_TRANSFER_WRITE); - if (!dst) - goto error; + /* unmap the buffer */ + dec->ws->buffer_unmap(buf->res->buf); + dec->msg = NULL; + dec->fb = NULL; + dec->it = NULL; - 
memcpy(dst, src, bytes); - if (new_size > bytes) { - new_size -= bytes; - dst += bytes; - memset(dst, 0, new_size); - } - dec->ws->buffer_unmap(new_buf->cs_handle); - dec->ws->buffer_unmap(old_buf.cs_handle); - destroy_buffer(&old_buf); - return true; - -error: - if (src) dec->ws->buffer_unmap(old_buf.cs_handle); - destroy_buffer(new_buf); - *new_buf = old_buf; - return false; -} -/* clear the buffer with zeros */ -static void clear_buffer(struct ruvd_decoder *dec, - struct ruvd_buffer* buffer) -{ - //TODO: let the GPU do the job - void *ptr = dec->ws->buffer_map(buffer->cs_handle, dec->cs, - PIPE_TRANSFER_WRITE); - if (!ptr) - return; + if (dec->sessionctx.res) + send_cmd(dec, RUVD_CMD_SESSION_CONTEXT_BUFFER, + dec->sessionctx.res->buf, 0, RADEON_USAGE_READWRITE, + RADEON_DOMAIN_VRAM); - memset(ptr, 0, buffer->buf->size); - dec->ws->buffer_unmap(buffer->cs_handle); + /* and send it to the hardware */ + send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->buf, 0, + RADEON_USAGE_READ, RADEON_DOMAIN_GTT); } /* cycle to the next set of buffers */ @@ -240,40 +193,146 @@ static void next_buffer(struct ruvd_decoder *dec) } /* convert the profile into something UVD understands */ -static uint32_t profile2stream_type(enum pipe_video_profile profile) +static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family) { - switch (u_reduce_video_profile(profile)) { - case PIPE_VIDEO_CODEC_MPEG4_AVC: - return RUVD_CODEC_H264; + switch (u_reduce_video_profile(dec->base.profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + return (family >= CHIP_TONGA) ? + RUVD_CODEC_H264_PERF : RUVD_CODEC_H264; - case PIPE_VIDEO_CODEC_VC1: + case PIPE_VIDEO_FORMAT_VC1: return RUVD_CODEC_VC1; - case PIPE_VIDEO_CODEC_MPEG12: + case PIPE_VIDEO_FORMAT_MPEG12: return RUVD_CODEC_MPEG2; - case PIPE_VIDEO_CODEC_MPEG4: + case PIPE_VIDEO_FORMAT_MPEG4: return RUVD_CODEC_MPEG4; + case PIPE_VIDEO_FORMAT_HEVC: + return RUVD_CODEC_H265; + default: assert(0); return 0; } } +static unsigned calc_ctx_size_h264_perf(struct ruvd_decoder *dec) +{ + unsigned width_in_mb, height_in_mb, ctx_size; + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + unsigned max_references = dec->base.max_references + 1; + + // picture width & height in 16 pixel units + width_in_mb = width / VL_MACROBLOCK_WIDTH; + height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); + + if (!dec->use_legacy) { + unsigned fs_in_mb = width_in_mb * height_in_mb; + unsigned num_dpb_buffer; + switch(dec->base.level) { + case 30: + num_dpb_buffer = 8100 / fs_in_mb; + break; + case 31: + num_dpb_buffer = 18000 / fs_in_mb; + break; + case 32: + num_dpb_buffer = 20480 / fs_in_mb; + break; + case 41: + num_dpb_buffer = 32768 / fs_in_mb; + break; + case 42: + num_dpb_buffer = 34816 / fs_in_mb; + break; + case 50: + num_dpb_buffer = 110400 / fs_in_mb; + break; + case 51: + num_dpb_buffer = 184320 / fs_in_mb; + break; + default: + num_dpb_buffer = 184320 / fs_in_mb; + break; + } + num_dpb_buffer++; + max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); + ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256); + } else { + // the firmware seems to always assume a minimum of ref frames + max_references = MAX2(NUM_H264_REFS, max_references); + // macroblock context buffer + ctx_size = align(width_in_mb * height_in_mb * max_references * 192, 256); + } + + return ctx_size; +} + +static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec) +{ + unsigned width = 
align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + unsigned max_references = dec->base.max_references + 1; + + if (dec->base.width * dec->base.height >= 4096*2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + width = align (width, 16); + height = align (height, 16); + return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; +} + +static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_h265_picture_desc *pic) +{ + unsigned block_size, log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; + unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; + unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); + + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1; + + unsigned max_references = dec->base.max_references + 1; + + if (dec->base.width * dec->base.height >= 4096*2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + block_size = (1 << (pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3)); + log2_ctb_size = block_size + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + + width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + + num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4); + context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256); + max_mb_address = (unsigned) ceil(height * 8 / 2048.0); + + cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb; + db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024); + + return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; +} + /* calculate size of reference picture buffer */ -static unsigned calc_dpb_size(enum pipe_video_profile profile, - unsigned width, unsigned height, - unsigned max_references) +static unsigned calc_dpb_size(struct ruvd_decoder *dec) { unsigned width_in_mb, height_in_mb, image_size, dpb_size; // always align them to MB size for dpb calculation - width = align(width, VL_MACROBLOCK_WIDTH); - height = align(height, VL_MACROBLOCK_HEIGHT); + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); // always one more for currently decoded picture - max_references += 1; + unsigned max_references = dec->base.max_references + 1; // aligned size of a single frame image_size = width * height; @@ -284,22 +343,82 @@ static unsigned calc_dpb_size(enum pipe_video_profile profile, width_in_mb = width / VL_MACROBLOCK_WIDTH; height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); - switch (u_reduce_video_profile(profile)) { - case PIPE_VIDEO_CODEC_MPEG4_AVC: - // the firmware seems to allways assume a minimum of ref frames - max_references = MAX2(NUM_H264_REFS, max_references); - - // reference picture buffer - dpb_size = image_size * max_references; - - // macroblock context buffer - dpb_size += width_in_mb * height_in_mb * max_references * 192; + switch (u_reduce_video_profile(dec->base.profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: { + if (!dec->use_legacy) { + unsigned 
fs_in_mb = width_in_mb * height_in_mb; + unsigned alignment = 64, num_dpb_buffer; + + if (dec->stream_type == RUVD_CODEC_H264_PERF) + alignment = 256; + switch(dec->base.level) { + case 30: + num_dpb_buffer = 8100 / fs_in_mb; + break; + case 31: + num_dpb_buffer = 18000 / fs_in_mb; + break; + case 32: + num_dpb_buffer = 20480 / fs_in_mb; + break; + case 41: + num_dpb_buffer = 32768 / fs_in_mb; + break; + case 42: + num_dpb_buffer = 34816 / fs_in_mb; + break; + case 50: + num_dpb_buffer = 110400 / fs_in_mb; + break; + case 51: + num_dpb_buffer = 184320 / fs_in_mb; + break; + default: + num_dpb_buffer = 184320 / fs_in_mb; + break; + } + num_dpb_buffer++; + max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); + dpb_size = image_size * max_references; + if ((dec->stream_type != RUVD_CODEC_H264_PERF) || + (((struct r600_common_screen*)dec->screen)->family < CHIP_POLARIS10)) { + dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment); + dpb_size += align(width_in_mb * height_in_mb * 32, alignment); + } + } else { + // the firmware seems to allways assume a minimum of ref frames + max_references = MAX2(NUM_H264_REFS, max_references); + // reference picture buffer + dpb_size = image_size * max_references; + if ((dec->stream_type != RUVD_CODEC_H264_PERF) || + (((struct r600_common_screen*)dec->screen)->family < CHIP_POLARIS10)) { + // macroblock context buffer + dpb_size += width_in_mb * height_in_mb * max_references * 192; + // IT surface buffer + dpb_size += width_in_mb * height_in_mb * 32; + } + } + break; + } - // IT surface buffer - dpb_size += width_in_mb * height_in_mb * 32; + case PIPE_VIDEO_FORMAT_HEVC: + if (dec->base.width * dec->base.height >= 4096*2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + width = align (width, 16); + height = align (height, 16); + if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + dpb_size = align((width * height * 9) / 4, 256) * max_references; + else + dpb_size = align((width * height * 3) / 2, 256) * max_references; break; - case PIPE_VIDEO_CODEC_VC1: + case PIPE_VIDEO_FORMAT_VC1: + // the firmware seems to allways assume a minimum of ref frames + max_references = MAX2(NUM_VC1_REFS, max_references); + // reference picture buffer dpb_size = image_size * max_references; @@ -316,12 +435,12 @@ static unsigned calc_dpb_size(enum pipe_video_profile profile, dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64); break; - case PIPE_VIDEO_CODEC_MPEG12: + case PIPE_VIDEO_FORMAT_MPEG12: // reference picture buffer, must be big enough for all frames dpb_size = image_size * NUM_MPEG2_REFS; break; - case PIPE_VIDEO_CODEC_MPEG4: + case PIPE_VIDEO_FORMAT_MPEG4: // reference picture buffer dpb_size = image_size * max_references; @@ -330,6 +449,8 @@ static unsigned calc_dpb_size(enum pipe_video_profile profile, // IT surface buffer dpb_size += align(width_in_mb * height_in_mb * 32, 64); + + dpb_size = MAX2(dpb_size, 30 * 1024 * 1024); break; default: @@ -343,6 +464,12 @@ static unsigned calc_dpb_size(enum pipe_video_profile profile, return dpb_size; } +/* free associated data in the video buffer callback */ +static void ruvd_destroy_associated_data(void *data) +{ + /* NOOP, since we only use an intptr */ +} + /* get h264 specific message bits */ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic) { @@ -366,50 +493,69 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_ assert(0); 
break; } - if (((dec->base.width * dec->base.height) >> 8) <= 1620) - result.level = 30; - else - result.level = 41; + + result.level = dec->base.level; result.sps_info_flags = 0; - result.sps_info_flags |= pic->direct_8x8_inference_flag << 0; - result.sps_info_flags |= pic->mb_adaptive_frame_field_flag << 1; - result.sps_info_flags |= pic->frame_mbs_only_flag << 2; - result.sps_info_flags |= pic->delta_pic_order_always_zero_flag << 3; + result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0; + result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1; + result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2; + result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3; + + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4; + result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + + switch (dec->base.chroma_format) { + case PIPE_VIDEO_CHROMA_FORMAT_NONE: + /* TODO: assert? */ + break; + case PIPE_VIDEO_CHROMA_FORMAT_400: + result.chroma_format = 0; + break; + case PIPE_VIDEO_CHROMA_FORMAT_420: + result.chroma_format = 1; + break; + case PIPE_VIDEO_CHROMA_FORMAT_422: + result.chroma_format = 2; + break; + case PIPE_VIDEO_CHROMA_FORMAT_444: + result.chroma_format = 3; + break; + } result.pps_info_flags = 0; - result.pps_info_flags |= pic->transform_8x8_mode_flag << 0; - result.pps_info_flags |= pic->redundant_pic_cnt_present_flag << 1; - result.pps_info_flags |= pic->constrained_intra_pred_flag << 2; - result.pps_info_flags |= pic->deblocking_filter_control_present_flag << 3; - result.pps_info_flags |= pic->weighted_bipred_idc << 4; - result.pps_info_flags |= pic->weighted_pred_flag << 6; - result.pps_info_flags |= pic->pic_order_present_flag << 7; - result.pps_info_flags |= pic->entropy_coding_mode_flag << 8; - - result.chroma_format = 0x1; - result.bit_depth_luma_minus8 = 0; - result.bit_depth_chroma_minus8 = 0; + result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0; + result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2; + result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3; + result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 6; + result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7; + result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8; + + result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1; + result.slice_group_map_type = pic->pps->slice_group_map_type; + result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1; + result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26; + result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset; + result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset; + + memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16); + memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64); + + if (dec->stream_type == RUVD_CODEC_H264_PERF) { + memcpy(dec->it, result.scaling_list_4x4, 6*16); + memcpy((dec->it + 96), result.scaling_list_8x8, 2*64); + } - result.log2_max_frame_num_minus4 = 
pic->log2_max_frame_num_minus4; - result.pic_order_cnt_type = pic->pic_order_cnt_type; - result.log2_max_pic_order_cnt_lsb_minus4 = pic->log2_max_pic_order_cnt_lsb_minus4; result.num_ref_frames = pic->num_ref_frames; - result.pic_init_qp_minus26 = pic->pic_init_qp_minus26; - result.chroma_qp_index_offset = pic->chroma_qp_index_offset; - result.second_chroma_qp_index_offset = pic->second_chroma_qp_index_offset; - - result.num_slice_groups_minus1 = 0; - result.slice_group_map_type = 0; result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; - result.slice_group_change_rate_minus1 = 0; - - memcpy(result.scaling_list_4x4, pic->scaling_lists_4x4, 6*64); - memcpy(result.scaling_list_8x8, pic->scaling_lists_8x8, 2*64); - result.frame_num = pic->frame_num; memcpy(result.frame_num_list, pic->frame_num_list, 4*16); result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0]; @@ -421,66 +567,229 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_ return result; } +/* get h265 specific message bits */ +static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target, + struct pipe_h265_picture_desc *pic) +{ + struct ruvd_h265 result; + unsigned i; + + memset(&result, 0, sizeof(result)); + + result.sps_info_flags = 0; + result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0; + result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1; + result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2; + result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3; + result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4; + result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5; + result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; + result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; + result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; + if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO) + result.sps_info_flags |= 1 << 9; + if (pic->UseRefPicList == true) + result.sps_info_flags |= 1 << 10; + + result.chroma_format = pic->pps->sps->chroma_format_idc; + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1; + result.log2_min_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_luma_coding_block_size_minus3; + result.log2_diff_max_min_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + result.log2_min_transform_block_size_minus2 = pic->pps->sps->log2_min_transform_block_size_minus2; + result.log2_diff_max_min_transform_block_size = pic->pps->sps->log2_diff_max_min_transform_block_size; + result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter; + result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra; + result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1; + result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1; + result.log2_min_pcm_luma_coding_block_size_minus3 = 
pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3; + result.log2_diff_max_min_pcm_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size; + result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets; + + result.pps_info_flags = 0; + result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0; + result.pps_info_flags |= pic->pps->output_flag_present_flag << 1; + result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2; + result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4; + result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5; + result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6; + result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 8; + result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9; + result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10; + result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11; + result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12; + result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13; + result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14; + result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15; + result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16; + result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17; + result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18; + result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19; + //result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ??? 
+ + result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits; + result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; + result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1; + result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1; + result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset; + result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset; + result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2; + result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2; + result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth; + result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1; + result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1; + result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2; + result.init_qp_minus26 = pic->pps->init_qp_minus26; + + for (i = 0; i < 19; ++i) + result.column_width_minus1[i] = pic->pps->column_width_minus1[i]; + + for (i = 0; i < 21; ++i) + result.row_height_minus1[i] = pic->pps->row_height_minus1[i]; + + result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx; + result.curr_idx = pic->CurrPicOrderCntVal; + result.curr_poc = pic->CurrPicOrderCntVal; + + vl_video_buffer_set_associated_data(target, &dec->base, + (void *)(uintptr_t)pic->CurrPicOrderCntVal, + &ruvd_destroy_associated_data); + + for (i = 0; i < 16; ++i) { + struct pipe_video_buffer *ref = pic->ref[i]; + uintptr_t ref_pic = 0; + + result.poc_list[i] = pic->PicOrderCntVal[i]; + + if (ref) + ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); + else + ref_pic = 0x7F; + result.ref_pic_list[i] = ref_pic; + } + + for (i = 0; i < 8; ++i) { + result.ref_pic_set_st_curr_before[i] = 0xFF; + result.ref_pic_set_st_curr_after[i] = 0xFF; + result.ref_pic_set_lt_curr[i] = 0xFF; + } + + for (i = 0; i < pic->NumPocStCurrBefore; ++i) + result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i]; + + for (i = 0; i < pic->NumPocStCurrAfter; ++i) + result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i]; + + for (i = 0; i < pic->NumPocLtCurr; ++i) + result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i]; + + for (i = 0; i < 6; ++i) + result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i]; + + for (i = 0; i < 2; ++i) + result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i]; + + memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); + memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); + memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); + memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); + + for (i = 0 ; i < 2 ; i++) { + for (int j = 0 ; j < 15 ; j++) + result.direct_reflist[i][j] = pic->RefPicList[i][j]; + } + + if ((pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) && + (target->buffer_format == PIPE_FORMAT_NV12)) { + result.p010_mode = 0; + result.luma_10to8 = 5; + result.chroma_10to8 = 5; + result.sclr_luma10to8 = 4; + result.sclr_chroma10to8 = 4; + } + + /* TODO + result.highestTid; + result.isNonRef; + + IDRPicFlag; + RAPPicFlag; + NumPocTotalCurr; + NumShortTermPictureSliceHeaderBits; + NumLongTermPictureSliceHeaderBits; + + IsLongTerm[16]; + */ + + return result; +} + /* get vc1 specific message bits */ static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic) { struct ruvd_vc1 result; memset(&result, 0, sizeof(result)); + switch(pic->base.profile) { case 
PIPE_VIDEO_PROFILE_VC1_SIMPLE: result.profile = RUVD_VC1_PROFILE_SIMPLE; + result.level = 1; break; case PIPE_VIDEO_PROFILE_VC1_MAIN: result.profile = RUVD_VC1_PROFILE_MAIN; + result.level = 2; break; - + case PIPE_VIDEO_PROFILE_VC1_ADVANCED: result.profile = RUVD_VC1_PROFILE_ADVANCED; + result.level = 4; break; + default: assert(0); } - if (pic->base.profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED) { - result.level = 0; - - result.sps_info_flags |= pic->postprocflag << 7; - result.sps_info_flags |= pic->pulldown << 6; - result.sps_info_flags |= pic->interlace << 5; - result.sps_info_flags |= pic->tfcntrflag << 4; - result.sps_info_flags |= pic->psf << 1; - - result.pps_info_flags |= pic->panscan_flag << 7; - result.pps_info_flags |= pic->refdist_flag << 6; - result.pps_info_flags |= pic->extended_dmv << 8; - result.pps_info_flags |= pic->range_mapy_flag << 31; - result.pps_info_flags |= pic->range_mapy << 28; - result.pps_info_flags |= pic->range_mapuv_flag << 27; - result.pps_info_flags |= pic->range_mapuv << 24; + /* fields common for all profiles */ + result.sps_info_flags |= pic->postprocflag << 7; + result.sps_info_flags |= pic->pulldown << 6; + result.sps_info_flags |= pic->interlace << 5; + result.sps_info_flags |= pic->tfcntrflag << 4; + result.sps_info_flags |= pic->finterpflag << 3; + result.sps_info_flags |= pic->psf << 1; + + result.pps_info_flags |= pic->range_mapy_flag << 31; + result.pps_info_flags |= pic->range_mapy << 28; + result.pps_info_flags |= pic->range_mapuv_flag << 27; + result.pps_info_flags |= pic->range_mapuv << 24; + result.pps_info_flags |= pic->multires << 21; + result.pps_info_flags |= pic->maxbframes << 16; + result.pps_info_flags |= pic->overlap << 11; + result.pps_info_flags |= pic->quantizer << 9; + result.pps_info_flags |= pic->panscan_flag << 7; + result.pps_info_flags |= pic->refdist_flag << 6; + result.pps_info_flags |= pic->vstransform << 0; - } else { - result.level = 0; - result.pps_info_flags |= pic->multires << 21; + /* some fields only apply to main/advanced profile */ + if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) { result.pps_info_flags |= pic->syncmarker << 20; result.pps_info_flags |= pic->rangered << 19; - result.pps_info_flags |= pic->maxbframes << 16; + result.pps_info_flags |= pic->loopfilter << 5; + result.pps_info_flags |= pic->fastuvmc << 4; + result.pps_info_flags |= pic->extended_mv << 3; + result.pps_info_flags |= pic->extended_dmv << 8; + result.pps_info_flags |= pic->dquant << 1; } - result.sps_info_flags |= pic->finterpflag << 3; - //(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT) - - result.pps_info_flags |= pic->loopfilter << 5; - result.pps_info_flags |= pic->fastuvmc << 4; - result.pps_info_flags |= pic->extended_mv << 3; - result.pps_info_flags |= pic->dquant << 1; - result.pps_info_flags |= pic->vstransform << 0; - result.pps_info_flags |= pic->overlap << 11; - result.pps_info_flags |= pic->quantizer << 9; - + result.chroma_format = 1; #if 0 +//(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT) uint32_t slice_count uint8_t picture_type uint8_t frame_coding_mode @@ -488,15 +797,14 @@ uint8_t deblockEnable uint8_t pquant #endif - result.chroma_format = 1; return result; } /* extract the frame number from a referenced video buffer */ static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref) { - uint32_t min = dec->frame_number - NUM_MPEG2_REFS; - uint32_t max = dec->frame_number - 1; + uint32_t min = MAX2(dec->frame_number, 
NUM_MPEG2_REFS) - NUM_MPEG2_REFS; + uint32_t max = MAX2(dec->frame_number, 1) - 1; uintptr_t frame; /* seems to be the most sane fallback */ @@ -615,44 +923,40 @@ static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec, /** * destroy this video decoder */ -static void ruvd_destroy(struct pipe_video_decoder *decoder) +static void ruvd_destroy(struct pipe_video_codec *decoder) { struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; - struct ruvd_msg msg; unsigned i; assert(decoder); - memset(&msg, 0, sizeof(msg)); - msg.size = sizeof(msg); - msg.msg_type = RUVD_MSG_DESTROY; - msg.stream_handle = dec->stream_handle; - send_msg(dec, &msg); + map_msg_fb_it_buf(dec); + memset(dec->msg, 0, sizeof(*dec->msg)); + dec->msg->size = sizeof(*dec->msg); + dec->msg->msg_type = RUVD_MSG_DESTROY; + dec->msg->stream_handle = dec->stream_handle; + send_msg_buf(dec); - flush(dec); + flush(dec, 0); dec->ws->cs_destroy(dec->cs); for (i = 0; i < NUM_BUFFERS; ++i) { - destroy_buffer(&dec->msg_fb_buffers[i]); - destroy_buffer(&dec->bs_buffers[i]); + rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]); + rvid_destroy_buffer(&dec->bs_buffers[i]); } - destroy_buffer(&dec->dpb); + rvid_destroy_buffer(&dec->dpb); + rvid_destroy_buffer(&dec->ctx); + rvid_destroy_buffer(&dec->sessionctx); FREE(dec); } -/* free associated data in the video buffer callback */ -static void ruvd_destroy_associated_data(void *data) -{ - /* NOOP, since we only use an intptr */ -} - /** * start decoding of a new frame */ -static void ruvd_begin_frame(struct pipe_video_decoder *decoder, +static void ruvd_begin_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target, struct pipe_picture_desc *picture) { @@ -667,14 +971,14 @@ static void ruvd_begin_frame(struct pipe_video_decoder *decoder, dec->bs_size = 0; dec->bs_ptr = dec->ws->buffer_map( - dec->bs_buffers[dec->cur_buffer].cs_handle, + dec->bs_buffers[dec->cur_buffer].res->buf, dec->cs, PIPE_TRANSFER_WRITE); } /** * decode a macroblock */ -static void ruvd_decode_macroblock(struct pipe_video_decoder *decoder, +static void ruvd_decode_macroblock(struct pipe_video_codec *decoder, struct pipe_video_buffer *target, struct pipe_picture_desc *picture, const struct pipe_macroblock *macroblocks, @@ -687,7 +991,7 @@ static void ruvd_decode_macroblock(struct pipe_video_decoder *decoder, /** * decode a bitstream */ -static void ruvd_decode_bitstream(struct pipe_video_decoder *decoder, +static void ruvd_decode_bitstream(struct pipe_video_codec *decoder, struct pipe_video_buffer *target, struct pipe_picture_desc *picture, unsigned num_buffers, @@ -703,17 +1007,17 @@ static void ruvd_decode_bitstream(struct pipe_video_decoder *decoder, return; for (i = 0; i < num_buffers; ++i) { - struct ruvd_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; + struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; unsigned new_size = dec->bs_size + sizes[i]; - if (new_size > buf->buf->size) { - dec->ws->buffer_unmap(buf->cs_handle); - if (!resize_buffer(dec, buf, new_size)) { - RUVD_ERR("Can't resize bitstream buffer!"); + if (new_size > buf->res->buf->size) { + dec->ws->buffer_unmap(buf->res->buf); + if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { + RVID_ERR("Can't resize bitstream buffer!"); return; } - dec->bs_ptr = dec->ws->buffer_map(buf->cs_handle, dec->cs, + dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); if (!dec->bs_ptr) return; @@ -730,14 +1034,13 @@ static void ruvd_decode_bitstream(struct pipe_video_decoder *decoder, /** * end decoding 
of the current frame */ -static void ruvd_end_frame(struct pipe_video_decoder *decoder, +static void ruvd_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target, struct pipe_picture_desc *picture) { struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; - struct radeon_winsys_cs_handle *dt; - struct ruvd_buffer *msg_fb_buf, *bs_buf; - struct ruvd_msg msg; + struct pb_buffer *dt; + struct rvid_buffer *msg_fb_it_buf, *bs_buf; unsigned bs_size; assert(decoder); @@ -745,44 +1048,75 @@ static void ruvd_end_frame(struct pipe_video_decoder *decoder, if (!dec->bs_ptr) return; - msg_fb_buf = &dec->msg_fb_buffers[dec->cur_buffer]; + msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; bs_buf = &dec->bs_buffers[dec->cur_buffer]; bs_size = align(dec->bs_size, 128); memset(dec->bs_ptr, 0, bs_size - dec->bs_size); - dec->ws->buffer_unmap(bs_buf->cs_handle); - - memset(&msg, 0, sizeof(msg)); - msg.size = sizeof(msg); - msg.msg_type = RUVD_MSG_DECODE; - msg.stream_handle = dec->stream_handle; - msg.status_report_feedback_number = dec->frame_number; + dec->ws->buffer_unmap(bs_buf->res->buf); + + map_msg_fb_it_buf(dec); + dec->msg->size = sizeof(*dec->msg); + dec->msg->msg_type = RUVD_MSG_DECODE; + dec->msg->stream_handle = dec->stream_handle; + dec->msg->status_report_feedback_number = dec->frame_number; + + dec->msg->body.decode.stream_type = dec->stream_type; + dec->msg->body.decode.decode_flags = 0x1; + dec->msg->body.decode.width_in_samples = dec->base.width; + dec->msg->body.decode.height_in_samples = dec->base.height; + + if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) || + (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) { + dec->msg->body.decode.width_in_samples = align(dec->msg->body.decode.width_in_samples, 16) / 16; + dec->msg->body.decode.height_in_samples = align(dec->msg->body.decode.height_in_samples, 16) / 16; + } - msg.body.decode.stream_type = profile2stream_type(dec->base.profile); - msg.body.decode.decode_flags = 0x1; - msg.body.decode.width_in_samples = dec->base.width; - msg.body.decode.height_in_samples = dec->base.height; + dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size; + dec->msg->body.decode.bsd_size = bs_size; + dec->msg->body.decode.db_pitch = align(dec->base.width, 16); - msg.body.decode.dpb_size = dec->dpb.buf->size; - msg.body.decode.bsd_size = bs_size; + if (dec->stream_type == RUVD_CODEC_H264_PERF && + ((struct r600_common_screen*)dec->screen)->family >= CHIP_POLARIS10) + dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; - dt = dec->set_dtb(&msg, (struct vl_video_buffer *)target); + dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target); + if (((struct r600_common_screen*)dec->screen)->family >= CHIP_STONEY) + dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2; switch (u_reduce_video_profile(picture->profile)) { - case PIPE_VIDEO_CODEC_MPEG4_AVC: - msg.body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture); + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture); + break; + + case PIPE_VIDEO_FORMAT_HEVC: + dec->msg->body.decode.codec.h265 = get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture); + if (dec->ctx.res == NULL) { + unsigned ctx_size; + if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc*)picture); + else + ctx_size = calc_ctx_size_h265_main(dec); + if 
(!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated context buffer.\n"); + } + rvid_clear_buffer(decoder->context, &dec->ctx); + } + + if (dec->ctx.res) + dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; break; - case PIPE_VIDEO_CODEC_VC1: - msg.body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture); + case PIPE_VIDEO_FORMAT_VC1: + dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture); break; - case PIPE_VIDEO_CODEC_MPEG12: - msg.body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture); + case PIPE_VIDEO_FORMAT_MPEG12: + dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture); break; - case PIPE_VIDEO_CODEC_MPEG4: - msg.body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture); + case PIPE_VIDEO_FORMAT_MPEG4: + dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture); break; default: @@ -790,58 +1124,70 @@ static void ruvd_end_frame(struct pipe_video_decoder *decoder, return; } - msg.body.decode.db_surf_tile_config = msg.body.decode.dt_surf_tile_config; - msg.body.decode.extension_support = 0x1; + dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config; + dec->msg->body.decode.extension_support = 0x1; - send_msg(dec, &msg); - send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.cs_handle, 0, + /* set at least the feedback buffer size */ + dec->fb[0] = dec->fb_size; + + send_msg_buf(dec); + + send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); - send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->cs_handle, + if (dec->ctx.res) + send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, + RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); + send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); - send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_buf->cs_handle, - 0x1000, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); + send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf, + FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); + if (have_it(dec)) + send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf, + FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); set_reg(dec, RUVD_ENGINE_CNTL, 1); - flush(dec); + flush(dec, RADEON_FLUSH_ASYNC); next_buffer(dec); } /** * flush any outstanding command buffers to the hardware */ -static void ruvd_flush(struct pipe_video_decoder *decoder) +static void ruvd_flush(struct pipe_video_codec *decoder) { } /** * create and UVD decoder */ -struct pipe_video_decoder *ruvd_create_decoder(struct pipe_context *context, - enum pipe_video_profile profile, - enum pipe_video_entrypoint entrypoint, - enum pipe_video_chroma_format chroma_format, - unsigned width, unsigned height, - unsigned max_references, bool expect_chunked_decode, - struct radeon_winsys* ws, - ruvd_set_dtb set_dtb) +struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templ, + ruvd_set_dtb set_dtb) { - unsigned dpb_size = calc_dpb_size(profile, width, height, max_references); + struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws; + struct r600_common_context *rctx = (struct r600_common_context*)context; + unsigned dpb_size; + 
unsigned width = templ->width, height = templ->height; + unsigned bs_buf_size; + struct radeon_info info; struct ruvd_decoder *dec; - struct ruvd_msg msg; - int i; + int r, i; + + ws->query_info(ws, &info); - switch(u_reduce_video_profile(profile)) { - case PIPE_VIDEO_CODEC_MPEG12: - if (entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM) - return vl_create_mpeg12_decoder(context, profile, entrypoint, - chroma_format, width, - height, max_references, expect_chunked_decode); + switch(u_reduce_video_profile(templ->profile)) { + case PIPE_VIDEO_FORMAT_MPEG12: + if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM) + return vl_create_mpeg12_decoder(context, templ); /* fall through */ - case PIPE_VIDEO_CODEC_MPEG4: - case PIPE_VIDEO_CODEC_MPEG4_AVC: + case PIPE_VIDEO_FORMAT_MPEG4: + width = align(width, VL_MACROBLOCK_WIDTH); + height = align(height, VL_MACROBLOCK_HEIGHT); + break; + case PIPE_VIDEO_FORMAT_MPEG4_AVC: width = align(width, VL_MACROBLOCK_WIDTH); height = align(height, VL_MACROBLOCK_HEIGHT); break; @@ -856,10 +1202,11 @@ struct pipe_video_decoder *ruvd_create_decoder(struct pipe_context *context, if (!dec) return NULL; + if (info.drm_major < 3) + dec->use_legacy = true; + + dec->base = *templ; dec->base.context = context; - dec->base.profile = profile; - dec->base.entrypoint = entrypoint; - dec->base.chroma_format = chroma_format; dec->base.width = width; dec->base.height = height; @@ -870,48 +1217,82 @@ struct pipe_video_decoder *ruvd_create_decoder(struct pipe_context *context, dec->base.end_frame = ruvd_end_frame; dec->base.flush = ruvd_flush; + dec->stream_type = profile2stream_type(dec, info.family); dec->set_dtb = set_dtb; - dec->stream_handle = alloc_stream_handle(); + dec->stream_handle = rvid_alloc_stream_handle(); + dec->screen = context->screen; dec->ws = ws; - dec->cs = ws->cs_create(ws, RING_UVD, NULL); + dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL); if (!dec->cs) { - RUVD_ERR("Can't get command submission context.\n"); + RVID_ERR("Can't get command submission context.\n"); goto error; } + dec->fb_size = (info.family == CHIP_TONGA) ? 
FB_BUFFER_SIZE_TONGA : + FB_BUFFER_SIZE; + bs_buf_size = width * height * (512 / (16 * 16)); for (i = 0; i < NUM_BUFFERS; ++i) { - unsigned msg_fb_size = align(sizeof(struct ruvd_msg), 0x1000) + 0x1000; - if (!create_buffer(dec, &dec->msg_fb_buffers[i], msg_fb_size)) { - RUVD_ERR("Can't allocated message buffers.\n"); + unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size; + STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET); + if (have_it(dec)) + msg_fb_it_size += IT_SCALING_TABLE_SIZE; + if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i], + msg_fb_it_size, PIPE_USAGE_STAGING)) { + RVID_ERR("Can't allocated message buffers.\n"); goto error; } - if (!create_buffer(dec, &dec->bs_buffers[i], 4096)) { - RUVD_ERR("Can't allocated bitstream buffers.\n"); + if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i], + bs_buf_size, PIPE_USAGE_STAGING)) { + RVID_ERR("Can't allocated bitstream buffers.\n"); goto error; } - clear_buffer(dec, &dec->msg_fb_buffers[i]); - clear_buffer(dec, &dec->bs_buffers[i]); + rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]); + rvid_clear_buffer(context, &dec->bs_buffers[i]); } - if (!create_buffer(dec, &dec->dpb, dpb_size)) { - RUVD_ERR("Can't allocated dpb.\n"); + dpb_size = calc_dpb_size(dec); + + if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated dpb.\n"); goto error; } - clear_buffer(dec, &dec->dpb); - - memset(&msg, 0, sizeof(msg)); - msg.size = sizeof(msg); - msg.msg_type = RUVD_MSG_CREATE; - msg.stream_handle = dec->stream_handle; - msg.body.create.stream_type = profile2stream_type(dec->base.profile); - msg.body.create.width_in_samples = dec->base.width; - msg.body.create.height_in_samples = dec->base.height; - msg.body.create.dpb_size = dec->dpb.buf->size; - send_msg(dec, &msg); - flush(dec); + rvid_clear_buffer(context, &dec->dpb); + + if (dec->stream_type == RUVD_CODEC_H264_PERF && info.family >= CHIP_POLARIS10) { + unsigned ctx_size = calc_ctx_size_h264_perf(dec); + if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated context buffer.\n"); + goto error; + } + rvid_clear_buffer(context, &dec->ctx); + } + + if (info.family >= CHIP_POLARIS10 && info.drm_minor >= 3) { + if (!rvid_create_buffer(dec->screen, &dec->sessionctx, + UVD_SESSION_CONTEXT_SIZE, + PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated session ctx.\n"); + goto error; + } + rvid_clear_buffer(context, &dec->sessionctx); + } + + map_msg_fb_it_buf(dec); + dec->msg->size = sizeof(*dec->msg); + dec->msg->msg_type = RUVD_MSG_CREATE; + dec->msg->stream_handle = dec->stream_handle; + dec->msg->body.create.stream_type = dec->stream_type; + dec->msg->body.create.width_in_samples = dec->base.width; + dec->msg->body.create.height_in_samples = dec->base.height; + dec->msg->body.create.dpb_size = dpb_size; + send_msg_buf(dec); + r = flush(dec, 0); + if (r) + goto error; + next_buffer(dec); return &dec->base; @@ -920,92 +1301,21 @@ error: if (dec->cs) dec->ws->cs_destroy(dec->cs); for (i = 0; i < NUM_BUFFERS; ++i) { - destroy_buffer(&dec->msg_fb_buffers[i]); - destroy_buffer(&dec->bs_buffers[i]); + rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]); + rvid_destroy_buffer(&dec->bs_buffers[i]); } - destroy_buffer(&dec->dpb); + rvid_destroy_buffer(&dec->dpb); + rvid_destroy_buffer(&dec->ctx); + rvid_destroy_buffer(&dec->sessionctx); FREE(dec); return NULL; } -/** - * join surfaces into the same buffer with identical tiling params - * sumup their sizes and replace the 
backend buffers with a single bo - */ -void ruvd_join_surfaces(struct radeon_winsys* ws, unsigned bind, - struct pb_buffer** buffers[VL_NUM_COMPONENTS], - struct radeon_surface *surfaces[VL_NUM_COMPONENTS]) -{ - unsigned best_tiling, best_wh, off; - unsigned size, alignment; - struct pb_buffer *pb; - unsigned i, j; - - for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) { - unsigned wh; - - if (!surfaces[i]) - continue; - - /* choose the smallest bank w/h for now */ - wh = surfaces[i]->bankw * surfaces[i]->bankh; - if (wh < best_wh) { - best_wh = wh; - best_tiling = i; - } - } - - for (i = 0, off = 0; i < VL_NUM_COMPONENTS; ++i) { - if (!surfaces[i]) - continue; - - /* copy the tiling parameters */ - surfaces[i]->bankw = surfaces[best_tiling]->bankw; - surfaces[i]->bankh = surfaces[best_tiling]->bankh; - surfaces[i]->mtilea = surfaces[best_tiling]->mtilea; - surfaces[i]->tile_split = surfaces[best_tiling]->tile_split; - - /* adjust the texture layer offsets */ - off = align(off, surfaces[i]->bo_alignment); - for (j = 0; j < Elements(surfaces[i]->level); ++j) - surfaces[i]->level[j].offset += off; - off += surfaces[i]->bo_size; - } - - for (i = 0, size = 0, alignment = 0; i < VL_NUM_COMPONENTS; ++i) { - if (!buffers[i] || !*buffers[i]) - continue; - - size = align(size, (*buffers[i])->alignment); - size += (*buffers[i])->size; - alignment = MAX2(alignment, (*buffers[i])->alignment * 1); - } - - if (!size) - return; - - /* TODO: 2D tiling workaround */ - alignment *= 2; - - pb = ws->buffer_create(ws, size, alignment, bind, RADEON_DOMAIN_VRAM); - if (!pb) - return; - - for (i = 0; i < VL_NUM_COMPONENTS; ++i) { - if (!buffers[i] || !*buffers[i]) - continue; - - pb_reference(buffers[i], pb); - } - - pb_reference(&pb, NULL); -} - /* calculate top/bottom offset */ -static unsigned texture_offset(struct radeon_surface *surface, unsigned layer) +static unsigned texture_offset(struct radeon_surf *surface, unsigned layer) { return surface->level[0].offset + layer * surface->level[0].slice_size; @@ -1040,10 +1350,10 @@ static unsigned bank_wh(unsigned bankwh) /** * fill decoding target field from the luma and chroma surfaces */ -void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surface *luma, - struct radeon_surface *chroma) +void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma, + struct radeon_surf *chroma) { - msg->body.decode.dt_pitch = luma->level[0].pitch_bytes; + msg->body.decode.dt_pitch = luma->level[0].nblk_x * luma->bpe; switch (luma->level[0].mode) { case RADEON_SURF_MODE_LINEAR_ALIGNED: msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; @@ -1080,45 +1390,3 @@ void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surface *luma, msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->bankh)); msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->mtilea)); } - -int ruvd_get_video_param(struct pipe_screen *screen, - enum pipe_video_profile profile, - enum pipe_video_cap param) -{ - switch (param) { - case PIPE_VIDEO_CAP_SUPPORTED: - switch (u_reduce_video_profile(profile)) { - case PIPE_VIDEO_CODEC_MPEG12: - case PIPE_VIDEO_CODEC_MPEG4: - case PIPE_VIDEO_CODEC_MPEG4_AVC: - case PIPE_VIDEO_CODEC_VC1: - return true; - default: - return false; - } - case PIPE_VIDEO_CAP_NPOT_TEXTURES: - return 1; - case PIPE_VIDEO_CAP_MAX_WIDTH: - return 2048; - case PIPE_VIDEO_CAP_MAX_HEIGHT: - return 1152; - case PIPE_VIDEO_CAP_PREFERED_FORMAT: - return PIPE_FORMAT_NV12; - case 
PIPE_VIDEO_CAP_PREFERS_INTERLACED: - return false; - case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: - return false; /* TODO: enable this */ - case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: - return true; - default: - return 0; - } -} - -boolean ruvd_is_format_supported(struct pipe_screen *screen, - enum pipe_format format, - enum pipe_video_profile profile) -{ - /* we can only handle this one anyway */ - return format == PIPE_FORMAT_NV12; -}
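
Note on the new addressing path in send_cmd(): on kernels that expose a GPU virtual address for the buffer (the non-legacy branch added by this patch), the command no longer programs a relocation index times four into RUVD_GPCOM_VCPU_DATA1. Instead the buffer VA plus the command offset is written as two 32-bit halves into RUVD_GPCOM_VCPU_DATA0 (low) and RUVD_GPCOM_VCPU_DATA1 (high). The sketch below is only an illustration of that split, not part of the patch; emit_reg() and the sample address are made-up placeholders.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for set_reg(): just prints the register write. */
static void emit_reg(const char *name, uint32_t val)
{
	printf("%s <- 0x%08x\n", name, val);
}

int main(void)
{
	uint64_t va   = 0x0000000123400000ull; /* hypothetical buffer VA */
	uint64_t off  = 0x1000;                /* e.g. FB_BUFFER_OFFSET  */
	uint64_t addr = va + off;

	emit_reg("RUVD_GPCOM_VCPU_DATA0", (uint32_t)addr);         /* low 32 bits  */
	emit_reg("RUVD_GPCOM_VCPU_DATA1", (uint32_t)(addr >> 32)); /* high 32 bits */
	return 0;
}

The apparent rationale, as far as the hunk shows, is that with per-process virtual memory the address is already known at IB build time, so the firmware can be given the VA directly; the reloc-index encoding is kept only for the legacy (dec->use_legacy) path.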