1 /**************************************************************************
3 * Copyright 2011 Advanced Micro Devices, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Christian König <christian.koenig@amd.com>
34 #include <sys/types.h>
40 #include "pipe/p_video_codec.h"
42 #include "util/u_memory.h"
43 #include "util/u_video.h"
45 #include "vl/vl_defines.h"
46 #include "vl/vl_mpeg12_decoder.h"
48 #include "radeon/drm/radeon_winsys.h"
49 #include "r600_pipe_common.h"
50 #include "radeon_video.h"
51 #include "radeon_uvd.h"
55 #define NUM_MPEG2_REFS 6
56 #define NUM_H264_REFS 17
57 #define NUM_VC1_REFS 5
59 #define FB_BUFFER_OFFSET 0x1000
60 #define FB_BUFFER_SIZE 2048
62 /* UVD decoder representation */
64 struct pipe_video_codec base
;
68 unsigned stream_handle
;
69 unsigned frame_number
;
71 struct pipe_screen
*screen
;
72 struct radeon_winsys
* ws
;
73 struct radeon_winsys_cs
* cs
;
77 struct rvid_buffer msg_fb_buffers
[NUM_BUFFERS
];
81 struct rvid_buffer bs_buffers
[NUM_BUFFERS
];
85 struct rvid_buffer dpb
;
88 /* flush IB to the hardware */
89 static void flush(struct ruvd_decoder
*dec
)
91 dec
->ws
->cs_flush(dec
->cs
, RADEON_FLUSH_ASYNC
, NULL
, 0);
94 /* add a new set register command to the IB */
95 static void set_reg(struct ruvd_decoder
*dec
, unsigned reg
, uint32_t val
)
97 uint32_t *pm4
= dec
->cs
->buf
;
98 pm4
[dec
->cs
->cdw
++] = RUVD_PKT0(reg
>> 2, 0);
99 pm4
[dec
->cs
->cdw
++] = val
;
102 /* send a command to the VCPU through the GPCOM registers */
103 static void send_cmd(struct ruvd_decoder
*dec
, unsigned cmd
,
104 struct radeon_winsys_cs_handle
* cs_buf
, uint32_t off
,
105 enum radeon_bo_usage usage
, enum radeon_bo_domain domain
)
109 reloc_idx
= dec
->ws
->cs_add_reloc(dec
->cs
, cs_buf
, usage
, domain
,
111 set_reg(dec
, RUVD_GPCOM_VCPU_DATA0
, off
);
112 set_reg(dec
, RUVD_GPCOM_VCPU_DATA1
, reloc_idx
* 4);
113 set_reg(dec
, RUVD_GPCOM_VCPU_CMD
, cmd
<< 1);
116 /* map the next available message/feedback buffer */
117 static void map_msg_fb_buf(struct ruvd_decoder
*dec
)
119 struct rvid_buffer
* buf
;
122 /* grab the current message/feedback buffer */
123 buf
= &dec
->msg_fb_buffers
[dec
->cur_buffer
];
125 /* and map it for CPU access */
126 ptr
= dec
->ws
->buffer_map(buf
->res
->cs_buf
, dec
->cs
, PIPE_TRANSFER_WRITE
);
128 /* calc buffer offsets */
129 dec
->msg
= (struct ruvd_msg
*)ptr
;
130 dec
->fb
= (uint32_t *)(ptr
+ FB_BUFFER_OFFSET
);
133 /* unmap and send a message command to the VCPU */
134 static void send_msg_buf(struct ruvd_decoder
*dec
)
136 struct rvid_buffer
* buf
;
138 /* ignore the request if message/feedback buffer isn't mapped */
139 if (!dec
->msg
|| !dec
->fb
)
142 /* grab the current message buffer */
143 buf
= &dec
->msg_fb_buffers
[dec
->cur_buffer
];
145 /* unmap the buffer */
146 dec
->ws
->buffer_unmap(buf
->res
->cs_buf
);
150 /* and send it to the hardware */
151 send_cmd(dec
, RUVD_CMD_MSG_BUFFER
, buf
->res
->cs_buf
, 0,
152 RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
155 /* cycle to the next set of buffers */
156 static void next_buffer(struct ruvd_decoder
*dec
)
159 dec
->cur_buffer
%= NUM_BUFFERS
;
162 /* convert the profile into something UVD understands */
163 static uint32_t profile2stream_type(enum pipe_video_profile profile
)
165 switch (u_reduce_video_profile(profile
)) {
166 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
167 return RUVD_CODEC_H264
;
169 case PIPE_VIDEO_FORMAT_VC1
:
170 return RUVD_CODEC_VC1
;
172 case PIPE_VIDEO_FORMAT_MPEG12
:
173 return RUVD_CODEC_MPEG2
;
175 case PIPE_VIDEO_FORMAT_MPEG4
:
176 return RUVD_CODEC_MPEG4
;
184 /* calculate size of reference picture buffer */
185 static unsigned calc_dpb_size(const struct pipe_video_codec
*templ
)
187 unsigned width_in_mb
, height_in_mb
, image_size
, dpb_size
;
189 // always align them to MB size for dpb calculation
190 unsigned width
= align(templ
->width
, VL_MACROBLOCK_WIDTH
);
191 unsigned height
= align(templ
->height
, VL_MACROBLOCK_HEIGHT
);
193 // always one more for currently decoded picture
194 unsigned max_references
= templ
->max_references
+ 1;
196 // aligned size of a single frame
197 image_size
= width
* height
;
198 image_size
+= image_size
/ 2;
199 image_size
= align(image_size
, 1024);
201 // picture width & height in 16 pixel units
202 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
203 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
205 switch (u_reduce_video_profile(templ
->profile
)) {
206 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
207 // the firmware seems to always assume a minimum of ref frames
208 max_references
= MAX2(NUM_H264_REFS
, max_references
);
210 // reference picture buffer
211 dpb_size
= image_size
* max_references
;
213 // macroblock context buffer
214 dpb_size
+= width_in_mb
* height_in_mb
* max_references
* 192;
217 dpb_size
+= width_in_mb
* height_in_mb
* 32;
220 case PIPE_VIDEO_FORMAT_VC1
:
221 // the firmware seems to always assume a minimum of ref frames
222 max_references
= MAX2(NUM_VC1_REFS
, max_references
);
224 // reference picture buffer
225 dpb_size
= image_size
* max_references
;
228 dpb_size
+= width_in_mb
* height_in_mb
* 128;
231 dpb_size
+= width_in_mb
* 64;
234 dpb_size
+= width_in_mb
* 128;
237 dpb_size
+= align(MAX2(width_in_mb
, height_in_mb
) * 7 * 16, 64);
240 case PIPE_VIDEO_FORMAT_MPEG12
:
241 // reference picture buffer, must be big enough for all frames
242 dpb_size
= image_size
* NUM_MPEG2_REFS
;
245 case PIPE_VIDEO_FORMAT_MPEG4
:
246 // reference picture buffer
247 dpb_size
= image_size
* max_references
;
250 dpb_size
+= width_in_mb
* height_in_mb
* 64;
253 dpb_size
+= align(width_in_mb
* height_in_mb
* 32, 64);
257 // something is missing here
260 // at least use a sane default value
261 dpb_size
= 32 * 1024 * 1024;
267 /* get h264 specific message bits */
268 static struct ruvd_h264
get_h264_msg(struct ruvd_decoder
*dec
, struct pipe_h264_picture_desc
*pic
)
270 struct ruvd_h264 result
;
272 memset(&result
, 0, sizeof(result
));
273 switch (pic
->base
.profile
) {
274 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE
:
275 result
.profile
= RUVD_H264_PROFILE_BASELINE
;
278 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN
:
279 result
.profile
= RUVD_H264_PROFILE_MAIN
;
282 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
:
283 result
.profile
= RUVD_H264_PROFILE_HIGH
;
290 if (((dec
->base
.width
* dec
->base
.height
) >> 8) <= 1620)
295 result
.sps_info_flags
= 0;
296 result
.sps_info_flags
|= pic
->pps
->sps
->direct_8x8_inference_flag
<< 0;
297 result
.sps_info_flags
|= pic
->pps
->sps
->mb_adaptive_frame_field_flag
<< 1;
298 result
.sps_info_flags
|= pic
->pps
->sps
->frame_mbs_only_flag
<< 2;
299 result
.sps_info_flags
|= pic
->pps
->sps
->delta_pic_order_always_zero_flag
<< 3;
301 result
.bit_depth_luma_minus8
= pic
->pps
->sps
->bit_depth_luma_minus8
;
302 result
.bit_depth_chroma_minus8
= pic
->pps
->sps
->bit_depth_chroma_minus8
;
303 result
.log2_max_frame_num_minus4
= pic
->pps
->sps
->log2_max_frame_num_minus4
;
304 result
.pic_order_cnt_type
= pic
->pps
->sps
->pic_order_cnt_type
;
305 result
.log2_max_pic_order_cnt_lsb_minus4
= pic
->pps
->sps
->log2_max_pic_order_cnt_lsb_minus4
;
307 switch (dec
->base
.chroma_format
) {
308 case PIPE_VIDEO_CHROMA_FORMAT_400
:
309 result
.chroma_format
= 0;
311 case PIPE_VIDEO_CHROMA_FORMAT_420
:
312 result
.chroma_format
= 1;
314 case PIPE_VIDEO_CHROMA_FORMAT_422
:
315 result
.chroma_format
= 2;
317 case PIPE_VIDEO_CHROMA_FORMAT_444
:
318 result
.chroma_format
= 3;
322 result
.pps_info_flags
= 0;
323 result
.pps_info_flags
|= pic
->pps
->transform_8x8_mode_flag
<< 0;
324 result
.pps_info_flags
|= pic
->pps
->redundant_pic_cnt_present_flag
<< 1;
325 result
.pps_info_flags
|= pic
->pps
->constrained_intra_pred_flag
<< 2;
326 result
.pps_info_flags
|= pic
->pps
->deblocking_filter_control_present_flag
<< 3;
327 result
.pps_info_flags
|= pic
->pps
->weighted_bipred_idc
<< 4;
328 result
.pps_info_flags
|= pic
->pps
->weighted_pred_flag
<< 6;
329 result
.pps_info_flags
|= pic
->pps
->bottom_field_pic_order_in_frame_present_flag
<< 7;
330 result
.pps_info_flags
|= pic
->pps
->entropy_coding_mode_flag
<< 8;
332 result
.num_slice_groups_minus1
= pic
->pps
->num_slice_groups_minus1
;
333 result
.slice_group_map_type
= pic
->pps
->slice_group_map_type
;
334 result
.slice_group_change_rate_minus1
= pic
->pps
->slice_group_change_rate_minus1
;
335 result
.pic_init_qp_minus26
= pic
->pps
->pic_init_qp_minus26
;
336 result
.chroma_qp_index_offset
= pic
->pps
->chroma_qp_index_offset
;
337 result
.second_chroma_qp_index_offset
= pic
->pps
->second_chroma_qp_index_offset
;
339 memcpy(result
.scaling_list_4x4
, pic
->pps
->ScalingList4x4
, 6*16);
340 memcpy(result
.scaling_list_8x8
, pic
->pps
->ScalingList8x8
, 2*64);
342 result
.num_ref_frames
= pic
->num_ref_frames
;
344 result
.num_ref_idx_l0_active_minus1
= pic
->num_ref_idx_l0_active_minus1
;
345 result
.num_ref_idx_l1_active_minus1
= pic
->num_ref_idx_l1_active_minus1
;
347 result
.frame_num
= pic
->frame_num
;
348 memcpy(result
.frame_num_list
, pic
->frame_num_list
, 4*16);
349 result
.curr_field_order_cnt_list
[0] = pic
->field_order_cnt
[0];
350 result
.curr_field_order_cnt_list
[1] = pic
->field_order_cnt
[1];
351 memcpy(result
.field_order_cnt_list
, pic
->field_order_cnt_list
, 4*16*2);
353 result
.decoded_pic_idx
= pic
->frame_num
;
358 /* get vc1 specific message bits */
359 static struct ruvd_vc1
get_vc1_msg(struct pipe_vc1_picture_desc
*pic
)
361 struct ruvd_vc1 result
;
363 memset(&result
, 0, sizeof(result
));
365 switch(pic
->base
.profile
) {
366 case PIPE_VIDEO_PROFILE_VC1_SIMPLE
:
367 result
.profile
= RUVD_VC1_PROFILE_SIMPLE
;
371 case PIPE_VIDEO_PROFILE_VC1_MAIN
:
372 result
.profile
= RUVD_VC1_PROFILE_MAIN
;
376 case PIPE_VIDEO_PROFILE_VC1_ADVANCED
:
377 result
.profile
= RUVD_VC1_PROFILE_ADVANCED
;
385 /* fields common for all profiles */
386 result
.sps_info_flags
|= pic
->postprocflag
<< 7;
387 result
.sps_info_flags
|= pic
->pulldown
<< 6;
388 result
.sps_info_flags
|= pic
->interlace
<< 5;
389 result
.sps_info_flags
|= pic
->tfcntrflag
<< 4;
390 result
.sps_info_flags
|= pic
->finterpflag
<< 3;
391 result
.sps_info_flags
|= pic
->psf
<< 1;
393 result
.pps_info_flags
|= pic
->range_mapy_flag
<< 31;
394 result
.pps_info_flags
|= pic
->range_mapy
<< 28;
395 result
.pps_info_flags
|= pic
->range_mapuv_flag
<< 27;
396 result
.pps_info_flags
|= pic
->range_mapuv
<< 24;
397 result
.pps_info_flags
|= pic
->multires
<< 21;
398 result
.pps_info_flags
|= pic
->maxbframes
<< 16;
399 result
.pps_info_flags
|= pic
->overlap
<< 11;
400 result
.pps_info_flags
|= pic
->quantizer
<< 9;
401 result
.pps_info_flags
|= pic
->panscan_flag
<< 7;
402 result
.pps_info_flags
|= pic
->refdist_flag
<< 6;
403 result
.pps_info_flags
|= pic
->vstransform
<< 0;
405 /* some fields only apply to main/advanced profile */
406 if (pic
->base
.profile
!= PIPE_VIDEO_PROFILE_VC1_SIMPLE
) {
407 result
.pps_info_flags
|= pic
->syncmarker
<< 20;
408 result
.pps_info_flags
|= pic
->rangered
<< 19;
409 result
.pps_info_flags
|= pic
->loopfilter
<< 5;
410 result
.pps_info_flags
|= pic
->fastuvmc
<< 4;
411 result
.pps_info_flags
|= pic
->extended_mv
<< 3;
412 result
.pps_info_flags
|= pic
->extended_dmv
<< 8;
413 result
.pps_info_flags
|= pic
->dquant
<< 1;
416 result
.chroma_format
= 1;
419 //(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
422 uint8_t frame_coding_mode
423 uint8_t deblockEnable
430 /* extract the frame number from a referenced video buffer */
431 static uint32_t get_ref_pic_idx(struct ruvd_decoder
*dec
, struct pipe_video_buffer
*ref
)
433 uint32_t min
= MAX2(dec
->frame_number
, NUM_MPEG2_REFS
) - NUM_MPEG2_REFS
;
434 uint32_t max
= MAX2(dec
->frame_number
, 1) - 1;
437 /* seems to be the most sane fallback */
441 /* get the frame number from the associated data */
442 frame
= (uintptr_t)vl_video_buffer_get_associated_data(ref
, &dec
->base
);
444 /* limit the frame number to a valid range */
445 return MAX2(MIN2(frame
, max
), min
);
448 /* get mpeg2 specific msg bits */
449 static struct ruvd_mpeg2
get_mpeg2_msg(struct ruvd_decoder
*dec
,
450 struct pipe_mpeg12_picture_desc
*pic
)
452 const int *zscan
= pic
->alternate_scan
? vl_zscan_alternate
: vl_zscan_normal
;
453 struct ruvd_mpeg2 result
;
456 memset(&result
, 0, sizeof(result
));
457 result
.decoded_pic_idx
= dec
->frame_number
;
458 for (i
= 0; i
< 2; ++i
)
459 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
461 result
.load_intra_quantiser_matrix
= 1;
462 result
.load_nonintra_quantiser_matrix
= 1;
464 for (i
= 0; i
< 64; ++i
) {
465 result
.intra_quantiser_matrix
[i
] = pic
->intra_matrix
[zscan
[i
]];
466 result
.nonintra_quantiser_matrix
[i
] = pic
->non_intra_matrix
[zscan
[i
]];
469 result
.profile_and_level_indication
= 0;
470 result
.chroma_format
= 0x1;
472 result
.picture_coding_type
= pic
->picture_coding_type
;
473 result
.f_code
[0][0] = pic
->f_code
[0][0] + 1;
474 result
.f_code
[0][1] = pic
->f_code
[0][1] + 1;
475 result
.f_code
[1][0] = pic
->f_code
[1][0] + 1;
476 result
.f_code
[1][1] = pic
->f_code
[1][1] + 1;
477 result
.intra_dc_precision
= pic
->intra_dc_precision
;
478 result
.pic_structure
= pic
->picture_structure
;
479 result
.top_field_first
= pic
->top_field_first
;
480 result
.frame_pred_frame_dct
= pic
->frame_pred_frame_dct
;
481 result
.concealment_motion_vectors
= pic
->concealment_motion_vectors
;
482 result
.q_scale_type
= pic
->q_scale_type
;
483 result
.intra_vlc_format
= pic
->intra_vlc_format
;
484 result
.alternate_scan
= pic
->alternate_scan
;
489 /* get mpeg4 specific msg bits */
490 static struct ruvd_mpeg4
get_mpeg4_msg(struct ruvd_decoder
*dec
,
491 struct pipe_mpeg4_picture_desc
*pic
)
493 struct ruvd_mpeg4 result
;
496 memset(&result
, 0, sizeof(result
));
497 result
.decoded_pic_idx
= dec
->frame_number
;
498 for (i
= 0; i
< 2; ++i
)
499 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
501 result
.variant_type
= 0;
502 result
.profile_and_level_indication
= 0xF0; // ASP Level0
504 result
.video_object_layer_verid
= 0x5; // advanced simple
505 result
.video_object_layer_shape
= 0x0; // rectangular
507 result
.video_object_layer_width
= dec
->base
.width
;
508 result
.video_object_layer_height
= dec
->base
.height
;
510 result
.vop_time_increment_resolution
= pic
->vop_time_increment_resolution
;
512 result
.flags
|= pic
->short_video_header
<< 0;
513 //result.flags |= obmc_disable << 1;
514 result
.flags
|= pic
->interlaced
<< 2;
515 result
.flags
|= 1 << 3; // load_intra_quant_mat
516 result
.flags
|= 1 << 4; // load_nonintra_quant_mat
517 result
.flags
|= pic
->quarter_sample
<< 5;
518 result
.flags
|= 1 << 6; // complexity_estimation_disable
519 result
.flags
|= pic
->resync_marker_disable
<< 7;
520 //result.flags |= data_partitioned << 8;
521 //result.flags |= reversible_vlc << 9;
522 result
.flags
|= 0 << 10; // newpred_enable
523 result
.flags
|= 0 << 11; // reduced_resolution_vop_enable
524 //result.flags |= scalability << 12;
525 //result.flags |= is_object_layer_identifier << 13;
526 //result.flags |= fixed_vop_rate << 14;
527 //result.flags |= newpred_segment_type << 15;
529 result
.quant_type
= pic
->quant_type
;
531 for (i
= 0; i
< 64; ++i
) {
532 result
.intra_quant_mat
[i
] = pic
->intra_matrix
[vl_zscan_normal
[i
]];
533 result
.nonintra_quant_mat
[i
] = pic
->non_intra_matrix
[vl_zscan_normal
[i
]];
539 uint8_t vop_coding_type
540 uint8_t vop_fcode_forward
541 uint8_t vop_fcode_backward
542 uint8_t rounding_control
543 uint8_t alternate_vertical_scan_flag
544 uint8_t top_field_first
551 * destroy this video decoder
553 static void ruvd_destroy(struct pipe_video_codec
*decoder
)
555 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
561 memset(dec
->msg
, 0, sizeof(*dec
->msg
));
562 dec
->msg
->size
= sizeof(*dec
->msg
);
563 dec
->msg
->msg_type
= RUVD_MSG_DESTROY
;
564 dec
->msg
->stream_handle
= dec
->stream_handle
;
569 dec
->ws
->cs_destroy(dec
->cs
);
571 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
572 rvid_destroy_buffer(&dec
->msg_fb_buffers
[i
]);
573 rvid_destroy_buffer(&dec
->bs_buffers
[i
]);
576 rvid_destroy_buffer(&dec
->dpb
);
/*
 * Free associated data in the video buffer callback.
 *
 * Registered as the destroy callback when the frame number is attached to a
 * target buffer via vl_video_buffer_set_associated_data(). The stored value
 * is an integer cast to a pointer, not an allocation, so nothing is released.
 */
static void ruvd_destroy_associated_data(void *data)
{
	/* NOOP, since we only use an intptr */
	(void)data;
}
588 * start decoding of a new frame
590 static void ruvd_begin_frame(struct pipe_video_codec
*decoder
,
591 struct pipe_video_buffer
*target
,
592 struct pipe_picture_desc
*picture
)
594 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
599 frame
= ++dec
->frame_number
;
600 vl_video_buffer_set_associated_data(target
, decoder
, (void *)frame
,
601 &ruvd_destroy_associated_data
);
604 dec
->bs_ptr
= dec
->ws
->buffer_map(
605 dec
->bs_buffers
[dec
->cur_buffer
].res
->cs_buf
,
606 dec
->cs
, PIPE_TRANSFER_WRITE
);
610 * decode a macroblock
612 static void ruvd_decode_macroblock(struct pipe_video_codec
*decoder
,
613 struct pipe_video_buffer
*target
,
614 struct pipe_picture_desc
*picture
,
615 const struct pipe_macroblock
*macroblocks
,
616 unsigned num_macroblocks
)
618 /* not supported (yet) */
625 static void ruvd_decode_bitstream(struct pipe_video_codec
*decoder
,
626 struct pipe_video_buffer
*target
,
627 struct pipe_picture_desc
*picture
,
628 unsigned num_buffers
,
629 const void * const *buffers
,
630 const unsigned *sizes
)
632 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
640 for (i
= 0; i
< num_buffers
; ++i
) {
641 struct rvid_buffer
*buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
642 unsigned new_size
= dec
->bs_size
+ sizes
[i
];
644 if (new_size
> buf
->res
->buf
->size
) {
645 dec
->ws
->buffer_unmap(buf
->res
->cs_buf
);
646 if (!rvid_resize_buffer(dec
->screen
, dec
->cs
, buf
, new_size
)) {
647 RVID_ERR("Can't resize bitstream buffer!");
651 dec
->bs_ptr
= dec
->ws
->buffer_map(buf
->res
->cs_buf
, dec
->cs
,
652 PIPE_TRANSFER_WRITE
);
656 dec
->bs_ptr
+= dec
->bs_size
;
659 memcpy(dec
->bs_ptr
, buffers
[i
], sizes
[i
]);
660 dec
->bs_size
+= sizes
[i
];
661 dec
->bs_ptr
+= sizes
[i
];
666 * end decoding of the current frame
668 static void ruvd_end_frame(struct pipe_video_codec
*decoder
,
669 struct pipe_video_buffer
*target
,
670 struct pipe_picture_desc
*picture
)
672 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
673 struct radeon_winsys_cs_handle
*dt
;
674 struct rvid_buffer
*msg_fb_buf
, *bs_buf
;
682 msg_fb_buf
= &dec
->msg_fb_buffers
[dec
->cur_buffer
];
683 bs_buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
685 bs_size
= align(dec
->bs_size
, 128);
686 memset(dec
->bs_ptr
, 0, bs_size
- dec
->bs_size
);
687 dec
->ws
->buffer_unmap(bs_buf
->res
->cs_buf
);
690 dec
->msg
->size
= sizeof(*dec
->msg
);
691 dec
->msg
->msg_type
= RUVD_MSG_DECODE
;
692 dec
->msg
->stream_handle
= dec
->stream_handle
;
693 dec
->msg
->status_report_feedback_number
= dec
->frame_number
;
695 dec
->msg
->body
.decode
.stream_type
= profile2stream_type(dec
->base
.profile
);
696 dec
->msg
->body
.decode
.decode_flags
= 0x1;
697 dec
->msg
->body
.decode
.width_in_samples
= dec
->base
.width
;
698 dec
->msg
->body
.decode
.height_in_samples
= dec
->base
.height
;
700 dec
->msg
->body
.decode
.dpb_size
= dec
->dpb
.res
->buf
->size
;
701 dec
->msg
->body
.decode
.bsd_size
= bs_size
;
703 dt
= dec
->set_dtb(dec
->msg
, (struct vl_video_buffer
*)target
);
705 switch (u_reduce_video_profile(picture
->profile
)) {
706 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
707 dec
->msg
->body
.decode
.codec
.h264
= get_h264_msg(dec
, (struct pipe_h264_picture_desc
*)picture
);
710 case PIPE_VIDEO_FORMAT_VC1
:
711 dec
->msg
->body
.decode
.codec
.vc1
= get_vc1_msg((struct pipe_vc1_picture_desc
*)picture
);
714 case PIPE_VIDEO_FORMAT_MPEG12
:
715 dec
->msg
->body
.decode
.codec
.mpeg2
= get_mpeg2_msg(dec
, (struct pipe_mpeg12_picture_desc
*)picture
);
718 case PIPE_VIDEO_FORMAT_MPEG4
:
719 dec
->msg
->body
.decode
.codec
.mpeg4
= get_mpeg4_msg(dec
, (struct pipe_mpeg4_picture_desc
*)picture
);
727 dec
->msg
->body
.decode
.db_surf_tile_config
= dec
->msg
->body
.decode
.dt_surf_tile_config
;
728 dec
->msg
->body
.decode
.extension_support
= 0x1;
730 /* set at least the feedback buffer size */
731 dec
->fb
[0] = FB_BUFFER_SIZE
;
735 send_cmd(dec
, RUVD_CMD_DPB_BUFFER
, dec
->dpb
.res
->cs_buf
, 0,
736 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
737 send_cmd(dec
, RUVD_CMD_BITSTREAM_BUFFER
, bs_buf
->res
->cs_buf
,
738 0, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
739 send_cmd(dec
, RUVD_CMD_DECODING_TARGET_BUFFER
, dt
, 0,
740 RADEON_USAGE_WRITE
, RADEON_DOMAIN_VRAM
);
741 send_cmd(dec
, RUVD_CMD_FEEDBACK_BUFFER
, msg_fb_buf
->res
->cs_buf
,
742 FB_BUFFER_OFFSET
, RADEON_USAGE_WRITE
, RADEON_DOMAIN_GTT
);
743 set_reg(dec
, RUVD_ENGINE_CNTL
, 1);
/**
 * flush any outstanding command buffers to the hardware
 *
 * Installed as the pipe_video_codec flush hook by ruvd_create_decoder().
 * NOTE(review): no statements are visible for this hook, so it is kept as a
 * no-op - confirm that command submission happens elsewhere.
 */
static void ruvd_flush(struct pipe_video_codec *decoder)
{
}
757 * create a UVD decoder
759 struct pipe_video_codec
*ruvd_create_decoder(struct pipe_context
*context
,
760 const struct pipe_video_codec
*templ
,
761 ruvd_set_dtb set_dtb
)
763 struct radeon_winsys
* ws
= ((struct r600_common_context
*)context
)->ws
;
764 unsigned dpb_size
= calc_dpb_size(templ
);
765 unsigned width
= templ
->width
, height
= templ
->height
;
766 unsigned bs_buf_size
;
767 struct radeon_info info
;
768 struct ruvd_decoder
*dec
;
771 ws
->query_info(ws
, &info
);
773 switch(u_reduce_video_profile(templ
->profile
)) {
774 case PIPE_VIDEO_FORMAT_MPEG12
:
775 if (templ
->entrypoint
> PIPE_VIDEO_ENTRYPOINT_BITSTREAM
|| info
.family
< CHIP_PALM
)
776 return vl_create_mpeg12_decoder(context
, templ
);
779 case PIPE_VIDEO_FORMAT_MPEG4
:
780 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
781 width
= align(width
, VL_MACROBLOCK_WIDTH
);
782 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
790 dec
= CALLOC_STRUCT(ruvd_decoder
);
796 dec
->base
.context
= context
;
797 dec
->base
.width
= width
;
798 dec
->base
.height
= height
;
800 dec
->base
.destroy
= ruvd_destroy
;
801 dec
->base
.begin_frame
= ruvd_begin_frame
;
802 dec
->base
.decode_macroblock
= ruvd_decode_macroblock
;
803 dec
->base
.decode_bitstream
= ruvd_decode_bitstream
;
804 dec
->base
.end_frame
= ruvd_end_frame
;
805 dec
->base
.flush
= ruvd_flush
;
807 dec
->set_dtb
= set_dtb
;
808 dec
->stream_handle
= rvid_alloc_stream_handle();
809 dec
->screen
= context
->screen
;
811 dec
->cs
= ws
->cs_create(ws
, RING_UVD
, NULL
, NULL
, NULL
);
813 RVID_ERR("Can't get command submission context.\n");
817 bs_buf_size
= width
* height
* 512 / (16 * 16);
818 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
819 unsigned msg_fb_size
= FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
;
820 STATIC_ASSERT(sizeof(struct ruvd_msg
) <= FB_BUFFER_OFFSET
);
821 if (!rvid_create_buffer(dec
->screen
, &dec
->msg_fb_buffers
[i
],
822 msg_fb_size
, PIPE_USAGE_DEFAULT
)) {
823 RVID_ERR("Can't allocated message buffers.\n");
827 if (!rvid_create_buffer(dec
->screen
, &dec
->bs_buffers
[i
],
828 bs_buf_size
, PIPE_USAGE_STAGING
)) {
829 RVID_ERR("Can't allocated bitstream buffers.\n");
833 rvid_clear_buffer(dec
->ws
, dec
->cs
, &dec
->msg_fb_buffers
[i
]);
834 rvid_clear_buffer(dec
->ws
, dec
->cs
, &dec
->bs_buffers
[i
]);
837 if (!rvid_create_buffer(dec
->screen
, &dec
->dpb
, dpb_size
, PIPE_USAGE_DEFAULT
)) {
838 RVID_ERR("Can't allocated dpb.\n");
842 rvid_clear_buffer(dec
->ws
, dec
->cs
, &dec
->dpb
);
845 dec
->msg
->size
= sizeof(*dec
->msg
);
846 dec
->msg
->msg_type
= RUVD_MSG_CREATE
;
847 dec
->msg
->stream_handle
= dec
->stream_handle
;
848 dec
->msg
->body
.create
.stream_type
= profile2stream_type(dec
->base
.profile
);
849 dec
->msg
->body
.create
.width_in_samples
= dec
->base
.width
;
850 dec
->msg
->body
.create
.height_in_samples
= dec
->base
.height
;
851 dec
->msg
->body
.create
.dpb_size
= dec
->dpb
.res
->buf
->size
;
859 if (dec
->cs
) dec
->ws
->cs_destroy(dec
->cs
);
861 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
862 rvid_destroy_buffer(&dec
->msg_fb_buffers
[i
]);
863 rvid_destroy_buffer(&dec
->bs_buffers
[i
]);
866 rvid_destroy_buffer(&dec
->dpb
);
873 /* calculate top/bottom offset */
874 static unsigned texture_offset(struct radeon_surface
*surface
, unsigned layer
)
876 return surface
->level
[0].offset
+
877 layer
* surface
->level
[0].slice_size
;
880 /* hw encode the aspect of macro tiles */
881 static unsigned macro_tile_aspect(unsigned macro_tile_aspect
)
883 switch (macro_tile_aspect
) {
885 case 1: macro_tile_aspect
= 0; break;
886 case 2: macro_tile_aspect
= 1; break;
887 case 4: macro_tile_aspect
= 2; break;
888 case 8: macro_tile_aspect
= 3; break;
890 return macro_tile_aspect
;
893 /* hw encode the bank width and height */
894 static unsigned bank_wh(unsigned bankwh
)
898 case 1: bankwh
= 0; break;
899 case 2: bankwh
= 1; break;
900 case 4: bankwh
= 2; break;
901 case 8: bankwh
= 3; break;
907 * fill decoding target field from the luma and chroma surfaces
909 void ruvd_set_dt_surfaces(struct ruvd_msg
*msg
, struct radeon_surface
*luma
,
910 struct radeon_surface
*chroma
)
912 msg
->body
.decode
.dt_pitch
= luma
->level
[0].pitch_bytes
;
913 switch (luma
->level
[0].mode
) {
914 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
915 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_LINEAR
;
916 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_LINEAR
;
918 case RADEON_SURF_MODE_1D
:
919 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
920 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_1D_THIN
;
922 case RADEON_SURF_MODE_2D
:
923 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
924 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_2D_THIN
;
931 msg
->body
.decode
.dt_luma_top_offset
= texture_offset(luma
, 0);
932 msg
->body
.decode
.dt_chroma_top_offset
= texture_offset(chroma
, 0);
933 if (msg
->body
.decode
.dt_field_mode
) {
934 msg
->body
.decode
.dt_luma_bottom_offset
= texture_offset(luma
, 1);
935 msg
->body
.decode
.dt_chroma_bottom_offset
= texture_offset(chroma
, 1);
937 msg
->body
.decode
.dt_luma_bottom_offset
= msg
->body
.decode
.dt_luma_top_offset
;
938 msg
->body
.decode
.dt_chroma_bottom_offset
= msg
->body
.decode
.dt_chroma_top_offset
;
941 assert(luma
->bankw
== chroma
->bankw
);
942 assert(luma
->bankh
== chroma
->bankh
);
943 assert(luma
->mtilea
== chroma
->mtilea
);
945 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_BANK_WIDTH(bank_wh(luma
->bankw
));
946 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_BANK_HEIGHT(bank_wh(luma
->bankh
));
947 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma
->mtilea
));