1 /**************************************************************************
3 * Copyright 2011 Advanced Micro Devices, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Christian König <christian.koenig@amd.com>
34 #include <sys/types.h>
40 #include "pipe/p_video_codec.h"
42 #include "util/u_memory.h"
43 #include "util/u_video.h"
45 #include "vl/vl_defines.h"
46 #include "vl/vl_mpeg12_decoder.h"
48 #include "../../winsys/radeon/drm/radeon_winsys.h"
49 #include "r600_pipe_common.h"
50 #include "radeon_uvd.h"
/* Print an error message to stderr, prefixed with "EE", the source file, the
 * line number and the function name; fmt and any further arguments are
 * forwarded to fprintf. */
52 #define RUVD_ERR(fmt, args...) \
53 fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)
/* Per-codec reference frame counts used when sizing the dpb in calc_dpb_size();
 * NUM_MPEG2_REFS also bounds the frame index window in get_ref_pic_idx(). */
/* NOTE(review): NUM_BUFFERS is used below but its definition is not part of
 * this listing (embedded numbering skips 54-56) - confirm its value. */
57 #define NUM_MPEG2_REFS 6
58 #define NUM_H264_REFS 17
59 #define NUM_VC1_REFS 5
/* Offset of the feedback area inside a message/feedback buffer and the size of
 * that area; both are added up to get the buffer allocation size. */
61 #define FB_BUFFER_OFFSET 0x1000
62 #define FB_BUFFER_SIZE 2048
64 /* UVD buffer representation: a winsys buffer plus the handle used to reference it in command submission */
/* NOTE(review): the struct header is not part of this listing (embedded
 * numbering skips 65-66); the tag ruvd_buffer is taken from later uses. */
/* reference to the winsys buffer; released through pb_reference() in destroy_buffer() */
67 struct pb_buffer
* buf
;
/* handle fetched with buffer_get_cs_handle(); passed to buffer_map, buffer_unmap and cs_add_reloc */
68 struct radeon_winsys_cs_handle
* cs_handle
;
71 /* UVD decoder representation */
/* NOTE(review): the struct header and several members used later (set_dtb,
 * cur_buffer, msg, fb, bs_ptr, bs_size) are not part of this listing - the
 * embedded numbering skips lines here. */
/* generic codec interface; the entry points cast pipe_video_codec pointers to
 * ruvd_decoder, so this presumably has to stay the first member - confirm */
73 struct pipe_video_codec base
;
/* value from alloc_stream_handle(), copied into every message sent to the firmware */
77 unsigned stream_handle
;
/* incremented once per ruvd_begin_frame() call */
78 unsigned frame_number
;
/* winsys and the command stream created on it for the UVD ring */
80 struct radeon_winsys
* ws
;
81 struct radeon_winsys_cs
* cs
;
/* message/feedback buffers, indexed by cur_buffer */
85 struct ruvd_buffer msg_fb_buffers
[NUM_BUFFERS
];
/* bitstream buffers, indexed by cur_buffer */
89 struct ruvd_buffer bs_buffers
[NUM_BUFFERS
];
/* buffer sized by calc_dpb_size() and handed to the firmware as RUVD_CMD_DPB_BUFFER */
93 struct ruvd_buffer dpb
;
/**
 * Generate a UVD stream handle.
 *
 * The handle is the bit-reversed process id (bit 0 of the pid lands in bit 31
 * of the handle, bit 1 in bit 30, and so on) XOR-ed with a call counter that
 * is incremented before use, so the first call in a process XORs with 1, the
 * second with 2, etc.
 *
 * The counter is a plain function-local static with no locking around it.
 *
 * @return the newly generated stream handle
 */
static unsigned alloc_stream_handle(void)
{
	static unsigned counter = 0;
	unsigned stream_handle = 0;
	unsigned pid = getpid();
	unsigned i;

	/* mirror the 32 low bits of the pid */
	for (i = 0; i < 32; ++i)
		stream_handle |= ((pid >> i) & 1) << (31 - i);

	/* mix in the per-process call counter */
	stream_handle ^= ++counter;
	return stream_handle;
}
111 /* flush IB to the hardware */
112 static void flush(struct ruvd_decoder
*dec
)
114 dec
->ws
->cs_flush(dec
->cs
, RADEON_FLUSH_ASYNC
, 0);
117 /* add a new set register command to the IB */
118 static void set_reg(struct ruvd_decoder
*dec
, unsigned reg
, uint32_t val
)
120 uint32_t *pm4
= dec
->cs
->buf
;
121 pm4
[dec
->cs
->cdw
++] = RUVD_PKT0(reg
>> 2, 0);
122 pm4
[dec
->cs
->cdw
++] = val
;
125 /*
 * Send a command to the VCPU through the GPCOM registers.
 *
 * cs_buf is added to the command stream's relocation list with the given
 * usage and domain; then three registers are written through set_reg():
 * `off` to RUVD_GPCOM_VCPU_DATA0, the relocation index times four to
 * RUVD_GPCOM_VCPU_DATA1 and `cmd << 1` to RUVD_GPCOM_VCPU_CMD.
 */
126 static void send_cmd(struct ruvd_decoder
*dec
, unsigned cmd
,
127 struct radeon_winsys_cs_handle
* cs_buf
, uint32_t off
,
128 enum radeon_bo_usage usage
, enum radeon_bo_domain domain
)
/* NOTE(review): the declaration of reloc_idx is not part of this listing
 * (embedded numbering skips 129-131) - confirm its type. */
132 reloc_idx
= dec
->ws
->cs_add_reloc(dec
->cs
, cs_buf
, usage
, domain
);
133 set_reg(dec
, RUVD_GPCOM_VCPU_DATA0
, off
);
134 set_reg(dec
, RUVD_GPCOM_VCPU_DATA1
, reloc_idx
* 4);
135 set_reg(dec
, RUVD_GPCOM_VCPU_CMD
, cmd
<< 1);
138 /*
 * Map the current message/feedback buffer for CPU writes and point dec->msg
 * at its start and dec->fb at FB_BUFFER_OFFSET past the mapped pointer.
 */
139 static void map_msg_fb_buf(struct ruvd_decoder
*dec
)
141 struct ruvd_buffer
* buf
;
/* NOTE(review): the declaration of ptr is not part of this listing (embedded
 * numbering skips 142-143); the offset arithmetic below depends on its type. */
144 /* grab the current message/feedback buffer */
145 buf
= &dec
->msg_fb_buffers
[dec
->cur_buffer
];
147 /* and map it for CPU access */
148 ptr
= dec
->ws
->buffer_map(buf
->cs_handle
, dec
->cs
, PIPE_TRANSFER_WRITE
);
150 /* calc buffer offsets: message at the start, feedback area behind it */
151 dec
->msg
= (struct ruvd_msg
*)ptr
;
152 dec
->fb
= (uint32_t *)(ptr
+ FB_BUFFER_OFFSET
);
155 /*
 * Unmap the current message/feedback buffer and queue it for the VCPU as
 * RUVD_CMD_MSG_BUFFER (offset 0, read usage, GTT domain).
 */
156 static void send_msg_buf(struct ruvd_decoder
*dec
)
158 struct ruvd_buffer
* buf
;
160 /* ignore the request if message/feedback buffer isn't mapped */
161 if (!dec
->msg
|| !dec
->fb
)
/* NOTE(review): the statement guarded by this test is not part of the listing
 * (embedded numbering skips 162-163). */
164 /* grab the current message buffer */
165 buf
= &dec
->msg_fb_buffers
[dec
->cur_buffer
];
167 /* unmap the buffer */
168 dec
->ws
->buffer_unmap(buf
->cs_handle
);
/* NOTE(review): embedded numbering skips 169-171 here; dec->msg and dec->fb
 * still point into the unmapped buffer as far as this listing shows - confirm
 * whether they are reset. */
172 /* and send it to the hardware */
173 send_cmd(dec
, RUVD_CMD_MSG_BUFFER
, buf
->cs_handle
, 0,
174 RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
177 /*
 * Create a buffer in the winsys.
 *
 * Allocates `size` with an alignment of 4096 in the GTT|VRAM domains and
 * then fetches the matching command submission handle into buffer->cs_handle.
 * Returns a bool; the return statements themselves are not part of this listing.
 */
178 static bool create_buffer(struct ruvd_decoder
*dec
,
179 struct ruvd_buffer
*buffer
,
/* NOTE(review): the `size` parameter declaration is not part of this listing
 * (embedded numbering skips 180-181). */
182 buffer
->buf
= dec
->ws
->buffer_create(dec
->ws
, size
, 4096, false,
183 RADEON_DOMAIN_GTT
| RADEON_DOMAIN_VRAM
);
/* NOTE(review): embedded numbering skips 184-186, presumably the allocation
 * failure check - confirm. */
187 buffer
->cs_handle
= dec
->ws
->buffer_get_cs_handle(buffer
->buf
);
188 if (!buffer
->cs_handle
)
194 /* destroy a buffer */
195 static void destroy_buffer(struct ruvd_buffer
*buffer
)
197 pb_reference(&buffer
->buf
, NULL
);
198 buffer
->cs_handle
= NULL
;
201 /*
 * Reallocate a buffer, preserving its content.
 *
 * A copy of the old descriptor is kept, a new buffer of new_size is created in
 * its place, both are mapped, and MIN2(old size, new_size) bytes are copied
 * from the old mapping to the new one before the old buffer is destroyed.
 */
202 static bool resize_buffer(struct ruvd_decoder
*dec
,
203 struct ruvd_buffer
*new_buf
,
/* number of bytes to carry over: the smaller of the old and the new size */
206 unsigned bytes
= MIN2(new_buf
->buf
->size
, new_size
);
/* remember the old buffer; create_buffer() overwrites *new_buf */
207 struct ruvd_buffer old_buf
= *new_buf
;
208 void *src
= NULL
, *dst
= NULL
;
210 if (!create_buffer(dec
, new_buf
, new_size
))
213 src
= dec
->ws
->buffer_map(old_buf
.cs_handle
, dec
->cs
, PIPE_TRANSFER_READ
);
217 dst
= dec
->ws
->buffer_map(new_buf
->cs_handle
, dec
->cs
, PIPE_TRANSFER_WRITE
);
221 memcpy(dst
, src
, bytes
);
222 if (new_size
> bytes
) {
/* NOTE(review): embedded numbering jumps from 222 to 225, so statements
 * between the test and this memset are missing from the listing; as shown the
 * memset would also cover the bytes just copied - confirm against the full
 * file before relying on this range. */
225 memset(dst
, 0, new_size
);
227 dec
->ws
->buffer_unmap(new_buf
->cs_handle
);
228 dec
->ws
->buffer_unmap(old_buf
.cs_handle
);
229 destroy_buffer(&old_buf
);
/* the lines below look like the failure path: unmap the old buffer if it got
 * mapped and drop the new one (label and return are not in the listing) */
233 if (src
) dec
->ws
->buffer_unmap(old_buf
.cs_handle
);
234 destroy_buffer(new_buf
);
239 /*
 * Clear the buffer with zeros: map it for CPU writes, memset the whole
 * buffer->buf->size bytes and unmap it again.
 */
240 static void clear_buffer(struct ruvd_decoder
*dec
,
241 struct ruvd_buffer
* buffer
)
243 //TODO: let the GPU do the job
244 void *ptr
= dec
->ws
->buffer_map(buffer
->cs_handle
, dec
->cs
,
245 PIPE_TRANSFER_WRITE
);
/* NOTE(review): embedded numbering skips 246-248 here, possibly a check of
 * the mapping result - confirm. */
249 memset(ptr
, 0, buffer
->buf
->size
);
250 dec
->ws
->buffer_unmap(buffer
->cs_handle
);
253 /* cycle to the next set of buffers; cur_buffer is kept within [0, NUM_BUFFERS) by the modulo below */
254 static void next_buffer(struct ruvd_decoder
*dec
)
/* NOTE(review): the line before the modulo is not part of this listing
 * (embedded numbering skips 255-256), presumably the increment - confirm. */
257 dec
->cur_buffer
%= NUM_BUFFERS
;
260 /*
 * Convert the profile into something UVD understands: the profile is reduced
 * to its codec format with u_reduce_video_profile() and mapped to the
 * matching RUVD_CODEC_* constant.
 */
261 static uint32_t profile2stream_type(enum pipe_video_profile profile
)
263 switch (u_reduce_video_profile(profile
)) {
264 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
265 return RUVD_CODEC_H264
;
267 case PIPE_VIDEO_FORMAT_VC1
:
268 return RUVD_CODEC_VC1
;
270 case PIPE_VIDEO_FORMAT_MPEG12
:
271 return RUVD_CODEC_MPEG2
;
273 case PIPE_VIDEO_FORMAT_MPEG4
:
274 return RUVD_CODEC_MPEG4
;
/* NOTE(review): handling of any other format is not part of this listing
 * (embedded numbering skips 275 onwards). */
282 /*
 * Calculate size of reference picture buffer.
 *
 * Width and height from the template are aligned to the macroblock size, a
 * single frame is sized as width * height * 1.5 aligned to 1024, and the
 * total is then built per codec from that frame size, the reference count and
 * a number of per-macroblock additions.
 */
283 static unsigned calc_dpb_size(const struct pipe_video_codec
*templ
)
285 unsigned width_in_mb
, height_in_mb
, image_size
, dpb_size
;
287 // always align them to MB size for dpb calculation
288 unsigned width
= align(templ
->width
, VL_MACROBLOCK_WIDTH
);
289 unsigned height
= align(templ
->height
, VL_MACROBLOCK_HEIGHT
);
291 // always one more for currently decoded picture
292 unsigned max_references
= templ
->max_references
+ 1;
294 // aligned size of a single frame
295 image_size
= width
* height
;
296 image_size
+= image_size
/ 2;
297 image_size
= align(image_size
, 1024);
299 // picture width & height in 16 pixel units
300 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
301 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
/* NOTE(review): the separators between the cases below (embedded numbering
 * skips a few lines after each case body) are not part of this listing. */
303 switch (u_reduce_video_profile(templ
->profile
)) {
304 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
305 // the firmware seems to always assume a minimum of ref frames
306 max_references
= MAX2(NUM_H264_REFS
, max_references
);
308 // reference picture buffer
309 dpb_size
= image_size
* max_references
;
311 // macroblock context buffer
312 dpb_size
+= width_in_mb
* height_in_mb
* max_references
* 192;
315 dpb_size
+= width_in_mb
* height_in_mb
* 32;
318 case PIPE_VIDEO_FORMAT_VC1
:
319 // the firmware seems to always assume a minimum of ref frames
320 max_references
= MAX2(NUM_VC1_REFS
, max_references
);
322 // reference picture buffer
323 dpb_size
= image_size
* max_references
;
326 dpb_size
+= width_in_mb
* height_in_mb
* 128;
329 dpb_size
+= width_in_mb
* 64;
332 dpb_size
+= width_in_mb
* 128;
335 dpb_size
+= align(MAX2(width_in_mb
, height_in_mb
) * 7 * 16, 64);
338 case PIPE_VIDEO_FORMAT_MPEG12
:
339 // reference picture buffer, must be big enough for all frames
340 dpb_size
= image_size
* NUM_MPEG2_REFS
;
343 case PIPE_VIDEO_FORMAT_MPEG4
:
344 // reference picture buffer
345 dpb_size
= image_size
* max_references
;
348 dpb_size
+= width_in_mb
* height_in_mb
* 64;
351 dpb_size
+= align(width_in_mb
* height_in_mb
* 32, 64);
355 // something is missing here
358 // at least use a sane default value
/* fallback size of 32 MiB */
359 dpb_size
= 32 * 1024 * 1024;
365 /*
 * Get h264 specific message bits.
 *
 * Starts from a zeroed ruvd_h264 and fills it from the picture description:
 * the profile, packed sps/pps flag words, the individual sps/pps fields, the
 * scaling lists and the reference/frame-number bookkeeping.
 */
366 static struct ruvd_h264
get_h264_msg(struct ruvd_decoder
*dec
, struct pipe_h264_picture_desc
*pic
)
368 struct ruvd_h264 result
;
370 memset(&result
, 0, sizeof(result
));
371 switch (pic
->base
.profile
) {
372 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE
:
373 result
.profile
= RUVD_H264_PROFILE_BASELINE
;
376 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN
:
377 result
.profile
= RUVD_H264_PROFILE_MAIN
;
380 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
:
381 result
.profile
= RUVD_H264_PROFILE_HIGH
;
/* width * height >> 8 is the picture area in 16x16 units; what the two
 * branches of this test assign is not part of the listing (embedded numbering
 * skips 389-392) */
388 if (((dec
->base
.width
* dec
->base
.height
) >> 8) <= 1620)
/* sequence parameter set flags, one bit each */
393 result
.sps_info_flags
= 0;
394 result
.sps_info_flags
|= pic
->pps
->sps
->direct_8x8_inference_flag
<< 0;
395 result
.sps_info_flags
|= pic
->pps
->sps
->mb_adaptive_frame_field_flag
<< 1;
396 result
.sps_info_flags
|= pic
->pps
->sps
->frame_mbs_only_flag
<< 2;
397 result
.sps_info_flags
|= pic
->pps
->sps
->delta_pic_order_always_zero_flag
<< 3;
399 result
.bit_depth_luma_minus8
= pic
->pps
->sps
->bit_depth_luma_minus8
;
400 result
.bit_depth_chroma_minus8
= pic
->pps
->sps
->bit_depth_chroma_minus8
;
401 result
.log2_max_frame_num_minus4
= pic
->pps
->sps
->log2_max_frame_num_minus4
;
402 result
.pic_order_cnt_type
= pic
->pps
->sps
->pic_order_cnt_type
;
403 result
.log2_max_pic_order_cnt_lsb_minus4
= pic
->pps
->sps
->log2_max_pic_order_cnt_lsb_minus4
;
/* chroma format is encoded as 0..3 for 4:0:0, 4:2:0, 4:2:2 and 4:4:4 */
405 switch (dec
->base
.chroma_format
) {
406 case PIPE_VIDEO_CHROMA_FORMAT_400
:
407 result
.chroma_format
= 0;
409 case PIPE_VIDEO_CHROMA_FORMAT_420
:
410 result
.chroma_format
= 1;
412 case PIPE_VIDEO_CHROMA_FORMAT_422
:
413 result
.chroma_format
= 2;
415 case PIPE_VIDEO_CHROMA_FORMAT_444
:
416 result
.chroma_format
= 3;
/* picture parameter set flags; weighted_bipred_idc is placed at bit 4 and the
 * next flag at bit 6, so it has two bits of room */
420 result
.pps_info_flags
= 0;
421 result
.pps_info_flags
|= pic
->pps
->transform_8x8_mode_flag
<< 0;
422 result
.pps_info_flags
|= pic
->pps
->redundant_pic_cnt_present_flag
<< 1;
423 result
.pps_info_flags
|= pic
->pps
->constrained_intra_pred_flag
<< 2;
424 result
.pps_info_flags
|= pic
->pps
->deblocking_filter_control_present_flag
<< 3;
425 result
.pps_info_flags
|= pic
->pps
->weighted_bipred_idc
<< 4;
426 result
.pps_info_flags
|= pic
->pps
->weighted_pred_flag
<< 6;
427 result
.pps_info_flags
|= pic
->pps
->bottom_field_pic_order_in_frame_present_flag
<< 7;
428 result
.pps_info_flags
|= pic
->pps
->entropy_coding_mode_flag
<< 8;
430 result
.num_slice_groups_minus1
= pic
->pps
->num_slice_groups_minus1
;
431 result
.slice_group_map_type
= pic
->pps
->slice_group_map_type
;
432 result
.slice_group_change_rate_minus1
= pic
->pps
->slice_group_change_rate_minus1
;
433 result
.pic_init_qp_minus26
= pic
->pps
->pic_init_qp_minus26
;
434 result
.chroma_qp_index_offset
= pic
->pps
->chroma_qp_index_offset
;
435 result
.second_chroma_qp_index_offset
= pic
->pps
->second_chroma_qp_index_offset
;
/* scaling lists are copied with hard-coded byte counts (6*16 and 2*64) */
/* NOTE(review): these counts assume matching array sizes on both sides - confirm against the struct definitions */
437 memcpy(result
.scaling_list_4x4
, pic
->pps
->ScalingList4x4
, 6*16);
438 memcpy(result
.scaling_list_8x8
, pic
->pps
->ScalingList8x8
, 2*64);
440 result
.num_ref_frames
= pic
->num_ref_frames
;
442 result
.num_ref_idx_l0_active_minus1
= pic
->num_ref_idx_l0_active_minus1
;
443 result
.num_ref_idx_l1_active_minus1
= pic
->num_ref_idx_l1_active_minus1
;
445 result
.frame_num
= pic
->frame_num
;
/* 4*16 and 4*16*2 bytes; presumably 16 entries (and 16 pairs) of 4-byte values - confirm */
446 memcpy(result
.frame_num_list
, pic
->frame_num_list
, 4*16);
447 result
.curr_field_order_cnt_list
[0] = pic
->field_order_cnt
[0];
448 result
.curr_field_order_cnt_list
[1] = pic
->field_order_cnt
[1];
449 memcpy(result
.field_order_cnt_list
, pic
->field_order_cnt_list
, 4*16*2);
/* the decoded picture index is taken from the frame number */
451 result
.decoded_pic_idx
= pic
->frame_num
;
456 /*
 * Get vc1 specific message bits.
 *
 * Starts from a zeroed ruvd_vc1, sets the profile and packs the sequence and
 * picture level fields of the picture description into sps_info_flags and
 * pps_info_flags at fixed bit positions.
 */
457 static struct ruvd_vc1
get_vc1_msg(struct pipe_vc1_picture_desc
*pic
)
459 struct ruvd_vc1 result
;
461 memset(&result
, 0, sizeof(result
));
/* NOTE(review): embedded numbering skips lines after each profile assignment
 * below; what else the cases set is not part of this listing. */
463 switch(pic
->base
.profile
) {
464 case PIPE_VIDEO_PROFILE_VC1_SIMPLE
:
465 result
.profile
= RUVD_VC1_PROFILE_SIMPLE
;
469 case PIPE_VIDEO_PROFILE_VC1_MAIN
:
470 result
.profile
= RUVD_VC1_PROFILE_MAIN
;
474 case PIPE_VIDEO_PROFILE_VC1_ADVANCED
:
475 result
.profile
= RUVD_VC1_PROFILE_ADVANCED
;
483 /* fields common for all profiles */
484 result
.sps_info_flags
|= pic
->postprocflag
<< 7;
485 result
.sps_info_flags
|= pic
->pulldown
<< 6;
486 result
.sps_info_flags
|= pic
->interlace
<< 5;
487 result
.sps_info_flags
|= pic
->tfcntrflag
<< 4;
488 result
.sps_info_flags
|= pic
->finterpflag
<< 3;
489 result
.sps_info_flags
|= pic
->psf
<< 1;
/* NOTE(review): if range_mapy_flag has a type narrower than int it is
 * promoted to signed int before the shift by 31, which shifts into the sign
 * bit - confirm the field type and consider an unsigned cast. */
491 result
.pps_info_flags
|= pic
->range_mapy_flag
<< 31;
492 result
.pps_info_flags
|= pic
->range_mapy
<< 28;
493 result
.pps_info_flags
|= pic
->range_mapuv_flag
<< 27;
494 result
.pps_info_flags
|= pic
->range_mapuv
<< 24;
495 result
.pps_info_flags
|= pic
->multires
<< 21;
496 result
.pps_info_flags
|= pic
->maxbframes
<< 16;
497 result
.pps_info_flags
|= pic
->overlap
<< 11;
498 result
.pps_info_flags
|= pic
->quantizer
<< 9;
499 result
.pps_info_flags
|= pic
->panscan_flag
<< 7;
500 result
.pps_info_flags
|= pic
->refdist_flag
<< 6;
501 result
.pps_info_flags
|= pic
->vstransform
<< 0;
503 /* some fields only apply to main/advanced profile */
504 if (pic
->base
.profile
!= PIPE_VIDEO_PROFILE_VC1_SIMPLE
) {
505 result
.pps_info_flags
|= pic
->syncmarker
<< 20;
506 result
.pps_info_flags
|= pic
->rangered
<< 19;
507 result
.pps_info_flags
|= pic
->loopfilter
<< 5;
508 result
.pps_info_flags
|= pic
->fastuvmc
<< 4;
509 result
.pps_info_flags
|= pic
->extended_mv
<< 3;
510 result
.pps_info_flags
|= pic
->extended_dmv
<< 8;
511 result
.pps_info_flags
|= pic
->dquant
<< 1;
/* chroma format is always reported as 1 here */
514 result
.chroma_format
= 1;
/* NOTE(review): the three lines below look like the remains of a comment
 * listing fields that are not filled in; its delimiters are not part of this
 * listing. */
517 //(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
520 uint8_t frame_coding_mode
521 uint8_t deblockEnable
528 /*
 * Extract the frame number from a referenced video buffer.
 *
 * The number stored as associated data on the buffer is clamped to the range
 * [min, max], where max is the previous frame number and min lies
 * NUM_MPEG2_REFS frames before the current one (both saturating at zero).
 */
529 static uint32_t get_ref_pic_idx(struct ruvd_decoder
*dec
, struct pipe_video_buffer
*ref
)
/* MAX2 before the subtraction keeps the unsigned results from wrapping */
531 uint32_t min
= MAX2(dec
->frame_number
, NUM_MPEG2_REFS
) - NUM_MPEG2_REFS
;
532 uint32_t max
= MAX2(dec
->frame_number
, 1) - 1;
/* NOTE(review): the declaration of frame and the fallback this comment refers
 * to are not part of the listing (embedded numbering skips 533-538). */
535 /* seems to be the most sane fallback */
539 /* get the frame number from the associated data */
540 frame
= (uintptr_t)vl_video_buffer_get_associated_data(ref
, &dec
->base
);
542 /* limit the frame number to a valid range */
543 return MAX2(MIN2(frame
, max
), min
);
546 /*
 * Get mpeg2 specific msg bits.
 *
 * Starts from a zeroed ruvd_mpeg2, records the current and the two reference
 * picture indices, copies both quantiser matrices through the selected zigzag
 * scan table and transfers the picture coding parameters.
 */
547 static struct ruvd_mpeg2
get_mpeg2_msg(struct ruvd_decoder
*dec
,
548 struct pipe_mpeg12_picture_desc
*pic
)
/* scan order table used to reorder the matrices below */
550 const int *zscan
= pic
->alternate_scan
? vl_zscan_alternate
: vl_zscan_normal
;
551 struct ruvd_mpeg2 result
;
554 memset(&result
, 0, sizeof(result
));
555 result
.decoded_pic_idx
= dec
->frame_number
;
556 for (i
= 0; i
< 2; ++i
)
557 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
/* both matrices are always flagged for loading */
559 result
.load_intra_quantiser_matrix
= 1;
560 result
.load_nonintra_quantiser_matrix
= 1;
562 for (i
= 0; i
< 64; ++i
) {
563 result
.intra_quantiser_matrix
[i
] = pic
->intra_matrix
[zscan
[i
]];
564 result
.nonintra_quantiser_matrix
[i
] = pic
->non_intra_matrix
[zscan
[i
]];
567 result
.profile_and_level_indication
= 0;
568 result
.chroma_format
= 0x1;
570 result
.picture_coding_type
= pic
->picture_coding_type
;
/* every f_code is passed on incremented by one */
571 result
.f_code
[0][0] = pic
->f_code
[0][0] + 1;
572 result
.f_code
[0][1] = pic
->f_code
[0][1] + 1;
573 result
.f_code
[1][0] = pic
->f_code
[1][0] + 1;
574 result
.f_code
[1][1] = pic
->f_code
[1][1] + 1;
575 result
.intra_dc_precision
= pic
->intra_dc_precision
;
576 result
.pic_structure
= pic
->picture_structure
;
577 result
.top_field_first
= pic
->top_field_first
;
578 result
.frame_pred_frame_dct
= pic
->frame_pred_frame_dct
;
579 result
.concealment_motion_vectors
= pic
->concealment_motion_vectors
;
580 result
.q_scale_type
= pic
->q_scale_type
;
581 result
.intra_vlc_format
= pic
->intra_vlc_format
;
582 result
.alternate_scan
= pic
->alternate_scan
;
587 /*
 * Get mpeg4 specific msg bits.
 *
 * Starts from a zeroed ruvd_mpeg4, records the current and the two reference
 * picture indices, fills in fixed stream level values together with the
 * decoder dimensions, packs the flag word and copies both quantiser matrices
 * through the normal zigzag scan table.
 */
588 static struct ruvd_mpeg4
get_mpeg4_msg(struct ruvd_decoder
*dec
,
589 struct pipe_mpeg4_picture_desc
*pic
)
591 struct ruvd_mpeg4 result
;
594 memset(&result
, 0, sizeof(result
));
595 result
.decoded_pic_idx
= dec
->frame_number
;
596 for (i
= 0; i
< 2; ++i
)
597 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
599 result
.variant_type
= 0;
600 result
.profile_and_level_indication
= 0xF0; // ASP Level0
602 result
.video_object_layer_verid
= 0x5; // advanced simple
603 result
.video_object_layer_shape
= 0x0; // rectangular
605 result
.video_object_layer_width
= dec
->base
.width
;
606 result
.video_object_layer_height
= dec
->base
.height
;
608 result
.vop_time_increment_resolution
= pic
->vop_time_increment_resolution
;
/* flag word: bits taken from the picture description or hard-coded; the
 * commented-out lines mark bit positions that are left at zero */
610 result
.flags
|= pic
->short_video_header
<< 0;
611 //result.flags |= obmc_disable << 1;
612 result
.flags
|= pic
->interlaced
<< 2;
613 result
.flags
|= 1 << 3; // load_intra_quant_mat
614 result
.flags
|= 1 << 4; // load_nonintra_quant_mat
615 result
.flags
|= pic
->quarter_sample
<< 5;
616 result
.flags
|= 1 << 6; // complexity_estimation_disable
617 result
.flags
|= pic
->resync_marker_disable
<< 7;
618 //result.flags |= data_partitioned << 8;
619 //result.flags |= reversible_vlc << 9;
620 result
.flags
|= 0 << 10; // newpred_enable
621 result
.flags
|= 0 << 11; // reduced_resolution_vop_enable
622 //result.flags |= scalability << 12;
623 //result.flags |= is_object_layer_identifier << 13;
624 //result.flags |= fixed_vop_rate << 14;
625 //result.flags |= newpred_segment_type << 15;
627 result
.quant_type
= pic
->quant_type
;
629 for (i
= 0; i
< 64; ++i
) {
630 result
.intra_quant_mat
[i
] = pic
->intra_matrix
[vl_zscan_normal
[i
]];
631 result
.nonintra_quant_mat
[i
] = pic
->non_intra_matrix
[vl_zscan_normal
[i
]];
/* NOTE(review): the lines below look like the remains of a comment listing
 * fields that are not filled in; its delimiters are not part of this listing. */
637 uint8_t vop_coding_type
638 uint8_t vop_fcode_forward
639 uint8_t vop_fcode_backward
640 uint8_t rounding_control
641 uint8_t alternate_vertical_scan_flag
642 uint8_t top_field_first
/* Tear down a decoder: a RUVD_MSG_DESTROY message carrying the stream handle
 * is filled in, then the command stream, all message/feedback and bitstream
 * buffers and the dpb are destroyed. */
/* NOTE(review): the steps that map and send the message and that free the
 * decoder itself are not part of this listing (embedded numbering skips
 * 654-658, 663-666 and 675 onwards). */
649 * destroy this video decoder
651 static void ruvd_destroy(struct pipe_video_codec
*decoder
)
652 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
/* build the destroy message in the mapped message buffer */
659 memset(dec
->msg
, 0, sizeof(*dec
->msg
));
660 dec
->msg
->size
= sizeof(*dec
->msg
);
661 dec
->msg
->msg_type
= RUVD_MSG_DESTROY
;
662 dec
->msg
->stream_handle
= dec
->stream_handle
;
667 dec
->ws
->cs_destroy(dec
->cs
);
669 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
670 destroy_buffer(&dec
->msg_fb_buffers
[i
]);
671 destroy_buffer(&dec
->bs_buffers
[i
]);
674 destroy_buffer(&dec
->dpb
);
/*
 * Callback registered with vl_video_buffer_set_associated_data() to release
 * the data attached to a video buffer.
 */
static void ruvd_destroy_associated_data(void *data)
{
	/* nothing to release: the "data" is just a frame number stored in the pointer value */
}
/* Begin a frame: the decoder's frame number is incremented and attached to
 * the target buffer as associated data (stored in the pointer value itself),
 * then the current bitstream buffer is mapped for writing into dec->bs_ptr. */
686 * start decoding of a new frame
688 static void ruvd_begin_frame(struct pipe_video_codec
*decoder
,
689 struct pipe_video_buffer
*target
,
690 struct pipe_picture_desc
*picture
)
692 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
/* NOTE(review): the declaration of frame is not part of this listing
 * (embedded numbering skips 693-696). */
697 frame
= ++dec
->frame_number
;
698 vl_video_buffer_set_associated_data(target
, decoder
, (void *)frame
,
699 &ruvd_destroy_associated_data
);
/* NOTE(review): embedded numbering skips 700-701 before the mapping below. */
702 dec
->bs_ptr
= dec
->ws
->buffer_map(
703 dec
->bs_buffers
[dec
->cur_buffer
].cs_handle
,
704 dec
->cs
, PIPE_TRANSFER_WRITE
);
/* Macroblock level decoding entry point of the codec interface; per the
 * comment in the body it is not supported by this decoder. */
/* NOTE(review): one body line is not part of this listing (embedded numbering
 * skips 717). */
708 * decode a macroblock
710 static void ruvd_decode_macroblock(struct pipe_video_codec
*decoder
,
711 struct pipe_video_buffer
*target
,
712 struct pipe_picture_desc
*picture
,
713 const struct pipe_macroblock
*macroblocks
,
714 unsigned num_macroblocks
)
716 /* not supported (yet) */
/* Append the given bitstream chunks to the current bitstream buffer.
 *
 * For each of the num_buffers chunks: if the accumulated size plus the chunk
 * exceeds the buffer, the buffer is unmapped, resized with resize_buffer()
 * and mapped again, and bs_ptr is moved to the end of the data written so
 * far; the chunk is then copied and bs_size/bs_ptr are advanced by its size. */
723 static void ruvd_decode_bitstream(struct pipe_video_codec
*decoder
,
724 struct pipe_video_buffer
*target
,
725 struct pipe_picture_desc
*picture
,
726 unsigned num_buffers
,
727 const void * const *buffers
,
728 const unsigned *sizes
)
730 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
/* NOTE(review): embedded numbering skips 731-737 here (local declarations and
 * possibly entry checks). */
738 for (i
= 0; i
< num_buffers
; ++i
) {
739 struct ruvd_buffer
*buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
/* NOTE(review): unsigned addition; no overflow check is visible here */
740 unsigned new_size
= dec
->bs_size
+ sizes
[i
];
742 if (new_size
> buf
->buf
->size
) {
/* the mapping has to go before the buffer can be replaced */
743 dec
->ws
->buffer_unmap(buf
->cs_handle
);
744 if (!resize_buffer(dec
, buf
, new_size
)) {
/* NOTE(review): what follows the error message is not part of this listing
 * (embedded numbering skips 746-748); note that bs_ptr still refers to the
 * mapping released above at this point. */
745 RUVD_ERR("Can't resize bitstream buffer!");
749 dec
->bs_ptr
= dec
->ws
->buffer_map(buf
->cs_handle
, dec
->cs
,
750 PIPE_TRANSFER_WRITE
);
/* continue behind the data carried over by the resize */
754 dec
->bs_ptr
+= dec
->bs_size
;
757 memcpy(dec
->bs_ptr
, buffers
[i
], sizes
[i
]);
758 dec
->bs_size
+= sizes
[i
];
759 dec
->bs_ptr
+= sizes
[i
];
/* Finish a frame: the bitstream is zero padded to a multiple of 128 bytes and
 * unmapped, a RUVD_MSG_DECODE message is filled in (common fields, decode
 * target via set_dtb, codec specific part), the feedback size is stored in
 * fb[0], and the dpb, bitstream, decoding target and feedback buffers are
 * queued for the VCPU before the engine control register is written. */
/* NOTE(review): mapping/sending of the message buffer, the flush and the
 * buffer rotation are not part of this listing (the embedded numbering skips
 * lines at several points). */
764 * end decoding of the current frame
766 static void ruvd_end_frame(struct pipe_video_codec
*decoder
,
767 struct pipe_video_buffer
*target
,
768 struct pipe_picture_desc
*picture
)
770 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
771 struct radeon_winsys_cs_handle
*dt
;
772 struct ruvd_buffer
*msg_fb_buf
, *bs_buf
;
780 msg_fb_buf
= &dec
->msg_fb_buffers
[dec
->cur_buffer
];
781 bs_buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
/* pad the bitstream with zeros up to the next multiple of 128 */
/* NOTE(review): no visible check that the buffer is large enough for the padding - confirm */
783 bs_size
= align(dec
->bs_size
, 128);
784 memset(dec
->bs_ptr
, 0, bs_size
- dec
->bs_size
);
785 dec
->ws
->buffer_unmap(bs_buf
->cs_handle
);
/* common part of the decode message */
788 dec
->msg
->size
= sizeof(*dec
->msg
);
789 dec
->msg
->msg_type
= RUVD_MSG_DECODE
;
790 dec
->msg
->stream_handle
= dec
->stream_handle
;
791 dec
->msg
->status_report_feedback_number
= dec
->frame_number
;
793 dec
->msg
->body
.decode
.stream_type
= profile2stream_type(dec
->base
.profile
);
794 dec
->msg
->body
.decode
.decode_flags
= 0x1;
795 dec
->msg
->body
.decode
.width_in_samples
= dec
->base
.width
;
796 dec
->msg
->body
.decode
.height_in_samples
= dec
->base
.height
;
798 dec
->msg
->body
.decode
.dpb_size
= dec
->dpb
.buf
->size
;
799 dec
->msg
->body
.decode
.bsd_size
= bs_size
;
/* the callback fills the decode target fields of the message and returns the
 * handle of the target buffer */
801 dt
= dec
->set_dtb(dec
->msg
, (struct vl_video_buffer
*)target
);
/* codec specific part of the message */
803 switch (u_reduce_video_profile(picture
->profile
)) {
804 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
805 dec
->msg
->body
.decode
.codec
.h264
= get_h264_msg(dec
, (struct pipe_h264_picture_desc
*)picture
);
808 case PIPE_VIDEO_FORMAT_VC1
:
809 dec
->msg
->body
.decode
.codec
.vc1
= get_vc1_msg((struct pipe_vc1_picture_desc
*)picture
);
812 case PIPE_VIDEO_FORMAT_MPEG12
:
813 dec
->msg
->body
.decode
.codec
.mpeg2
= get_mpeg2_msg(dec
, (struct pipe_mpeg12_picture_desc
*)picture
);
816 case PIPE_VIDEO_FORMAT_MPEG4
:
817 dec
->msg
->body
.decode
.codec
.mpeg4
= get_mpeg4_msg(dec
, (struct pipe_mpeg4_picture_desc
*)picture
);
/* the db tile config is copied from the dt tile config */
825 dec
->msg
->body
.decode
.db_surf_tile_config
= dec
->msg
->body
.decode
.dt_surf_tile_config
;
826 dec
->msg
->body
.decode
.extension_support
= 0x1;
828 /* set at least the feedback buffer size */
829 dec
->fb
[0] = FB_BUFFER_SIZE
;
/* queue the buffers; the feedback area lives at FB_BUFFER_OFFSET inside the
 * message/feedback buffer */
833 send_cmd(dec
, RUVD_CMD_DPB_BUFFER
, dec
->dpb
.cs_handle
, 0,
834 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
835 send_cmd(dec
, RUVD_CMD_BITSTREAM_BUFFER
, bs_buf
->cs_handle
,
836 0, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
837 send_cmd(dec
, RUVD_CMD_DECODING_TARGET_BUFFER
, dt
, 0,
838 RADEON_USAGE_WRITE
, RADEON_DOMAIN_VRAM
);
839 send_cmd(dec
, RUVD_CMD_FEEDBACK_BUFFER
, msg_fb_buf
->cs_handle
,
840 FB_BUFFER_OFFSET
, RADEON_USAGE_WRITE
, RADEON_DOMAIN_GTT
);
841 set_reg(dec
, RUVD_ENGINE_CNTL
, 1);
/**
 * flush any outstanding command buffers to the hardware
 *
 * Installed as the codec's flush hook; this implementation has nothing to do.
 */
static void ruvd_flush(struct pipe_video_codec *decoder)
{
}
/* Create a UVD decoder for the given template.
 *
 * MPEG1/2 falls back to the generic vl decoder when the entrypoint is beyond
 * bitstream or the chip family is below CHIP_PALM. Otherwise a ruvd_decoder
 * is allocated, its codec hooks are installed, a command stream on the UVD
 * ring is created, NUM_BUFFERS message/feedback and bitstream buffers plus
 * the dpb are allocated and cleared, and a RUVD_MSG_CREATE message is filled
 * in. The trailing lines release the command stream and all buffers. */
/* NOTE(review): allocation checks, the message map/send/flush calls, the
 * return statements and the error label are not part of this listing (the
 * embedded numbering skips lines at several points). */
855 * create and UVD decoder
857 struct pipe_video_codec
*ruvd_create_decoder(struct pipe_context
*context
,
858 const struct pipe_video_codec
*templ
,
859 ruvd_set_dtb set_dtb
)
861 struct radeon_winsys
* ws
= ((struct r600_common_context
*)context
)->ws
;
862 unsigned dpb_size
= calc_dpb_size(templ
);
863 unsigned width
= templ
->width
, height
= templ
->height
;
864 unsigned bs_buf_size
;
865 struct radeon_info info
;
866 struct ruvd_decoder
*dec
;
869 ws
->query_info(ws
, &info
);
871 switch(u_reduce_video_profile(templ
->profile
)) {
872 case PIPE_VIDEO_FORMAT_MPEG12
:
/* shader based fallback for entrypoints beyond bitstream and for chip families below CHIP_PALM */
873 if (templ
->entrypoint
> PIPE_VIDEO_ENTRYPOINT_BITSTREAM
|| info
.family
< CHIP_PALM
)
874 return vl_create_mpeg12_decoder(context
, templ
);
/* for these formats the dimensions are aligned to the macroblock size */
877 case PIPE_VIDEO_FORMAT_MPEG4
:
878 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
879 width
= align(width
, VL_MACROBLOCK_WIDTH
);
880 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
888 dec
= CALLOC_STRUCT(ruvd_decoder
);
/* NOTE(review): embedded numbering skips 889-893 here; whether dec is checked
 * and dec->base / dec->ws are initialised from templ / ws is not visible. */
894 dec
->base
.context
= context
;
895 dec
->base
.width
= width
;
896 dec
->base
.height
= height
;
/* codec interface hooks */
898 dec
->base
.destroy
= ruvd_destroy
;
899 dec
->base
.begin_frame
= ruvd_begin_frame
;
900 dec
->base
.decode_macroblock
= ruvd_decode_macroblock
;
901 dec
->base
.decode_bitstream
= ruvd_decode_bitstream
;
902 dec
->base
.end_frame
= ruvd_end_frame
;
903 dec
->base
.flush
= ruvd_flush
;
905 dec
->set_dtb
= set_dtb
;
906 dec
->stream_handle
= alloc_stream_handle();
908 dec
->cs
= ws
->cs_create(ws
, RING_UVD
, NULL
);
910 RUVD_ERR("Can't get command submission context.\n");
/* initial bitstream buffer size: 512 bytes per 16x16 block of the picture */
914 bs_buf_size
= width
* height
* 512 / (16 * 16);
915 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
916 unsigned msg_fb_size
= FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
;
/* the message must fit in front of the feedback area */
917 STATIC_ASSERT(sizeof(struct ruvd_msg
) <= FB_BUFFER_OFFSET
);
918 if (!create_buffer(dec
, &dec
->msg_fb_buffers
[i
], msg_fb_size
)) {
919 RUVD_ERR("Can't allocated message buffers.\n");
923 if (!create_buffer(dec
, &dec
->bs_buffers
[i
], bs_buf_size
)) {
924 RUVD_ERR("Can't allocated bitstream buffers.\n");
928 clear_buffer(dec
, &dec
->msg_fb_buffers
[i
]);
929 clear_buffer(dec
, &dec
->bs_buffers
[i
]);
932 if (!create_buffer(dec
, &dec
->dpb
, dpb_size
)) {
933 RUVD_ERR("Can't allocated dpb.\n");
937 clear_buffer(dec
, &dec
->dpb
);
/* fill in the create message */
940 dec
->msg
->size
= sizeof(*dec
->msg
);
941 dec
->msg
->msg_type
= RUVD_MSG_CREATE
;
942 dec
->msg
->stream_handle
= dec
->stream_handle
;
943 dec
->msg
->body
.create
.stream_type
= profile2stream_type(dec
->base
.profile
);
944 dec
->msg
->body
.create
.width_in_samples
= dec
->base
.width
;
945 dec
->msg
->body
.create
.height_in_samples
= dec
->base
.height
;
946 dec
->msg
->body
.create
.dpb_size
= dec
->dpb
.buf
->size
;
/* cleanup: release the command stream (if any) and every buffer */
954 if (dec
->cs
) dec
->ws
->cs_destroy(dec
->cs
);
956 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
957 destroy_buffer(&dec
->msg_fb_buffers
[i
]);
958 destroy_buffer(&dec
->bs_buffers
[i
]);
961 destroy_buffer(&dec
->dpb
);
/* Put all component surfaces into one buffer with common tiling parameters.
 *
 * Pass 1 looks for the surface with the smallest bankw * bankh product, pass
 * 2 copies that surface's tiling parameters to every surface and shifts each
 * surface's level offsets by its aligned position in the joined buffer, pass
 * 3 sums up size and alignment of the existing buffers, then one VRAM buffer
 * is created and every existing buffer reference is replaced by it. */
/* NOTE(review): skip conditions inside the loops, the comparison that picks
 * best_tiling and the allocation failure handling are not part of this
 * listing (the embedded numbering skips lines at several points). */
969 * join surfaces into the same buffer with identical tiling params
970 * sumup their sizes and replace the backend buffers with a single bo
972 void ruvd_join_surfaces(struct radeon_winsys
* ws
, unsigned bind
,
973 struct pb_buffer
** buffers
[VL_NUM_COMPONENTS
],
974 struct radeon_surface
*surfaces
[VL_NUM_COMPONENTS
])
976 unsigned best_tiling
, best_wh
, off
;
977 unsigned size
, alignment
;
978 struct pb_buffer
*pb
;
/* best_wh starts at the maximum unsigned value (~0) */
981 for (i
= 0, best_tiling
= 0, best_wh
= ~0; i
< VL_NUM_COMPONENTS
; ++i
) {
987 /* choose the smallest bank w/h for now */
988 wh
= surfaces
[i
]->bankw
* surfaces
[i
]->bankh
;
995 for (i
= 0, off
= 0; i
< VL_NUM_COMPONENTS
; ++i
) {
999 /* copy the tiling parameters */
1000 surfaces
[i
]->bankw
= surfaces
[best_tiling
]->bankw
;
1001 surfaces
[i
]->bankh
= surfaces
[best_tiling
]->bankh
;
1002 surfaces
[i
]->mtilea
= surfaces
[best_tiling
]->mtilea
;
1003 surfaces
[i
]->tile_split
= surfaces
[best_tiling
]->tile_split
;
1005 /* adjust the texture layer offsets */
1006 off
= align(off
, surfaces
[i
]->bo_alignment
);
1007 for (j
= 0; j
< Elements(surfaces
[i
]->level
); ++j
)
1008 surfaces
[i
]->level
[j
].offset
+= off
;
1009 off
+= surfaces
[i
]->bo_size
;
/* total size and strictest alignment over the buffers that exist */
1012 for (i
= 0, size
= 0, alignment
= 0; i
< VL_NUM_COMPONENTS
; ++i
) {
1013 if (!buffers
[i
] || !*buffers
[i
])
1016 size
= align(size
, (*buffers
[i
])->alignment
);
1017 size
+= (*buffers
[i
])->size
;
1018 alignment
= MAX2(alignment
, (*buffers
[i
])->alignment
* 1);
1024 /* TODO: 2D tiling workaround */
1027 pb
= ws
->buffer_create(ws
, size
, alignment
, bind
, RADEON_DOMAIN_VRAM
);
/* point every existing buffer slot at the joined buffer */
1031 for (i
= 0; i
< VL_NUM_COMPONENTS
; ++i
) {
1032 if (!buffers
[i
] || !*buffers
[i
])
1035 pb_reference(buffers
[i
], pb
);
/* drop the local reference */
1038 pb_reference(&pb
, NULL
);
1041 /* calculate top/bottom offset */
1042 static unsigned texture_offset(struct radeon_surface
*surface
, unsigned layer
)
1044 return surface
->level
[0].offset
+
1045 layer
* surface
->level
[0].slice_size
;
1048 /* hw encode the aspect of macro tiles: the values 1, 2, 4 and 8 are mapped to 0, 1, 2 and 3 */
1049 static unsigned macro_tile_aspect(unsigned macro_tile_aspect
)
1051 switch (macro_tile_aspect
) {
/* NOTE(review): one line before the first case is not part of this listing
 * (embedded numbering skips 1052), so the handling of other values cannot be
 * confirmed from here. */
1053 case 1: macro_tile_aspect
= 0; break;
1054 case 2: macro_tile_aspect
= 1; break;
1055 case 4: macro_tile_aspect
= 2; break;
1056 case 8: macro_tile_aspect
= 3; break;
1058 return macro_tile_aspect
;
1061 /* hw encode the bank width and height: the values 1, 2, 4 and 8 are mapped to 0, 1, 2 and 3 */
1062 static unsigned bank_wh(unsigned bankwh
)
/* NOTE(review): the switch header, the handling of other values and the
 * return are not part of this listing (embedded numbering skips 1063-1065
 * and 1070 onwards). */
1066 case 1: bankwh
= 0; break;
1067 case 2: bankwh
= 1; break;
1068 case 4: bankwh
= 2; break;
1069 case 8: bankwh
= 3; break;
/* Fill the decoding target fields of a decode message from the luma and
 * chroma surfaces: pitch, tiling/array mode (from the luma level 0 mode),
 * top and bottom field offsets for both planes, and the bank width, bank
 * height and macro tile aspect bits of dt_surf_tile_config. */
/* NOTE(review): the separators between the switch cases and the else of the
 * field mode test are not part of this listing (the embedded numbering skips
 * lines there). */
1075 * fill decoding target field from the luma and chroma surfaces
1077 void ruvd_set_dt_surfaces(struct ruvd_msg
*msg
, struct radeon_surface
*luma
,
1078 struct radeon_surface
*chroma
)
1080 msg
->body
.decode
.dt_pitch
= luma
->level
[0].pitch_bytes
;
1081 switch (luma
->level
[0].mode
) {
1082 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
1083 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_LINEAR
;
1084 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_LINEAR
;
1086 case RADEON_SURF_MODE_1D
:
1087 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
1088 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_1D_THIN
;
1090 case RADEON_SURF_MODE_2D
:
1091 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
1092 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_2D_THIN
;
/* top field offsets come from layer 0 */
1099 msg
->body
.decode
.dt_luma_top_offset
= texture_offset(luma
, 0);
1100 msg
->body
.decode
.dt_chroma_top_offset
= texture_offset(chroma
, 0);
/* in field mode the bottom field comes from layer 1, otherwise it equals the
 * top offset */
1101 if (msg
->body
.decode
.dt_field_mode
) {
1102 msg
->body
.decode
.dt_luma_bottom_offset
= texture_offset(luma
, 1);
1103 msg
->body
.decode
.dt_chroma_bottom_offset
= texture_offset(chroma
, 1);
1105 msg
->body
.decode
.dt_luma_bottom_offset
= msg
->body
.decode
.dt_luma_top_offset
;
1106 msg
->body
.decode
.dt_chroma_bottom_offset
= msg
->body
.decode
.dt_chroma_top_offset
;
/* both planes are expected to share their tiling parameters */
1109 assert(luma
->bankw
== chroma
->bankw
);
1110 assert(luma
->bankh
== chroma
->bankh
);
1111 assert(luma
->mtilea
== chroma
->mtilea
);
/* the tile config bits are OR-ed into whatever the field already holds */
1113 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_BANK_WIDTH(bank_wh(luma
->bankw
));
1114 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_BANK_HEIGHT(bank_wh(luma
->bankh
));
1115 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma
->mtilea
));
/* Report a video capability for the given profile and entrypoint.
 *
 * Chip families below CHIP_PALM are handled first with their own answers for
 * the supported and interlaced caps; after that the generic per-cap switch
 * follows. */
/* NOTE(review): the switch headers and most return statements of this
 * function are not part of this listing (the embedded numbering skips lines
 * after most case labels), so the returned values cannot be documented from
 * here. */
1118 int ruvd_get_video_param(struct pipe_screen
*screen
,
1119 enum pipe_video_profile profile
,
1120 enum pipe_video_entrypoint entrypoint
,
1121 enum pipe_video_cap param
)
1123 struct r600_common_screen
*rscreen
= (struct r600_common_screen
*)screen
;
1125 /* UVD 2.x limits */
1126 if (rscreen
->family
< CHIP_PALM
) {
1127 enum pipe_video_format codec
= u_reduce_video_profile(profile
);
1129 case PIPE_VIDEO_CAP_SUPPORTED
:
1130 /* no support for MPEG4 */
1131 return codec
!= PIPE_VIDEO_FORMAT_MPEG4
;
1132 case PIPE_VIDEO_CAP_PREFERS_INTERLACED
:
1133 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED
:
1134 /* and MPEG2 only with shaders */
1135 return codec
!= PIPE_VIDEO_FORMAT_MPEG12
;
/* generic handling */
1142 case PIPE_VIDEO_CAP_SUPPORTED
:
1143 switch (u_reduce_video_profile(profile
)) {
1144 case PIPE_VIDEO_FORMAT_MPEG12
:
1145 case PIPE_VIDEO_FORMAT_MPEG4
:
1146 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
1148 case PIPE_VIDEO_FORMAT_VC1
:
1149 /* FIXME: VC-1 simple/main profile is broken */
1150 return profile
== PIPE_VIDEO_PROFILE_VC1_ADVANCED
;
1154 case PIPE_VIDEO_CAP_NPOT_TEXTURES
:
1156 case PIPE_VIDEO_CAP_MAX_WIDTH
:
1158 case PIPE_VIDEO_CAP_MAX_HEIGHT
:
1160 case PIPE_VIDEO_CAP_PREFERED_FORMAT
:
1161 return PIPE_FORMAT_NV12
;
1162 case PIPE_VIDEO_CAP_PREFERS_INTERLACED
:
1164 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED
:
1166 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE
:
/* the maximum level is answered per profile */
1168 case PIPE_VIDEO_CAP_MAX_LEVEL
:
1170 case PIPE_VIDEO_PROFILE_MPEG1
:
1172 case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE
:
1173 case PIPE_VIDEO_PROFILE_MPEG2_MAIN
:
1175 case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE
:
1177 case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE
:
1179 case PIPE_VIDEO_PROFILE_VC1_SIMPLE
:
1181 case PIPE_VIDEO_PROFILE_VC1_MAIN
:
1183 case PIPE_VIDEO_PROFILE_VC1_ADVANCED
:
1185 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE
:
1186 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN
:
1187 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
:
1197 boolean
ruvd_is_format_supported(struct pipe_screen
*screen
,
1198 enum pipe_format format
,
1199 enum pipe_video_profile profile
,
1200 enum pipe_video_entrypoint entrypoint
)
1202 /* we can only handle this one with UVD */
1203 if (profile
!= PIPE_VIDEO_PROFILE_UNKNOWN
)
1204 return format
== PIPE_FORMAT_NV12
;
1206 return vl_video_buffer_is_format_supported(screen
, format
, profile
, entrypoint
);