1 /**************************************************************************
3 * Copyright 2011 Advanced Micro Devices, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Christian König <christian.koenig@amd.com>
34 #include <sys/types.h>
40 #include "pipe/p_video_codec.h"
42 #include "util/u_memory.h"
43 #include "util/u_video.h"
45 #include "vl/vl_defines.h"
46 #include "vl/vl_mpeg12_decoder.h"
48 #include "../../winsys/radeon/drm/radeon_winsys.h"
49 #include "r600_pipe_common.h"
50 #include "radeon_uvd.h"
/* Print a UVD error to stderr, prefixed with the source file, line and function. */
#define RUVD_ERR(fmt, args...) \
    fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)

/*
 * Number of message/feedback and bitstream buffers that are cycled through.
 * NOTE(review): the definition is not visible in this copy of the file even
 * though the name is used throughout; 4 is assumed -- confirm.
 */
#ifndef NUM_BUFFERS
#define NUM_BUFFERS 4
#endif

/* number of reference slots assumed for MPEG2 and H.264 streams */
#define NUM_MPEG2_REFS 6
#define NUM_H264_REFS 17
/* UVD buffer representation: a winsys buffer together with its CS handle */
struct ruvd_buffer
{
    struct pb_buffer *buf;                      /* reference-counted backing buffer */
    struct radeon_winsys_cs_handle *cs_handle;  /* handle used for mapping and relocations */
};
67 /* UVD decoder representation */
69 struct pipe_video_codec base
;
73 unsigned stream_handle
;
74 unsigned frame_number
;
76 struct radeon_winsys
* ws
;
77 struct radeon_winsys_cs
* cs
;
81 struct ruvd_buffer msg_fb_buffers
[NUM_BUFFERS
];
82 struct ruvd_buffer bs_buffers
[NUM_BUFFERS
];
86 struct ruvd_buffer dpb
;
/*
 * Generate a UVD stream handle.
 *
 * The handle is the bit-reversed process id XOR-ed with a static counter that
 * is incremented on every call, so successive calls inside one process yield
 * different handles.
 */
static unsigned alloc_stream_handle(void)
{
    static unsigned counter = 0;
    unsigned stream_handle = 0;
    unsigned pid = getpid();
    unsigned i;

    /* mirror the pid: bit i of the pid becomes bit 31 - i of the handle */
    for (i = 0; i < 32; ++i)
        stream_handle |= ((pid >> i) & 1) << (31 - i);

    stream_handle ^= ++counter;
    return stream_handle;
}
104 /* flush IB to the hardware */
105 static void flush(struct ruvd_decoder
*dec
)
107 dec
->ws
->cs_flush(dec
->cs
, RADEON_FLUSH_ASYNC
, 0);
110 /* add a new set register command to the IB */
111 static void set_reg(struct ruvd_decoder
*dec
, unsigned reg
, uint32_t val
)
113 uint32_t *pm4
= dec
->cs
->buf
;
114 pm4
[dec
->cs
->cdw
++] = RUVD_PKT0(reg
>> 2, 0);
115 pm4
[dec
->cs
->cdw
++] = val
;
118 /* send a command to the VCPU through the GPCOM registers */
119 static void send_cmd(struct ruvd_decoder
*dec
, unsigned cmd
,
120 struct radeon_winsys_cs_handle
* cs_buf
, uint32_t off
,
121 enum radeon_bo_usage usage
, enum radeon_bo_domain domain
)
125 reloc_idx
= dec
->ws
->cs_add_reloc(dec
->cs
, cs_buf
, usage
, domain
);
126 set_reg(dec
, RUVD_GPCOM_VCPU_DATA0
, off
);
127 set_reg(dec
, RUVD_GPCOM_VCPU_DATA1
, reloc_idx
* 4);
128 set_reg(dec
, RUVD_GPCOM_VCPU_CMD
, cmd
<< 1);
131 /* send a message command to the VCPU */
132 static void send_msg(struct ruvd_decoder
*dec
, struct ruvd_msg
*msg
)
134 struct ruvd_buffer
* buf
;
137 /* grap a message buffer */
138 buf
= &dec
->msg_fb_buffers
[dec
->cur_buffer
];
140 /* copy the message into it */
141 ptr
= dec
->ws
->buffer_map(buf
->cs_handle
, dec
->cs
, PIPE_TRANSFER_WRITE
);
145 memcpy(ptr
, msg
, sizeof(*msg
));
146 memset(ptr
+ sizeof(*msg
), 0, buf
->buf
->size
- sizeof(*msg
));
147 dec
->ws
->buffer_unmap(buf
->cs_handle
);
149 /* and send it to the hardware */
150 send_cmd(dec
, RUVD_CMD_MSG_BUFFER
, buf
->cs_handle
, 0,
151 RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
154 /* create a buffer in the winsys */
155 static bool create_buffer(struct ruvd_decoder
*dec
,
156 struct ruvd_buffer
*buffer
,
159 buffer
->buf
= dec
->ws
->buffer_create(dec
->ws
, size
, 4096, false,
160 RADEON_DOMAIN_GTT
| RADEON_DOMAIN_VRAM
);
164 buffer
->cs_handle
= dec
->ws
->buffer_get_cs_handle(buffer
->buf
);
165 if (!buffer
->cs_handle
)
171 /* destroy a buffer */
172 static void destroy_buffer(struct ruvd_buffer
*buffer
)
174 pb_reference(&buffer
->buf
, NULL
);
175 buffer
->cs_handle
= NULL
;
178 /* reallocate a buffer, preserving its content */
179 static bool resize_buffer(struct ruvd_decoder
*dec
,
180 struct ruvd_buffer
*new_buf
,
183 unsigned bytes
= MIN2(new_buf
->buf
->size
, new_size
);
184 struct ruvd_buffer old_buf
= *new_buf
;
185 void *src
= NULL
, *dst
= NULL
;
187 if (!create_buffer(dec
, new_buf
, new_size
))
190 src
= dec
->ws
->buffer_map(old_buf
.cs_handle
, dec
->cs
, PIPE_TRANSFER_READ
);
194 dst
= dec
->ws
->buffer_map(new_buf
->cs_handle
, dec
->cs
, PIPE_TRANSFER_WRITE
);
198 memcpy(dst
, src
, bytes
);
199 if (new_size
> bytes
) {
202 memset(dst
, 0, new_size
);
204 dec
->ws
->buffer_unmap(new_buf
->cs_handle
);
205 dec
->ws
->buffer_unmap(old_buf
.cs_handle
);
206 destroy_buffer(&old_buf
);
210 if (src
) dec
->ws
->buffer_unmap(old_buf
.cs_handle
);
211 destroy_buffer(new_buf
);
216 /* clear the buffer with zeros */
217 static void clear_buffer(struct ruvd_decoder
*dec
,
218 struct ruvd_buffer
* buffer
)
220 //TODO: let the GPU do the job
221 void *ptr
= dec
->ws
->buffer_map(buffer
->cs_handle
, dec
->cs
,
222 PIPE_TRANSFER_WRITE
);
226 memset(ptr
, 0, buffer
->buf
->size
);
227 dec
->ws
->buffer_unmap(buffer
->cs_handle
);
230 /* cycle to the next set of buffers */
231 static void next_buffer(struct ruvd_decoder
*dec
)
234 dec
->cur_buffer
%= NUM_BUFFERS
;
237 /* convert the profile into something UVD understands */
238 static uint32_t profile2stream_type(enum pipe_video_profile profile
)
240 switch (u_reduce_video_profile(profile
)) {
241 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
242 return RUVD_CODEC_H264
;
244 case PIPE_VIDEO_FORMAT_VC1
:
245 return RUVD_CODEC_VC1
;
247 case PIPE_VIDEO_FORMAT_MPEG12
:
248 return RUVD_CODEC_MPEG2
;
250 case PIPE_VIDEO_FORMAT_MPEG4
:
251 return RUVD_CODEC_MPEG4
;
259 /* calculate size of reference picture buffer */
260 static unsigned calc_dpb_size(const struct pipe_video_codec
*templ
)
262 unsigned width_in_mb
, height_in_mb
, image_size
, dpb_size
;
264 // always align them to MB size for dpb calculation
265 unsigned width
= align(templ
->width
, VL_MACROBLOCK_WIDTH
);
266 unsigned height
= align(templ
->height
, VL_MACROBLOCK_HEIGHT
);
268 // always one more for currently decoded picture
269 unsigned max_references
= templ
->max_references
+ 1;
271 // aligned size of a single frame
272 image_size
= width
* height
;
273 image_size
+= image_size
/ 2;
274 image_size
= align(image_size
, 1024);
276 // picture width & height in 16 pixel units
277 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
278 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
280 switch (u_reduce_video_profile(templ
->profile
)) {
281 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
282 // the firmware seems to allways assume a minimum of ref frames
283 max_references
= MAX2(NUM_H264_REFS
, max_references
);
285 // reference picture buffer
286 dpb_size
= image_size
* max_references
;
288 // macroblock context buffer
289 dpb_size
+= width_in_mb
* height_in_mb
* max_references
* 192;
292 dpb_size
+= width_in_mb
* height_in_mb
* 32;
295 case PIPE_VIDEO_FORMAT_VC1
:
296 // reference picture buffer
297 dpb_size
= image_size
* max_references
;
300 dpb_size
+= width_in_mb
* height_in_mb
* 128;
303 dpb_size
+= width_in_mb
* 64;
306 dpb_size
+= width_in_mb
* 128;
309 dpb_size
+= align(MAX2(width_in_mb
, height_in_mb
) * 7 * 16, 64);
312 case PIPE_VIDEO_FORMAT_MPEG12
:
313 // reference picture buffer, must be big enough for all frames
314 dpb_size
= image_size
* NUM_MPEG2_REFS
;
317 case PIPE_VIDEO_FORMAT_MPEG4
:
318 // reference picture buffer
319 dpb_size
= image_size
* max_references
;
322 dpb_size
+= width_in_mb
* height_in_mb
* 64;
325 dpb_size
+= align(width_in_mb
* height_in_mb
* 32, 64);
329 // something is missing here
332 // at least use a sane default value
333 dpb_size
= 32 * 1024 * 1024;
339 /* get h264 specific message bits */
340 static struct ruvd_h264
get_h264_msg(struct ruvd_decoder
*dec
, struct pipe_h264_picture_desc
*pic
)
342 struct ruvd_h264 result
;
344 memset(&result
, 0, sizeof(result
));
345 switch (pic
->base
.profile
) {
346 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE
:
347 result
.profile
= RUVD_H264_PROFILE_BASELINE
;
350 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN
:
351 result
.profile
= RUVD_H264_PROFILE_MAIN
;
354 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
:
355 result
.profile
= RUVD_H264_PROFILE_HIGH
;
362 if (((dec
->base
.width
* dec
->base
.height
) >> 8) <= 1620)
367 result
.sps_info_flags
= 0;
368 result
.sps_info_flags
|= pic
->direct_8x8_inference_flag
<< 0;
369 result
.sps_info_flags
|= pic
->mb_adaptive_frame_field_flag
<< 1;
370 result
.sps_info_flags
|= pic
->frame_mbs_only_flag
<< 2;
371 result
.sps_info_flags
|= pic
->delta_pic_order_always_zero_flag
<< 3;
373 result
.pps_info_flags
= 0;
374 result
.pps_info_flags
|= pic
->transform_8x8_mode_flag
<< 0;
375 result
.pps_info_flags
|= pic
->redundant_pic_cnt_present_flag
<< 1;
376 result
.pps_info_flags
|= pic
->constrained_intra_pred_flag
<< 2;
377 result
.pps_info_flags
|= pic
->deblocking_filter_control_present_flag
<< 3;
378 result
.pps_info_flags
|= pic
->weighted_bipred_idc
<< 4;
379 result
.pps_info_flags
|= pic
->weighted_pred_flag
<< 6;
380 result
.pps_info_flags
|= pic
->pic_order_present_flag
<< 7;
381 result
.pps_info_flags
|= pic
->entropy_coding_mode_flag
<< 8;
383 result
.chroma_format
= 0x1;
384 result
.bit_depth_luma_minus8
= 0;
385 result
.bit_depth_chroma_minus8
= 0;
387 result
.log2_max_frame_num_minus4
= pic
->log2_max_frame_num_minus4
;
388 result
.pic_order_cnt_type
= pic
->pic_order_cnt_type
;
389 result
.log2_max_pic_order_cnt_lsb_minus4
= pic
->log2_max_pic_order_cnt_lsb_minus4
;
390 result
.num_ref_frames
= pic
->num_ref_frames
;
391 result
.pic_init_qp_minus26
= pic
->pic_init_qp_minus26
;
392 result
.chroma_qp_index_offset
= pic
->chroma_qp_index_offset
;
393 result
.second_chroma_qp_index_offset
= pic
->second_chroma_qp_index_offset
;
395 result
.num_slice_groups_minus1
= 0;
396 result
.slice_group_map_type
= 0;
398 result
.num_ref_idx_l0_active_minus1
= pic
->num_ref_idx_l0_active_minus1
;
399 result
.num_ref_idx_l1_active_minus1
= pic
->num_ref_idx_l1_active_minus1
;
401 result
.slice_group_change_rate_minus1
= 0;
403 memcpy(result
.scaling_list_4x4
, pic
->scaling_lists_4x4
, 6*64);
404 memcpy(result
.scaling_list_8x8
, pic
->scaling_lists_8x8
, 2*64);
406 result
.frame_num
= pic
->frame_num
;
407 memcpy(result
.frame_num_list
, pic
->frame_num_list
, 4*16);
408 result
.curr_field_order_cnt_list
[0] = pic
->field_order_cnt
[0];
409 result
.curr_field_order_cnt_list
[1] = pic
->field_order_cnt
[1];
410 memcpy(result
.field_order_cnt_list
, pic
->field_order_cnt_list
, 4*16*2);
412 result
.decoded_pic_idx
= pic
->frame_num
;
417 /* get vc1 specific message bits */
418 static struct ruvd_vc1
get_vc1_msg(struct pipe_vc1_picture_desc
*pic
)
420 struct ruvd_vc1 result
;
422 memset(&result
, 0, sizeof(result
));
423 switch(pic
->base
.profile
) {
424 case PIPE_VIDEO_PROFILE_VC1_SIMPLE
:
425 result
.profile
= RUVD_VC1_PROFILE_SIMPLE
;
428 case PIPE_VIDEO_PROFILE_VC1_MAIN
:
429 result
.profile
= RUVD_VC1_PROFILE_MAIN
;
432 case PIPE_VIDEO_PROFILE_VC1_ADVANCED
:
433 result
.profile
= RUVD_VC1_PROFILE_ADVANCED
;
439 if (pic
->base
.profile
== PIPE_VIDEO_PROFILE_VC1_ADVANCED
) {
442 result
.sps_info_flags
|= pic
->postprocflag
<< 7;
443 result
.sps_info_flags
|= pic
->pulldown
<< 6;
444 result
.sps_info_flags
|= pic
->interlace
<< 5;
445 result
.sps_info_flags
|= pic
->tfcntrflag
<< 4;
446 result
.sps_info_flags
|= pic
->psf
<< 1;
448 result
.pps_info_flags
|= pic
->panscan_flag
<< 7;
449 result
.pps_info_flags
|= pic
->refdist_flag
<< 6;
450 result
.pps_info_flags
|= pic
->extended_dmv
<< 8;
451 result
.pps_info_flags
|= pic
->range_mapy_flag
<< 31;
452 result
.pps_info_flags
|= pic
->range_mapy
<< 28;
453 result
.pps_info_flags
|= pic
->range_mapuv_flag
<< 27;
454 result
.pps_info_flags
|= pic
->range_mapuv
<< 24;
458 result
.pps_info_flags
|= pic
->multires
<< 21;
459 result
.pps_info_flags
|= pic
->syncmarker
<< 20;
460 result
.pps_info_flags
|= pic
->rangered
<< 19;
461 result
.pps_info_flags
|= pic
->maxbframes
<< 16;
464 result
.sps_info_flags
|= pic
->finterpflag
<< 3;
465 //(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
467 result
.pps_info_flags
|= pic
->loopfilter
<< 5;
468 result
.pps_info_flags
|= pic
->fastuvmc
<< 4;
469 result
.pps_info_flags
|= pic
->extended_mv
<< 3;
470 result
.pps_info_flags
|= pic
->dquant
<< 1;
471 result
.pps_info_flags
|= pic
->vstransform
<< 0;
472 result
.pps_info_flags
|= pic
->overlap
<< 11;
473 result
.pps_info_flags
|= pic
->quantizer
<< 9;
479 uint8_t frame_coding_mode
480 uint8_t deblockEnable
484 result
.chroma_format
= 1;
488 /* extract the frame number from a referenced video buffer */
489 static uint32_t get_ref_pic_idx(struct ruvd_decoder
*dec
, struct pipe_video_buffer
*ref
)
491 uint32_t min
= MAX2(dec
->frame_number
, NUM_MPEG2_REFS
) - NUM_MPEG2_REFS
;
492 uint32_t max
= MAX2(dec
->frame_number
, 1) - 1;
495 /* seems to be the most sane fallback */
499 /* get the frame number from the associated data */
500 frame
= (uintptr_t)vl_video_buffer_get_associated_data(ref
, &dec
->base
);
502 /* limit the frame number to a valid range */
503 return MAX2(MIN2(frame
, max
), min
);
506 /* get mpeg2 specific msg bits */
507 static struct ruvd_mpeg2
get_mpeg2_msg(struct ruvd_decoder
*dec
,
508 struct pipe_mpeg12_picture_desc
*pic
)
510 const int *zscan
= pic
->alternate_scan
? vl_zscan_alternate
: vl_zscan_normal
;
511 struct ruvd_mpeg2 result
;
514 memset(&result
, 0, sizeof(result
));
515 result
.decoded_pic_idx
= dec
->frame_number
;
516 for (i
= 0; i
< 2; ++i
)
517 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
519 result
.load_intra_quantiser_matrix
= 1;
520 result
.load_nonintra_quantiser_matrix
= 1;
522 for (i
= 0; i
< 64; ++i
) {
523 result
.intra_quantiser_matrix
[i
] = pic
->intra_matrix
[zscan
[i
]];
524 result
.nonintra_quantiser_matrix
[i
] = pic
->non_intra_matrix
[zscan
[i
]];
527 result
.profile_and_level_indication
= 0;
528 result
.chroma_format
= 0x1;
530 result
.picture_coding_type
= pic
->picture_coding_type
;
531 result
.f_code
[0][0] = pic
->f_code
[0][0] + 1;
532 result
.f_code
[0][1] = pic
->f_code
[0][1] + 1;
533 result
.f_code
[1][0] = pic
->f_code
[1][0] + 1;
534 result
.f_code
[1][1] = pic
->f_code
[1][1] + 1;
535 result
.intra_dc_precision
= pic
->intra_dc_precision
;
536 result
.pic_structure
= pic
->picture_structure
;
537 result
.top_field_first
= pic
->top_field_first
;
538 result
.frame_pred_frame_dct
= pic
->frame_pred_frame_dct
;
539 result
.concealment_motion_vectors
= pic
->concealment_motion_vectors
;
540 result
.q_scale_type
= pic
->q_scale_type
;
541 result
.intra_vlc_format
= pic
->intra_vlc_format
;
542 result
.alternate_scan
= pic
->alternate_scan
;
547 /* get mpeg4 specific msg bits */
548 static struct ruvd_mpeg4
get_mpeg4_msg(struct ruvd_decoder
*dec
,
549 struct pipe_mpeg4_picture_desc
*pic
)
551 struct ruvd_mpeg4 result
;
554 memset(&result
, 0, sizeof(result
));
555 result
.decoded_pic_idx
= dec
->frame_number
;
556 for (i
= 0; i
< 2; ++i
)
557 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
559 result
.variant_type
= 0;
560 result
.profile_and_level_indication
= 0xF0; // ASP Level0
562 result
.video_object_layer_verid
= 0x5; // advanced simple
563 result
.video_object_layer_shape
= 0x0; // rectangular
565 result
.video_object_layer_width
= dec
->base
.width
;
566 result
.video_object_layer_height
= dec
->base
.height
;
568 result
.vop_time_increment_resolution
= pic
->vop_time_increment_resolution
;
570 result
.flags
|= pic
->short_video_header
<< 0;
571 //result.flags |= obmc_disable << 1;
572 result
.flags
|= pic
->interlaced
<< 2;
573 result
.flags
|= 1 << 3; // load_intra_quant_mat
574 result
.flags
|= 1 << 4; // load_nonintra_quant_mat
575 result
.flags
|= pic
->quarter_sample
<< 5;
576 result
.flags
|= 1 << 6; // complexity_estimation_disable
577 result
.flags
|= pic
->resync_marker_disable
<< 7;
578 //result.flags |= data_partitioned << 8;
579 //result.flags |= reversible_vlc << 9;
580 result
.flags
|= 0 << 10; // newpred_enable
581 result
.flags
|= 0 << 11; // reduced_resolution_vop_enable
582 //result.flags |= scalability << 12;
583 //result.flags |= is_object_layer_identifier << 13;
584 //result.flags |= fixed_vop_rate << 14;
585 //result.flags |= newpred_segment_type << 15;
587 result
.quant_type
= pic
->quant_type
;
589 for (i
= 0; i
< 64; ++i
) {
590 result
.intra_quant_mat
[i
] = pic
->intra_matrix
[vl_zscan_normal
[i
]];
591 result
.nonintra_quant_mat
[i
] = pic
->non_intra_matrix
[vl_zscan_normal
[i
]];
597 uint8_t vop_coding_type
598 uint8_t vop_fcode_forward
599 uint8_t vop_fcode_backward
600 uint8_t rounding_control
601 uint8_t alternate_vertical_scan_flag
602 uint8_t top_field_first
609 * destroy this video decoder
611 static void ruvd_destroy(struct pipe_video_codec
*decoder
)
613 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
619 memset(&msg
, 0, sizeof(msg
));
620 msg
.size
= sizeof(msg
);
621 msg
.msg_type
= RUVD_MSG_DESTROY
;
622 msg
.stream_handle
= dec
->stream_handle
;
627 dec
->ws
->cs_destroy(dec
->cs
);
629 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
630 destroy_buffer(&dec
->msg_fb_buffers
[i
]);
631 destroy_buffer(&dec
->bs_buffers
[i
]);
634 destroy_buffer(&dec
->dpb
);
/* free associated data in the video buffer callback */
static void ruvd_destroy_associated_data(void *data)
{
    /* NOOP, since we only use an intptr */
    (void)data;
}
646 * start decoding of a new frame
648 static void ruvd_begin_frame(struct pipe_video_codec
*decoder
,
649 struct pipe_video_buffer
*target
,
650 struct pipe_picture_desc
*picture
)
652 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
657 frame
= ++dec
->frame_number
;
658 vl_video_buffer_set_associated_data(target
, decoder
, (void *)frame
,
659 &ruvd_destroy_associated_data
);
662 dec
->bs_ptr
= dec
->ws
->buffer_map(
663 dec
->bs_buffers
[dec
->cur_buffer
].cs_handle
,
664 dec
->cs
, PIPE_TRANSFER_WRITE
);
/**
 * decode a macroblock
 *
 * Macroblock-level decoding is not implemented; calling this trips an
 * assertion.
 */
static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,
                                   struct pipe_video_buffer *target,
                                   struct pipe_picture_desc *picture,
                                   const struct pipe_macroblock *macroblocks,
                                   unsigned num_macroblocks)
{
    /* not supported (yet) */
    assert(0);
}
683 static void ruvd_decode_bitstream(struct pipe_video_codec
*decoder
,
684 struct pipe_video_buffer
*target
,
685 struct pipe_picture_desc
*picture
,
686 unsigned num_buffers
,
687 const void * const *buffers
,
688 const unsigned *sizes
)
690 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
698 for (i
= 0; i
< num_buffers
; ++i
) {
699 struct ruvd_buffer
*buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
700 unsigned new_size
= dec
->bs_size
+ sizes
[i
];
702 if (new_size
> buf
->buf
->size
) {
703 dec
->ws
->buffer_unmap(buf
->cs_handle
);
704 if (!resize_buffer(dec
, buf
, new_size
)) {
705 RUVD_ERR("Can't resize bitstream buffer!");
709 dec
->bs_ptr
= dec
->ws
->buffer_map(buf
->cs_handle
, dec
->cs
,
710 PIPE_TRANSFER_WRITE
);
714 dec
->bs_ptr
+= dec
->bs_size
;
717 memcpy(dec
->bs_ptr
, buffers
[i
], sizes
[i
]);
718 dec
->bs_size
+= sizes
[i
];
719 dec
->bs_ptr
+= sizes
[i
];
724 * end decoding of the current frame
726 static void ruvd_end_frame(struct pipe_video_codec
*decoder
,
727 struct pipe_video_buffer
*target
,
728 struct pipe_picture_desc
*picture
)
730 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
731 struct radeon_winsys_cs_handle
*dt
;
732 struct ruvd_buffer
*msg_fb_buf
, *bs_buf
;
741 msg_fb_buf
= &dec
->msg_fb_buffers
[dec
->cur_buffer
];
742 bs_buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
744 bs_size
= align(dec
->bs_size
, 128);
745 memset(dec
->bs_ptr
, 0, bs_size
- dec
->bs_size
);
746 dec
->ws
->buffer_unmap(bs_buf
->cs_handle
);
748 memset(&msg
, 0, sizeof(msg
));
749 msg
.size
= sizeof(msg
);
750 msg
.msg_type
= RUVD_MSG_DECODE
;
751 msg
.stream_handle
= dec
->stream_handle
;
752 msg
.status_report_feedback_number
= dec
->frame_number
;
754 msg
.body
.decode
.stream_type
= profile2stream_type(dec
->base
.profile
);
755 msg
.body
.decode
.decode_flags
= 0x1;
756 msg
.body
.decode
.width_in_samples
= dec
->base
.width
;
757 msg
.body
.decode
.height_in_samples
= dec
->base
.height
;
759 msg
.body
.decode
.dpb_size
= dec
->dpb
.buf
->size
;
760 msg
.body
.decode
.bsd_size
= bs_size
;
762 dt
= dec
->set_dtb(&msg
, (struct vl_video_buffer
*)target
);
764 switch (u_reduce_video_profile(picture
->profile
)) {
765 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
766 msg
.body
.decode
.codec
.h264
= get_h264_msg(dec
, (struct pipe_h264_picture_desc
*)picture
);
769 case PIPE_VIDEO_FORMAT_VC1
:
770 msg
.body
.decode
.codec
.vc1
= get_vc1_msg((struct pipe_vc1_picture_desc
*)picture
);
773 case PIPE_VIDEO_FORMAT_MPEG12
:
774 msg
.body
.decode
.codec
.mpeg2
= get_mpeg2_msg(dec
, (struct pipe_mpeg12_picture_desc
*)picture
);
777 case PIPE_VIDEO_FORMAT_MPEG4
:
778 msg
.body
.decode
.codec
.mpeg4
= get_mpeg4_msg(dec
, (struct pipe_mpeg4_picture_desc
*)picture
);
786 msg
.body
.decode
.db_surf_tile_config
= msg
.body
.decode
.dt_surf_tile_config
;
787 msg
.body
.decode
.extension_support
= 0x1;
790 send_cmd(dec
, RUVD_CMD_DPB_BUFFER
, dec
->dpb
.cs_handle
, 0,
791 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
792 send_cmd(dec
, RUVD_CMD_BITSTREAM_BUFFER
, bs_buf
->cs_handle
,
793 0, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
794 send_cmd(dec
, RUVD_CMD_DECODING_TARGET_BUFFER
, dt
, 0,
795 RADEON_USAGE_WRITE
, RADEON_DOMAIN_VRAM
);
796 send_cmd(dec
, RUVD_CMD_FEEDBACK_BUFFER
, msg_fb_buf
->cs_handle
,
797 0x1000, RADEON_USAGE_WRITE
, RADEON_DOMAIN_GTT
);
798 set_reg(dec
, RUVD_ENGINE_CNTL
, 1);
/**
 * flush any outstanding command buffers to the hardware
 *
 * Intentionally empty: nothing is done here.
 */
static void ruvd_flush(struct pipe_video_codec *decoder)
{
    (void)decoder;
}
812 * create and UVD decoder
814 struct pipe_video_codec
*ruvd_create_decoder(struct pipe_context
*context
,
815 const struct pipe_video_codec
*templ
,
816 ruvd_set_dtb set_dtb
)
818 struct radeon_winsys
* ws
= ((struct r600_common_context
*)context
)->ws
;
819 unsigned dpb_size
= calc_dpb_size(templ
);
820 unsigned width
= templ
->width
, height
= templ
->height
;
821 unsigned bs_buf_size
;
822 struct radeon_info info
;
823 struct ruvd_decoder
*dec
;
827 ws
->query_info(ws
, &info
);
829 switch(u_reduce_video_profile(templ
->profile
)) {
830 case PIPE_VIDEO_FORMAT_MPEG12
:
831 if (templ
->entrypoint
> PIPE_VIDEO_ENTRYPOINT_BITSTREAM
|| info
.family
< CHIP_PALM
)
832 return vl_create_mpeg12_decoder(context
, templ
);
835 case PIPE_VIDEO_FORMAT_MPEG4
:
836 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
837 width
= align(width
, VL_MACROBLOCK_WIDTH
);
838 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
846 dec
= CALLOC_STRUCT(ruvd_decoder
);
852 dec
->base
.context
= context
;
853 dec
->base
.width
= width
;
854 dec
->base
.height
= height
;
856 dec
->base
.destroy
= ruvd_destroy
;
857 dec
->base
.begin_frame
= ruvd_begin_frame
;
858 dec
->base
.decode_macroblock
= ruvd_decode_macroblock
;
859 dec
->base
.decode_bitstream
= ruvd_decode_bitstream
;
860 dec
->base
.end_frame
= ruvd_end_frame
;
861 dec
->base
.flush
= ruvd_flush
;
863 dec
->set_dtb
= set_dtb
;
864 dec
->stream_handle
= alloc_stream_handle();
866 dec
->cs
= ws
->cs_create(ws
, RING_UVD
, NULL
);
868 RUVD_ERR("Can't get command submission context.\n");
872 bs_buf_size
= width
* height
* 512 / (16 * 16);
873 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
874 unsigned msg_fb_size
= align(sizeof(struct ruvd_msg
), 0x1000) + 0x1000;
875 if (!create_buffer(dec
, &dec
->msg_fb_buffers
[i
], msg_fb_size
)) {
876 RUVD_ERR("Can't allocated message buffers.\n");
880 if (!create_buffer(dec
, &dec
->bs_buffers
[i
], bs_buf_size
)) {
881 RUVD_ERR("Can't allocated bitstream buffers.\n");
885 clear_buffer(dec
, &dec
->msg_fb_buffers
[i
]);
886 clear_buffer(dec
, &dec
->bs_buffers
[i
]);
889 if (!create_buffer(dec
, &dec
->dpb
, dpb_size
)) {
890 RUVD_ERR("Can't allocated dpb.\n");
894 clear_buffer(dec
, &dec
->dpb
);
896 memset(&msg
, 0, sizeof(msg
));
897 msg
.size
= sizeof(msg
);
898 msg
.msg_type
= RUVD_MSG_CREATE
;
899 msg
.stream_handle
= dec
->stream_handle
;
900 msg
.body
.create
.stream_type
= profile2stream_type(dec
->base
.profile
);
901 msg
.body
.create
.width_in_samples
= dec
->base
.width
;
902 msg
.body
.create
.height_in_samples
= dec
->base
.height
;
903 msg
.body
.create
.dpb_size
= dec
->dpb
.buf
->size
;
911 if (dec
->cs
) dec
->ws
->cs_destroy(dec
->cs
);
913 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
914 destroy_buffer(&dec
->msg_fb_buffers
[i
]);
915 destroy_buffer(&dec
->bs_buffers
[i
]);
918 destroy_buffer(&dec
->dpb
);
926 * join surfaces into the same buffer with identical tiling params
927 * sumup their sizes and replace the backend buffers with a single bo
929 void ruvd_join_surfaces(struct radeon_winsys
* ws
, unsigned bind
,
930 struct pb_buffer
** buffers
[VL_NUM_COMPONENTS
],
931 struct radeon_surface
*surfaces
[VL_NUM_COMPONENTS
])
933 unsigned best_tiling
, best_wh
, off
;
934 unsigned size
, alignment
;
935 struct pb_buffer
*pb
;
938 for (i
= 0, best_tiling
= 0, best_wh
= ~0; i
< VL_NUM_COMPONENTS
; ++i
) {
944 /* choose the smallest bank w/h for now */
945 wh
= surfaces
[i
]->bankw
* surfaces
[i
]->bankh
;
952 for (i
= 0, off
= 0; i
< VL_NUM_COMPONENTS
; ++i
) {
956 /* copy the tiling parameters */
957 surfaces
[i
]->bankw
= surfaces
[best_tiling
]->bankw
;
958 surfaces
[i
]->bankh
= surfaces
[best_tiling
]->bankh
;
959 surfaces
[i
]->mtilea
= surfaces
[best_tiling
]->mtilea
;
960 surfaces
[i
]->tile_split
= surfaces
[best_tiling
]->tile_split
;
962 /* adjust the texture layer offsets */
963 off
= align(off
, surfaces
[i
]->bo_alignment
);
964 for (j
= 0; j
< Elements(surfaces
[i
]->level
); ++j
)
965 surfaces
[i
]->level
[j
].offset
+= off
;
966 off
+= surfaces
[i
]->bo_size
;
969 for (i
= 0, size
= 0, alignment
= 0; i
< VL_NUM_COMPONENTS
; ++i
) {
970 if (!buffers
[i
] || !*buffers
[i
])
973 size
= align(size
, (*buffers
[i
])->alignment
);
974 size
+= (*buffers
[i
])->size
;
975 alignment
= MAX2(alignment
, (*buffers
[i
])->alignment
* 1);
981 /* TODO: 2D tiling workaround */
984 pb
= ws
->buffer_create(ws
, size
, alignment
, bind
, RADEON_DOMAIN_VRAM
);
988 for (i
= 0; i
< VL_NUM_COMPONENTS
; ++i
) {
989 if (!buffers
[i
] || !*buffers
[i
])
992 pb_reference(buffers
[i
], pb
);
995 pb_reference(&pb
, NULL
);
998 /* calculate top/bottom offset */
999 static unsigned texture_offset(struct radeon_surface
*surface
, unsigned layer
)
1001 return surface
->level
[0].offset
+
1002 layer
* surface
->level
[0].slice_size
;
/*
 * hw encode the aspect of macro tiles
 *
 * Maps 1, 2, 4, 8 to 0, 1, 2, 3. Any other value is encoded as 0 so that an
 * unexpected input can never leak unencoded into the tile config field.
 */
static unsigned macro_tile_aspect(unsigned macro_tile_aspect)
{
    switch (macro_tile_aspect) {
    default:
    case 1: macro_tile_aspect = 0; break;
    case 2: macro_tile_aspect = 1; break;
    case 4: macro_tile_aspect = 2; break;
    case 8: macro_tile_aspect = 3; break;
    }
    return macro_tile_aspect;
}
/*
 * hw encode the bank width and height
 *
 * Maps 1, 2, 4, 8 to 0, 1, 2, 3. Any other value is encoded as 0 so that an
 * unexpected input can never leak unencoded into the tile config field.
 */
static unsigned bank_wh(unsigned bankwh)
{
    switch (bankwh) {
    default:
    case 1: bankwh = 0; break;
    case 2: bankwh = 1; break;
    case 4: bankwh = 2; break;
    case 8: bankwh = 3; break;
    }
    return bankwh;
}
1032 * fill decoding target field from the luma and chroma surfaces
1034 void ruvd_set_dt_surfaces(struct ruvd_msg
*msg
, struct radeon_surface
*luma
,
1035 struct radeon_surface
*chroma
)
1037 msg
->body
.decode
.dt_pitch
= luma
->level
[0].pitch_bytes
;
1038 switch (luma
->level
[0].mode
) {
1039 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
1040 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_LINEAR
;
1041 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_LINEAR
;
1043 case RADEON_SURF_MODE_1D
:
1044 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
1045 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_1D_THIN
;
1047 case RADEON_SURF_MODE_2D
:
1048 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
1049 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_2D_THIN
;
1056 msg
->body
.decode
.dt_luma_top_offset
= texture_offset(luma
, 0);
1057 msg
->body
.decode
.dt_chroma_top_offset
= texture_offset(chroma
, 0);
1058 if (msg
->body
.decode
.dt_field_mode
) {
1059 msg
->body
.decode
.dt_luma_bottom_offset
= texture_offset(luma
, 1);
1060 msg
->body
.decode
.dt_chroma_bottom_offset
= texture_offset(chroma
, 1);
1062 msg
->body
.decode
.dt_luma_bottom_offset
= msg
->body
.decode
.dt_luma_top_offset
;
1063 msg
->body
.decode
.dt_chroma_bottom_offset
= msg
->body
.decode
.dt_chroma_top_offset
;
1066 assert(luma
->bankw
== chroma
->bankw
);
1067 assert(luma
->bankh
== chroma
->bankh
);
1068 assert(luma
->mtilea
== chroma
->mtilea
);
1070 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_BANK_WIDTH(bank_wh(luma
->bankw
));
1071 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_BANK_HEIGHT(bank_wh(luma
->bankh
));
1072 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma
->mtilea
));
/*
 * Report a video capability (`param`) for the given profile.
 *
 * On families older than CHIP_PALM two answers are overridden first, based on
 * the codec family obtained from u_reduce_video_profile(): SUPPORTED is true
 * for everything except MPEG4, and the two interlaced caps are true for
 * everything except MPEG1/2. All other queries use the generic case list
 * that follows; PREFERED_FORMAT answers PIPE_FORMAT_NV12.
 *
 * NOTE(review): in this copy of the file the `switch (param)` headers and the
 * values returned for most case labels (NPOT_TEXTURES, MAX_WIDTH, MAX_HEIGHT,
 * the interlaced/progressive caps and every MAX_LEVEL profile) are missing --
 * restore them from the canonical source before relying on this function.
 */
1075 int ruvd_get_video_param(struct pipe_screen
*screen
,
1076 enum pipe_video_profile profile
,
1077 enum pipe_video_entrypoint entrypoint
,
1078 enum pipe_video_cap param
)
1080 struct r600_common_screen
*rscreen
= (struct r600_common_screen
*)screen
;
1082 /* UVD 2.x limits */
1083 if (rscreen
->family
< CHIP_PALM
) {
1084 enum pipe_video_format codec
= u_reduce_video_profile(profile
);
1086 case PIPE_VIDEO_CAP_SUPPORTED
:
1087 /* no support for MPEG4 */
1088 return codec
!= PIPE_VIDEO_FORMAT_MPEG4
;
1089 case PIPE_VIDEO_CAP_PREFERS_INTERLACED
:
1090 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED
:
1091 /* and MPEG2 only with shaders */
1092 return codec
!= PIPE_VIDEO_FORMAT_MPEG12
;
/* generic answers, used when the override above did not return */
1099 case PIPE_VIDEO_CAP_SUPPORTED
:
1100 switch (u_reduce_video_profile(profile
)) {
1101 case PIPE_VIDEO_FORMAT_MPEG12
:
1102 case PIPE_VIDEO_FORMAT_MPEG4
:
1103 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
1104 case PIPE_VIDEO_FORMAT_VC1
:
1109 case PIPE_VIDEO_CAP_NPOT_TEXTURES
:
1111 case PIPE_VIDEO_CAP_MAX_WIDTH
:
1113 case PIPE_VIDEO_CAP_MAX_HEIGHT
:
1115 case PIPE_VIDEO_CAP_PREFERED_FORMAT
:
1116 return PIPE_FORMAT_NV12
;
1117 case PIPE_VIDEO_CAP_PREFERS_INTERLACED
:
1119 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED
:
1121 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE
:
/* the labels below are PIPE_VIDEO_PROFILE_* values, i.e. a per-profile answer for MAX_LEVEL */
1123 case PIPE_VIDEO_CAP_MAX_LEVEL
:
1125 case PIPE_VIDEO_PROFILE_MPEG1
:
1127 case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE
:
1128 case PIPE_VIDEO_PROFILE_MPEG2_MAIN
:
1130 case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE
:
1132 case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE
:
1134 case PIPE_VIDEO_PROFILE_VC1_SIMPLE
:
1136 case PIPE_VIDEO_PROFILE_VC1_MAIN
:
1138 case PIPE_VIDEO_PROFILE_VC1_ADVANCED
:
1140 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE
:
1141 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN
:
1142 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
:
1152 boolean
ruvd_is_format_supported(struct pipe_screen
*screen
,
1153 enum pipe_format format
,
1154 enum pipe_video_profile profile
,
1155 enum pipe_video_entrypoint entrypoint
)
1157 /* we can only handle this one anyway */
1158 return format
== PIPE_FORMAT_NV12
;