1 /**************************************************************************
3 * Copyright 2011 Advanced Micro Devices, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Christian König <christian.koenig@amd.com>
34 #include <sys/types.h>
40 #include "pipe/p_video_decoder.h"
42 #include "util/u_memory.h"
43 #include "util/u_video.h"
45 #include "vl/vl_defines.h"
46 #include "vl/vl_mpeg12_decoder.h"
48 #include "../../winsys/radeon/drm/radeon_winsys.h"
49 #include "radeon_uvd.h"
51 #define RUVD_ERR(fmt, args...) \
52 fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)
56 #define NUM_MPEG2_REFS 6
57 #define NUM_H264_REFS 17
/* UVD buffer representation */
struct ruvd_buffer
{
	/* buffer object as returned by the winsys buffer_create() hook */
	struct pb_buffer *buf;

	/*
	 * handle obtained from buffer_get_cs_handle(buf); this is what gets
	 * passed to buffer_map()/buffer_unmap() and cs_add_reloc()
	 */
	struct radeon_winsys_cs_handle *cs_handle;
};
66 /* UVD decoder representation */
68 struct pipe_video_decoder base
;
72 unsigned stream_handle
;
73 unsigned frame_number
;
75 struct radeon_winsys
* ws
;
76 struct radeon_winsys_cs
* cs
;
80 struct ruvd_buffer msg_fb_buffers
[NUM_BUFFERS
];
81 struct ruvd_buffer bs_buffers
[NUM_BUFFERS
];
85 struct ruvd_buffer dpb
;
/*
 * generate an UVD stream handle
 *
 * The handle is the bit-reversed process id XORed with a counter that is
 * incremented on every call, so successive calls within one process yield
 * different handles.
 *
 * NOTE: the counter is a plain function-local static; concurrent callers
 * are not synchronised here.
 */
static unsigned alloc_stream_handle(void)
{
	static unsigned counter = 0;
	unsigned stream_handle = 0;
	unsigned pid = getpid();
	unsigned i;

	/* mirror the 32 pid bits: bit i ends up at position 31 - i */
	for (i = 0; i < 32; ++i)
		stream_handle |= ((pid >> i) & 1) << (31 - i);

	/* mix in the (pre-incremented) per-process counter */
	stream_handle ^= ++counter;
	return stream_handle;
}
103 /* flush IB to the hardware */
104 static void flush(struct ruvd_decoder
*dec
)
106 uint32_t *pm4
= dec
->cs
->buf
;
109 while(dec
->cs
->cdw
% 16)
110 pm4
[dec
->cs
->cdw
++] = RUVD_PKT2();
112 dec
->ws
->cs_flush(dec
->cs
, 0);
115 /* add a new set register command to the IB */
116 static void set_reg(struct ruvd_decoder
*dec
, unsigned reg
, uint32_t val
)
118 uint32_t *pm4
= dec
->cs
->buf
;
119 pm4
[dec
->cs
->cdw
++] = RUVD_PKT0(reg
>> 2, 0);
120 pm4
[dec
->cs
->cdw
++] = val
;
123 /* send a command to the VCPU through the GPCOM registers */
124 static void send_cmd(struct ruvd_decoder
*dec
, unsigned cmd
,
125 struct radeon_winsys_cs_handle
* cs_buf
, uint32_t off
,
126 enum radeon_bo_usage usage
, enum radeon_bo_domain domain
)
130 reloc_idx
= dec
->ws
->cs_add_reloc(dec
->cs
, cs_buf
, usage
, domain
);
131 set_reg(dec
, RUVD_GPCOM_VCPU_DATA0
, off
);
132 set_reg(dec
, RUVD_GPCOM_VCPU_DATA1
, reloc_idx
* 4);
133 set_reg(dec
, RUVD_GPCOM_VCPU_CMD
, cmd
<< 1);
136 /* send a message command to the VCPU */
137 static void send_msg(struct ruvd_decoder
*dec
, struct ruvd_msg
*msg
)
139 struct ruvd_buffer
* buf
;
142 /* grap a message buffer */
143 buf
= &dec
->msg_fb_buffers
[dec
->cur_buffer
];
145 /* copy the message into it */
146 ptr
= dec
->ws
->buffer_map(buf
->cs_handle
, dec
->cs
, PIPE_TRANSFER_WRITE
);
150 memcpy(ptr
, msg
, sizeof(*msg
));
151 memset(ptr
+ sizeof(*msg
), 0, buf
->buf
->size
- sizeof(*msg
));
152 dec
->ws
->buffer_unmap(buf
->cs_handle
);
154 /* and send it to the hardware */
155 send_cmd(dec
, RUVD_CMD_MSG_BUFFER
, buf
->cs_handle
, 0,
156 RADEON_USAGE_READ
, RADEON_DOMAIN_VRAM
);
159 /* create a buffer in the winsys */
160 static bool create_buffer(struct ruvd_decoder
*dec
,
161 struct ruvd_buffer
*buffer
,
164 buffer
->buf
= dec
->ws
->buffer_create(dec
->ws
, size
, 4096, false,
165 RADEON_DOMAIN_GTT
| RADEON_DOMAIN_VRAM
);
169 buffer
->cs_handle
= dec
->ws
->buffer_get_cs_handle(buffer
->buf
);
170 if (!buffer
->cs_handle
)
176 /* destroy a buffer */
177 static void destroy_buffer(struct ruvd_buffer
*buffer
)
179 pb_reference(&buffer
->buf
, NULL
);
180 buffer
->cs_handle
= NULL
;
183 /* reallocate a buffer, preserving its content */
184 static bool resize_buffer(struct ruvd_decoder
*dec
,
185 struct ruvd_buffer
*new_buf
,
188 unsigned bytes
= MIN2(new_buf
->buf
->size
, new_size
);
189 struct ruvd_buffer old_buf
= *new_buf
;
190 void *src
= NULL
, *dst
= NULL
;
192 if (!create_buffer(dec
, new_buf
, new_size
))
195 src
= dec
->ws
->buffer_map(old_buf
.cs_handle
, dec
->cs
, PIPE_TRANSFER_READ
);
199 dst
= dec
->ws
->buffer_map(new_buf
->cs_handle
, dec
->cs
, PIPE_TRANSFER_WRITE
);
203 memcpy(dst
, src
, bytes
);
204 if (new_size
> bytes
) {
207 memset(dst
, 0, new_size
);
209 dec
->ws
->buffer_unmap(new_buf
->cs_handle
);
210 dec
->ws
->buffer_unmap(old_buf
.cs_handle
);
211 destroy_buffer(&old_buf
);
215 if (src
) dec
->ws
->buffer_unmap(old_buf
.cs_handle
);
216 destroy_buffer(new_buf
);
221 /* clear the buffer with zeros */
222 static void clear_buffer(struct ruvd_decoder
*dec
,
223 struct ruvd_buffer
* buffer
)
225 //TODO: let the GPU do the job
226 void *ptr
= dec
->ws
->buffer_map(buffer
->cs_handle
, dec
->cs
,
227 PIPE_TRANSFER_WRITE
);
231 memset(ptr
, 0, buffer
->buf
->size
);
232 dec
->ws
->buffer_unmap(buffer
->cs_handle
);
235 /* cycle to the next set of buffers */
236 static void next_buffer(struct ruvd_decoder
*dec
)
239 dec
->cur_buffer
%= NUM_BUFFERS
;
242 /* convert the profile into something UVD understands */
243 static uint32_t profile2stream_type(enum pipe_video_profile profile
)
245 switch (u_reduce_video_profile(profile
)) {
246 case PIPE_VIDEO_CODEC_MPEG4_AVC
:
247 return RUVD_CODEC_H264
;
249 case PIPE_VIDEO_CODEC_VC1
:
250 return RUVD_CODEC_VC1
;
252 case PIPE_VIDEO_CODEC_MPEG12
:
253 return RUVD_CODEC_MPEG2
;
255 case PIPE_VIDEO_CODEC_MPEG4
:
256 return RUVD_CODEC_MPEG4
;
264 /* calculate size of reference picture buffer */
265 static unsigned calc_dpb_size(enum pipe_video_profile profile
,
266 unsigned width
, unsigned height
,
267 unsigned max_references
)
269 unsigned width_in_mb
, height_in_mb
, image_size
, dpb_size
;
271 // always align them to MB size for dpb calculation
272 width
= align(width
, VL_MACROBLOCK_WIDTH
);
273 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
275 // always one more for currently decoded picture
278 // aligned size of a single frame
279 image_size
= width
* height
;
280 image_size
+= image_size
/ 2;
281 image_size
= align(image_size
, 1024);
283 // picture width & height in 16 pixel units
284 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
285 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
287 switch (u_reduce_video_profile(profile
)) {
288 case PIPE_VIDEO_CODEC_MPEG4_AVC
:
289 // the firmware seems to always assume a minimum number of ref frames
290 max_references
= MAX2(NUM_H264_REFS
, max_references
);
292 // reference picture buffer
293 dpb_size
= image_size
* max_references
;
295 // macroblock context buffer
296 dpb_size
+= width_in_mb
* height_in_mb
* max_references
* 192;
299 dpb_size
+= width_in_mb
* height_in_mb
* 32;
302 case PIPE_VIDEO_CODEC_VC1
:
303 // reference picture buffer
304 dpb_size
= image_size
* max_references
;
307 dpb_size
+= width_in_mb
* height_in_mb
* 128;
310 dpb_size
+= width_in_mb
* 64;
313 dpb_size
+= width_in_mb
* 128;
316 dpb_size
+= align(MAX2(width_in_mb
, height_in_mb
) * 7 * 16, 64);
319 case PIPE_VIDEO_CODEC_MPEG12
:
320 // reference picture buffer, must be big enough for all frames
321 dpb_size
= image_size
* NUM_MPEG2_REFS
;
324 case PIPE_VIDEO_CODEC_MPEG4
:
325 // reference picture buffer
326 dpb_size
= image_size
* max_references
;
329 dpb_size
+= width_in_mb
* height_in_mb
* 64;
332 dpb_size
+= align(width_in_mb
* height_in_mb
* 32, 64);
336 // something is missing here
339 // at least use a sane default value
340 dpb_size
= 32 * 1024 * 1024;
346 /* get h264 specific message bits */
347 static struct ruvd_h264
get_h264_msg(struct ruvd_decoder
*dec
, struct pipe_h264_picture_desc
*pic
)
349 struct ruvd_h264 result
;
351 memset(&result
, 0, sizeof(result
));
352 switch (pic
->base
.profile
) {
353 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE
:
354 result
.profile
= RUVD_H264_PROFILE_BASELINE
;
357 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN
:
358 result
.profile
= RUVD_H264_PROFILE_MAIN
;
361 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
:
362 result
.profile
= RUVD_H264_PROFILE_HIGH
;
369 if (((dec
->base
.width
* dec
->base
.height
) >> 8) <= 1620)
374 result
.sps_info_flags
= 0;
375 result
.sps_info_flags
|= pic
->direct_8x8_inference_flag
<< 0;
376 result
.sps_info_flags
|= pic
->mb_adaptive_frame_field_flag
<< 1;
377 result
.sps_info_flags
|= pic
->frame_mbs_only_flag
<< 2;
378 result
.sps_info_flags
|= pic
->delta_pic_order_always_zero_flag
<< 3;
380 result
.pps_info_flags
= 0;
381 result
.pps_info_flags
|= pic
->transform_8x8_mode_flag
<< 0;
382 result
.pps_info_flags
|= pic
->redundant_pic_cnt_present_flag
<< 1;
383 result
.pps_info_flags
|= pic
->constrained_intra_pred_flag
<< 2;
384 result
.pps_info_flags
|= pic
->deblocking_filter_control_present_flag
<< 3;
385 result
.pps_info_flags
|= pic
->weighted_bipred_idc
<< 4;
386 result
.pps_info_flags
|= pic
->weighted_pred_flag
<< 6;
387 result
.pps_info_flags
|= pic
->pic_order_present_flag
<< 7;
388 result
.pps_info_flags
|= pic
->entropy_coding_mode_flag
<< 8;
390 result
.chroma_format
= 0x1;
391 result
.bit_depth_luma_minus8
= 0;
392 result
.bit_depth_chroma_minus8
= 0;
394 result
.log2_max_frame_num_minus4
= pic
->log2_max_frame_num_minus4
;
395 result
.pic_order_cnt_type
= pic
->pic_order_cnt_type
;
396 result
.log2_max_pic_order_cnt_lsb_minus4
= pic
->log2_max_pic_order_cnt_lsb_minus4
;
397 result
.num_ref_frames
= pic
->num_ref_frames
;
398 result
.pic_init_qp_minus26
= pic
->pic_init_qp_minus26
;
399 result
.chroma_qp_index_offset
= pic
->chroma_qp_index_offset
;
400 result
.second_chroma_qp_index_offset
= pic
->second_chroma_qp_index_offset
;
402 result
.num_slice_groups_minus1
= 0;
403 result
.slice_group_map_type
= 0;
405 result
.num_ref_idx_l0_active_minus1
= pic
->num_ref_idx_l0_active_minus1
;
406 result
.num_ref_idx_l1_active_minus1
= pic
->num_ref_idx_l1_active_minus1
;
408 result
.slice_group_change_rate_minus1
= 0;
410 memcpy(result
.scaling_list_4x4
, pic
->scaling_lists_4x4
, 6*64);
411 memcpy(result
.scaling_list_8x8
, pic
->scaling_lists_8x8
, 2*64);
413 result
.frame_num
= pic
->frame_num
;
414 memcpy(result
.frame_num_list
, pic
->frame_num_list
, 4*16);
415 result
.curr_field_order_cnt_list
[0] = pic
->field_order_cnt
[0];
416 result
.curr_field_order_cnt_list
[1] = pic
->field_order_cnt
[1];
417 memcpy(result
.field_order_cnt_list
, pic
->field_order_cnt_list
, 4*16*2);
419 result
.decoded_pic_idx
= pic
->frame_num
;
424 /* get vc1 specific message bits */
425 static struct ruvd_vc1
get_vc1_msg(struct pipe_vc1_picture_desc
*pic
)
427 struct ruvd_vc1 result
;
429 memset(&result
, 0, sizeof(result
));
430 switch(pic
->base
.profile
) {
431 case PIPE_VIDEO_PROFILE_VC1_SIMPLE
:
432 result
.profile
= RUVD_VC1_PROFILE_SIMPLE
;
435 case PIPE_VIDEO_PROFILE_VC1_MAIN
:
436 result
.profile
= RUVD_VC1_PROFILE_MAIN
;
439 case PIPE_VIDEO_PROFILE_VC1_ADVANCED
:
440 result
.profile
= RUVD_VC1_PROFILE_ADVANCED
;
446 if (pic
->base
.profile
== PIPE_VIDEO_PROFILE_VC1_ADVANCED
) {
449 result
.sps_info_flags
|= pic
->postprocflag
<< 7;
450 result
.sps_info_flags
|= pic
->pulldown
<< 6;
451 result
.sps_info_flags
|= pic
->interlace
<< 5;
452 result
.sps_info_flags
|= pic
->tfcntrflag
<< 4;
453 result
.sps_info_flags
|= pic
->psf
<< 1;
455 result
.pps_info_flags
|= pic
->panscan_flag
<< 7;
456 result
.pps_info_flags
|= pic
->refdist_flag
<< 6;
457 result
.pps_info_flags
|= pic
->extended_dmv
<< 8;
458 result
.pps_info_flags
|= pic
->range_mapy_flag
<< 31;
459 result
.pps_info_flags
|= pic
->range_mapy
<< 28;
460 result
.pps_info_flags
|= pic
->range_mapuv_flag
<< 27;
461 result
.pps_info_flags
|= pic
->range_mapuv
<< 24;
465 result
.pps_info_flags
|= pic
->multires
<< 21;
466 result
.pps_info_flags
|= pic
->syncmarker
<< 20;
467 result
.pps_info_flags
|= pic
->rangered
<< 19;
468 result
.pps_info_flags
|= pic
->maxbframes
<< 16;
471 result
.sps_info_flags
|= pic
->finterpflag
<< 3;
472 //(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
474 result
.pps_info_flags
|= pic
->loopfilter
<< 5;
475 result
.pps_info_flags
|= pic
->fastuvmc
<< 4;
476 result
.pps_info_flags
|= pic
->extended_mv
<< 3;
477 result
.pps_info_flags
|= pic
->dquant
<< 1;
478 result
.pps_info_flags
|= pic
->vstransform
<< 0;
479 result
.pps_info_flags
|= pic
->overlap
<< 11;
480 result
.pps_info_flags
|= pic
->quantizer
<< 9;
486 uint8_t frame_coding_mode
487 uint8_t deblockEnable
491 result
.chroma_format
= 1;
495 /* extract the frame number from a referenced video buffer */
496 static uint32_t get_ref_pic_idx(struct ruvd_decoder
*dec
, struct pipe_video_buffer
*ref
)
498 uint32_t min
= dec
->frame_number
- NUM_MPEG2_REFS
;
499 uint32_t max
= dec
->frame_number
- 1;
502 /* seems to be the most sane fallback */
506 /* get the frame number from the associated data */
507 frame
= (uintptr_t)vl_video_buffer_get_associated_data(ref
, &dec
->base
);
509 /* limit the frame number to a valid range */
510 return MAX2(MIN2(frame
, max
), min
);
513 /* get mpeg2 specific msg bits */
514 static struct ruvd_mpeg2
get_mpeg2_msg(struct ruvd_decoder
*dec
,
515 struct pipe_mpeg12_picture_desc
*pic
)
517 struct ruvd_mpeg2 result
;
520 memset(&result
, 0, sizeof(result
));
521 result
.decoded_pic_idx
= dec
->frame_number
;
522 for (i
= 0; i
< 2; ++i
)
523 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
525 result
.load_intra_quantiser_matrix
= 1;
526 result
.load_nonintra_quantiser_matrix
= 1;
527 memcpy(&result
.intra_quantiser_matrix
, pic
->intra_matrix
, 64);
528 memcpy(&result
.nonintra_quantiser_matrix
, pic
->non_intra_matrix
, 64);
530 result
.profile_and_level_indication
= 0;
531 result
.chroma_format
= 0x1;
533 result
.picture_coding_type
= pic
->picture_coding_type
;
534 result
.f_code
[0][0] = pic
->f_code
[0][0] + 1;
535 result
.f_code
[0][1] = pic
->f_code
[0][1] + 1;
536 result
.f_code
[1][0] = pic
->f_code
[1][0] + 1;
537 result
.f_code
[1][1] = pic
->f_code
[1][1] + 1;
538 result
.intra_dc_precision
= pic
->intra_dc_precision
;
539 result
.pic_structure
= pic
->picture_structure
;
540 result
.top_field_first
= pic
->top_field_first
;
541 result
.frame_pred_frame_dct
= pic
->frame_pred_frame_dct
;
542 result
.concealment_motion_vectors
= pic
->concealment_motion_vectors
;
543 result
.q_scale_type
= pic
->q_scale_type
;
544 result
.intra_vlc_format
= pic
->intra_vlc_format
;
545 result
.alternate_scan
= pic
->alternate_scan
;
550 /* get mpeg4 specific msg bits */
551 static struct ruvd_mpeg4
get_mpeg4_msg(struct ruvd_decoder
*dec
,
552 struct pipe_mpeg4_picture_desc
*pic
)
554 struct ruvd_mpeg4 result
;
556 memset(&result
, 0, sizeof(result
));
557 result
.decoded_pic_idx
= dec
->frame_number
;
558 for (i
= 0; i
< 2; ++i
)
559 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
561 result
.video_object_layer_width
= dec
->base
.width
;
562 result
.video_object_layer_height
= dec
->base
.height
;
564 result
.vop_time_increment_resolution
= pic
->vop_time_increment_resolution
;
565 result
.quant_type
= pic
->quant_type
;
567 result
.flags
|= pic
->short_video_header
<< 0;
568 //result.flags |= obmc_disable << 1;
569 result
.flags
|= pic
->interlaced
<< 2;
570 result
.flags
|= 1 << 3; // load_intra_quant_mat
571 result
.flags
|= 1 << 4; // load_nonintra_quant_mat
572 result
.flags
|= pic
->quarter_sample
<< 5;
573 //result.flags |= complexity_estimation_disable << 6
574 result
.flags
|= pic
->resync_marker_disable
<< 7;
575 //result.flags |= data_partitioned << 8;
576 //result.flags |= reversible_vlc << 9;
577 //result.flags |= newpred_enable << 10;
578 //result.flags |= reduced_resolution_vop_enable << 11;
579 //result.flags |= scalability << 12;
580 //result.flags |= is_object_layer_identifier << 13;
581 //result.flags |= fixed_vop_rate << 14;
582 //result.flags |= newpred_segment_type << 15;
584 memcpy(&result
.intra_quant_mat
, pic
->intra_matrix
, 64);
585 memcpy(&result
.nonintra_quant_mat
, pic
->non_intra_matrix
, 64);
590 uint8_t vop_coding_type
591 uint8_t vop_fcode_forward
592 uint8_t vop_fcode_backward
593 uint8_t rounding_control
594 uint8_t alternate_vertical_scan_flag
595 uint8_t top_field_first
602 * destroy this video decoder
604 static void ruvd_destroy(struct pipe_video_decoder
*decoder
)
606 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
612 memset(&msg
, 0, sizeof(msg
));
613 msg
.size
= sizeof(msg
);
614 msg
.msg_type
= RUVD_MSG_DESTROY
;
615 msg
.stream_handle
= dec
->stream_handle
;
620 dec
->ws
->cs_destroy(dec
->cs
);
622 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
623 destroy_buffer(&dec
->msg_fb_buffers
[i
]);
624 destroy_buffer(&dec
->bs_buffers
[i
]);
627 destroy_buffer(&dec
->dpb
);
/*
 * free associated data in the video buffer callback
 *
 * The associated data is only an integer stored in the pointer value
 * itself, so there is nothing to release.
 */
static void ruvd_destroy_associated_data(void *data)
{
	/* NOOP, since we only use an intptr */
	(void)data;
}
639 * start decoding of a new frame
641 static void ruvd_begin_frame(struct pipe_video_decoder
*decoder
,
642 struct pipe_video_buffer
*target
,
643 struct pipe_picture_desc
*picture
)
645 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
650 frame
= ++dec
->frame_number
;
651 vl_video_buffer_set_associated_data(target
, decoder
, (void *)frame
,
652 &ruvd_destroy_associated_data
);
655 dec
->bs_ptr
= dec
->ws
->buffer_map(
656 dec
->bs_buffers
[dec
->cur_buffer
].cs_handle
,
657 dec
->cs
, PIPE_TRANSFER_WRITE
);
661 * decode a macroblock
663 static void ruvd_decode_macroblock(struct pipe_video_decoder
*decoder
,
664 struct pipe_video_buffer
*target
,
665 struct pipe_picture_desc
*picture
,
666 const struct pipe_macroblock
*macroblocks
,
667 unsigned num_macroblocks
)
669 /* not supported (yet) */
676 static void ruvd_decode_bitstream(struct pipe_video_decoder
*decoder
,
677 struct pipe_video_buffer
*target
,
678 struct pipe_picture_desc
*picture
,
679 unsigned num_buffers
,
680 const void * const *buffers
,
681 const unsigned *sizes
)
683 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
691 for (i
= 0; i
< num_buffers
; ++i
) {
692 struct ruvd_buffer
*buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
693 unsigned new_size
= dec
->bs_size
+ sizes
[i
];
695 if (new_size
> buf
->buf
->size
) {
696 dec
->ws
->buffer_unmap(buf
->cs_handle
);
697 if (!resize_buffer(dec
, buf
, new_size
)) {
698 RUVD_ERR("Can't resize bitstream buffer!");
702 dec
->bs_ptr
= dec
->ws
->buffer_map(buf
->cs_handle
, dec
->cs
,
703 PIPE_TRANSFER_WRITE
);
707 dec
->bs_ptr
+= dec
->bs_size
;
710 memcpy(dec
->bs_ptr
, buffers
[i
], sizes
[i
]);
711 dec
->bs_size
+= sizes
[i
];
712 dec
->bs_ptr
+= sizes
[i
];
717 * end decoding of the current frame
719 static void ruvd_end_frame(struct pipe_video_decoder
*decoder
,
720 struct pipe_video_buffer
*target
,
721 struct pipe_picture_desc
*picture
)
723 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
724 struct radeon_winsys_cs_handle
*dt
;
725 struct ruvd_buffer
*msg_fb_buf
, *bs_buf
;
734 msg_fb_buf
= &dec
->msg_fb_buffers
[dec
->cur_buffer
];
735 bs_buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
737 bs_size
= align(dec
->bs_size
, 128);
738 memset(dec
->bs_ptr
, 0, bs_size
- dec
->bs_size
);
739 dec
->ws
->buffer_unmap(bs_buf
->cs_handle
);
741 memset(&msg
, 0, sizeof(msg
));
742 msg
.size
= sizeof(msg
);
743 msg
.msg_type
= RUVD_MSG_DECODE
;
744 msg
.stream_handle
= dec
->stream_handle
;
745 msg
.status_report_feedback_number
= dec
->frame_number
;
747 msg
.decode
.stream_type
= profile2stream_type(dec
->base
.profile
);
748 msg
.decode
.decode_flags
= 0x1;
749 msg
.decode
.width_in_samples
= dec
->base
.width
;
750 msg
.decode
.height_in_samples
= dec
->base
.height
;
752 msg
.decode
.dpb_size
= dec
->dpb
.buf
->size
;
753 msg
.decode
.bsd_size
= bs_size
;
755 dt
= dec
->set_dtb(&msg
, (struct vl_video_buffer
*)target
);
757 switch (u_reduce_video_profile(picture
->profile
)) {
758 case PIPE_VIDEO_CODEC_MPEG4_AVC
:
759 msg
.decode
.h264
= get_h264_msg(dec
, (struct pipe_h264_picture_desc
*)picture
);
762 case PIPE_VIDEO_CODEC_VC1
:
763 msg
.decode
.vc1
= get_vc1_msg((struct pipe_vc1_picture_desc
*)picture
);
766 case PIPE_VIDEO_CODEC_MPEG12
:
767 msg
.decode
.mpeg2
= get_mpeg2_msg(dec
, (struct pipe_mpeg12_picture_desc
*)picture
);
770 case PIPE_VIDEO_CODEC_MPEG4
:
771 msg
.decode
.mpeg4
= get_mpeg4_msg(dec
, (struct pipe_mpeg4_picture_desc
*)picture
);
779 msg
.decode
.db_surf_tile_config
= msg
.decode
.dt_surf_tile_config
;
780 msg
.decode
.extension_support
= 0x1;
783 send_cmd(dec
, RUVD_CMD_DPB_BUFFER
, dec
->dpb
.cs_handle
, 0,
784 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
785 send_cmd(dec
, RUVD_CMD_BITSTREAM_BUFFER
, bs_buf
->cs_handle
,
786 0, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
787 send_cmd(dec
, RUVD_CMD_DECODING_TARGET_BUFFER
, dt
, 0,
788 RADEON_USAGE_WRITE
, RADEON_DOMAIN_VRAM
);
789 send_cmd(dec
, RUVD_CMD_FEEDBACK_BUFFER
, msg_fb_buf
->cs_handle
,
790 0x1000, RADEON_USAGE_WRITE
, RADEON_DOMAIN_VRAM
);
791 set_reg(dec
, RUVD_ENGINE_CNTL
, 1);
798 * flush any outstanding command buffers to the hardware
static void ruvd_flush(struct pipe_video_decoder *decoder)
{
	/* intentionally empty: this hook performs no work */
	(void)decoder;
}
805 * create a UVD decoder
807 struct pipe_video_decoder
*ruvd_create_decoder(struct pipe_context
*context
,
808 enum pipe_video_profile profile
,
809 enum pipe_video_entrypoint entrypoint
,
810 enum pipe_video_chroma_format chroma_format
,
811 unsigned width
, unsigned height
,
812 unsigned max_references
, bool expect_chunked_decode
,
813 struct radeon_winsys
* ws
,
814 ruvd_set_dtb set_dtb
)
816 unsigned dpb_size
= calc_dpb_size(profile
, width
, height
, max_references
);
817 struct ruvd_decoder
*dec
;
821 switch(u_reduce_video_profile(profile
)) {
822 case PIPE_VIDEO_CODEC_MPEG12
:
823 if (entrypoint
> PIPE_VIDEO_ENTRYPOINT_BITSTREAM
)
824 return vl_create_mpeg12_decoder(context
, profile
, entrypoint
,
825 chroma_format
, width
,
826 height
, max_references
, expect_chunked_decode
);
829 case PIPE_VIDEO_CODEC_MPEG4
:
830 case PIPE_VIDEO_CODEC_MPEG4_AVC
:
831 width
= align(width
, VL_MACROBLOCK_WIDTH
);
832 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
840 dec
= CALLOC_STRUCT(ruvd_decoder
);
845 dec
->base
.context
= context
;
846 dec
->base
.profile
= profile
;
847 dec
->base
.entrypoint
= entrypoint
;
848 dec
->base
.chroma_format
= chroma_format
;
849 dec
->base
.width
= width
;
850 dec
->base
.height
= height
;
852 dec
->base
.destroy
= ruvd_destroy
;
853 dec
->base
.begin_frame
= ruvd_begin_frame
;
854 dec
->base
.decode_macroblock
= ruvd_decode_macroblock
;
855 dec
->base
.decode_bitstream
= ruvd_decode_bitstream
;
856 dec
->base
.end_frame
= ruvd_end_frame
;
857 dec
->base
.flush
= ruvd_flush
;
859 dec
->set_dtb
= set_dtb
;
860 dec
->stream_handle
= alloc_stream_handle();
862 dec
->cs
= ws
->cs_create(ws
, RING_UVD
);
864 RUVD_ERR("Can't get command submission context.\n");
868 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
869 unsigned msg_fb_size
= align(sizeof(struct ruvd_msg
), 0x1000) + 0x1000;
870 if (!create_buffer(dec
, &dec
->msg_fb_buffers
[i
], msg_fb_size
)) {
871 RUVD_ERR("Can't allocated message buffers.\n");
875 if (!create_buffer(dec
, &dec
->bs_buffers
[i
], 4096)) {
876 RUVD_ERR("Can't allocated bitstream buffers.\n");
880 clear_buffer(dec
, &dec
->msg_fb_buffers
[i
]);
881 clear_buffer(dec
, &dec
->bs_buffers
[i
]);
884 if (!create_buffer(dec
, &dec
->dpb
, dpb_size
)) {
885 RUVD_ERR("Can't allocated dpb.\n");
889 clear_buffer(dec
, &dec
->dpb
);
891 memset(&msg
, 0, sizeof(msg
));
892 msg
.size
= sizeof(msg
);
893 msg
.msg_type
= RUVD_MSG_CREATE
;
894 msg
.stream_handle
= dec
->stream_handle
;
895 msg
.create
.stream_type
= profile2stream_type(dec
->base
.profile
);
896 msg
.create
.width_in_samples
= dec
->base
.width
;
897 msg
.create
.height_in_samples
= dec
->base
.height
;
898 msg
.create
.dpb_size
= dec
->dpb
.buf
->size
;
906 if (dec
->cs
) dec
->ws
->cs_destroy(dec
->cs
);
908 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
909 destroy_buffer(&dec
->msg_fb_buffers
[i
]);
910 destroy_buffer(&dec
->bs_buffers
[i
]);
913 destroy_buffer(&dec
->dpb
);
921 * join surfaces into the same buffer with identical tiling params
922 * sum up their sizes and replace the backend buffers with a single bo
924 void ruvd_join_surfaces(struct radeon_winsys
* ws
, unsigned bind
,
925 struct pb_buffer
** buffers
[VL_NUM_COMPONENTS
],
926 struct radeon_surface
*surfaces
[VL_NUM_COMPONENTS
])
928 unsigned best_tiling
, best_wh
, off
;
929 unsigned size
, alignment
;
930 struct pb_buffer
*pb
;
933 for (i
= 0, best_tiling
= 0, best_wh
= ~0; i
< VL_NUM_COMPONENTS
; ++i
) {
939 /* choose the smallest bank w/h for now */
940 wh
= surfaces
[i
]->bankw
* surfaces
[i
]->bankh
;
947 for (i
= 0, off
= 0; i
< VL_NUM_COMPONENTS
; ++i
) {
951 /* copy the tiling parameters */
952 surfaces
[i
]->bankw
= surfaces
[best_tiling
]->bankw
;
953 surfaces
[i
]->bankh
= surfaces
[best_tiling
]->bankh
;
954 surfaces
[i
]->mtilea
= surfaces
[best_tiling
]->mtilea
;
955 surfaces
[i
]->tile_split
= surfaces
[best_tiling
]->tile_split
;
957 /* adjust the texture layer offsets */
958 off
= align(off
, surfaces
[i
]->bo_alignment
);
959 for (j
= 0; j
< Elements(surfaces
[i
]->level
); ++j
)
960 surfaces
[i
]->level
[j
].offset
+= off
;
961 off
+= surfaces
[i
]->bo_size
;
964 for (i
= 0, size
= 0, alignment
= 0; i
< VL_NUM_COMPONENTS
; ++i
) {
965 if (!buffers
[i
] || !*buffers
[i
])
968 size
= align(size
, (*buffers
[i
])->alignment
);
969 size
+= (*buffers
[i
])->size
;
970 alignment
= MAX2(alignment
, (*buffers
[i
])->alignment
* 1);
976 /* TODO: 2D tiling workaround */
979 pb
= ws
->buffer_create(ws
, size
, alignment
, bind
, RADEON_DOMAIN_VRAM
);
983 for (i
= 0; i
< VL_NUM_COMPONENTS
; ++i
) {
984 if (!buffers
[i
] || !*buffers
[i
])
987 pb_reference(buffers
[i
], pb
);
990 pb_reference(&pb
, NULL
);
993 /* calculate top/bottom offset */
994 static unsigned texture_offset(struct radeon_surface
*surface
, unsigned layer
)
996 return surface
->level
[0].offset
+
997 layer
* surface
->level
[0].slice_size
;
/*
 * hw encode the aspect of macro tiles
 *
 * Maps an aspect of 1, 2, 4 or 8 to the encodings 0, 1, 2 and 3.  Any
 * other value falls back to encoding 0 so that a raw, unencoded number
 * can never be returned to the caller.
 */
static unsigned macro_tile_aspect(unsigned macro_tile_aspect)
{
	switch (macro_tile_aspect) {
	case 2:
		return 1;
	case 4:
		return 2;
	case 8:
		return 3;
	case 1:
	default:
		return 0;
	}
}
/*
 * hw encode the bank width and height
 *
 * Maps a bank width/height of 1, 2, 4 or 8 to the encodings 0, 1, 2 and 3.
 * Any other value falls back to encoding 0 so that a raw, unencoded number
 * can never be returned to the caller.
 */
static unsigned bank_wh(unsigned bankwh)
{
	switch (bankwh) {
	case 2:
		return 1;
	case 4:
		return 2;
	case 8:
		return 3;
	case 1:
	default:
		return 0;
	}
}
1027 * fill decoding target field from the luma and chroma surfaces
1029 void ruvd_set_dt_surfaces(struct ruvd_msg
*msg
, struct radeon_surface
*luma
,
1030 struct radeon_surface
*chroma
)
1032 msg
->decode
.dt_pitch
= luma
->level
[0].pitch_bytes
;
1033 switch (luma
->level
[0].mode
) {
1034 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
1035 msg
->decode
.dt_tiling_mode
= RUVD_TILE_LINEAR
;
1036 msg
->decode
.dt_array_mode
= RUVD_ARRAY_MODE_LINEAR
;
1038 case RADEON_SURF_MODE_1D
:
1039 msg
->decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
1040 msg
->decode
.dt_array_mode
= RUVD_ARRAY_MODE_1D_THIN
;
1042 case RADEON_SURF_MODE_2D
:
1043 msg
->decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
1044 msg
->decode
.dt_array_mode
= RUVD_ARRAY_MODE_2D_THIN
;
1051 msg
->decode
.dt_luma_top_offset
= texture_offset(luma
, 0);
1052 msg
->decode
.dt_chroma_top_offset
= texture_offset(chroma
, 0);
1053 if (msg
->decode
.dt_field_mode
) {
1054 msg
->decode
.dt_luma_bottom_offset
= texture_offset(luma
, 1);
1055 msg
->decode
.dt_chroma_bottom_offset
= texture_offset(chroma
, 1);
1057 msg
->decode
.dt_luma_bottom_offset
= msg
->decode
.dt_luma_top_offset
;
1058 msg
->decode
.dt_chroma_bottom_offset
= msg
->decode
.dt_chroma_top_offset
;
1061 assert(luma
->bankw
== chroma
->bankw
);
1062 assert(luma
->bankh
== chroma
->bankh
);
1063 assert(luma
->mtilea
== chroma
->mtilea
);
1065 msg
->decode
.dt_surf_tile_config
|= RUVD_BANK_WIDTH(bank_wh(luma
->bankw
));
1066 msg
->decode
.dt_surf_tile_config
|= RUVD_BANK_HEIGHT(bank_wh(luma
->bankh
));
1067 msg
->decode
.dt_surf_tile_config
|= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma
->mtilea
));
1070 int ruvd_get_video_param(struct pipe_screen
*screen
,
1071 enum pipe_video_profile profile
,
1072 enum pipe_video_cap param
)
1075 case PIPE_VIDEO_CAP_SUPPORTED
:
1076 switch (u_reduce_video_profile(profile
)) {
1077 case PIPE_VIDEO_CODEC_MPEG12
:
1078 case PIPE_VIDEO_CODEC_MPEG4
:
1079 /* TODO not all hw families support MPEG4 */
1080 case PIPE_VIDEO_CODEC_MPEG4_AVC
:
1081 case PIPE_VIDEO_CODEC_VC1
:
1086 case PIPE_VIDEO_CAP_NPOT_TEXTURES
:
1088 case PIPE_VIDEO_CAP_MAX_WIDTH
:
1090 case PIPE_VIDEO_CAP_MAX_HEIGHT
:
1092 case PIPE_VIDEO_CAP_PREFERED_FORMAT
:
1093 return PIPE_FORMAT_NV12
;
1094 case PIPE_VIDEO_CAP_PREFERS_INTERLACED
:
1096 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED
:
1097 return false; /* TODO: enable this */
1098 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE
:
1105 boolean
ruvd_is_format_supported(struct pipe_screen
*screen
,
1106 enum pipe_format format
,
1107 enum pipe_video_profile profile
)
1109 /* we can only handle this one anyway */
1110 return format
== PIPE_FORMAT_NV12
;