1 /**************************************************************************
3 * Copyright 2011 Advanced Micro Devices, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Christian König <christian.koenig@amd.com>
34 #include <sys/types.h>
40 #include "pipe/p_video_decoder.h"
42 #include "util/u_memory.h"
43 #include "util/u_video.h"
45 #include "vl/vl_defines.h"
46 #include "vl/vl_mpeg12_decoder.h"
48 #include "../../winsys/radeon/drm/radeon_winsys.h"
49 #include "radeon_uvd.h"
51 #define RUVD_ERR(fmt, args...) \
52 fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)
56 #define NUM_MPEG2_REFS 6
57 #define NUM_H264_REFS 17
59 /* UVD buffer representation */
62 struct pb_buffer
* buf
;
63 struct radeon_winsys_cs_handle
* cs_handle
;
66 /* UVD decoder representation */
68 struct pipe_video_decoder base
;
72 unsigned stream_handle
;
73 unsigned frame_number
;
75 struct radeon_winsys
* ws
;
76 struct radeon_winsys_cs
* cs
;
80 struct ruvd_buffer msg_fb_buffers
[NUM_BUFFERS
];
81 struct ruvd_buffer bs_buffers
[NUM_BUFFERS
];
85 struct ruvd_buffer dpb
;
/**
 * Generate a UVD stream handle.
 *
 * The handle is the bit-reversed process id XORed with a counter that is
 * incremented on every call, so consecutive calls inside one process
 * return different values.
 *
 * NOTE(review): the counter is a plain function-local static and is not
 * synchronized here; confirm callers never create decoders concurrently.
 *
 * @return the newly generated stream handle
 */
static unsigned alloc_stream_handle(void)
{
	static unsigned counter = 0;
	unsigned stream_handle = 0;
	unsigned pid = getpid();
	unsigned i;

	/* mirror the pid bits: bit i of the pid lands on bit 31 - i */
	for (i = 0; i < 32; ++i)
		stream_handle |= ((pid >> i) & 1) << (31 - i);

	stream_handle ^= ++counter;
	return stream_handle;
}
103 /* flush IB to the hardware */
104 static void flush(struct ruvd_decoder
*dec
)
106 uint32_t *pm4
= dec
->cs
->buf
;
109 while(dec
->cs
->cdw
% 16)
110 pm4
[dec
->cs
->cdw
++] = RUVD_PKT2();
112 dec
->ws
->cs_flush(dec
->cs
, 0, 0);
115 /* add a new set register command to the IB */
116 static void set_reg(struct ruvd_decoder
*dec
, unsigned reg
, uint32_t val
)
118 uint32_t *pm4
= dec
->cs
->buf
;
119 pm4
[dec
->cs
->cdw
++] = RUVD_PKT0(reg
>> 2, 0);
120 pm4
[dec
->cs
->cdw
++] = val
;
123 /* send a command to the VCPU through the GPCOM registers */
124 static void send_cmd(struct ruvd_decoder
*dec
, unsigned cmd
,
125 struct radeon_winsys_cs_handle
* cs_buf
, uint32_t off
,
126 enum radeon_bo_usage usage
, enum radeon_bo_domain domain
)
130 reloc_idx
= dec
->ws
->cs_add_reloc(dec
->cs
, cs_buf
, usage
, domain
);
131 set_reg(dec
, RUVD_GPCOM_VCPU_DATA0
, off
);
132 set_reg(dec
, RUVD_GPCOM_VCPU_DATA1
, reloc_idx
* 4);
133 set_reg(dec
, RUVD_GPCOM_VCPU_CMD
, cmd
<< 1);
136 /* send a message command to the VCPU */
137 static void send_msg(struct ruvd_decoder
*dec
, struct ruvd_msg
*msg
)
139 struct ruvd_buffer
* buf
;
142 /* grap a message buffer */
143 buf
= &dec
->msg_fb_buffers
[dec
->cur_buffer
];
145 /* copy the message into it */
146 ptr
= dec
->ws
->buffer_map(buf
->cs_handle
, dec
->cs
, PIPE_TRANSFER_WRITE
);
150 memcpy(ptr
, msg
, sizeof(*msg
));
151 memset(ptr
+ sizeof(*msg
), 0, buf
->buf
->size
- sizeof(*msg
));
152 dec
->ws
->buffer_unmap(buf
->cs_handle
);
154 /* and send it to the hardware */
155 send_cmd(dec
, RUVD_CMD_MSG_BUFFER
, buf
->cs_handle
, 0,
156 RADEON_USAGE_READ
, RADEON_DOMAIN_VRAM
);
159 /* create a buffer in the winsys */
160 static bool create_buffer(struct ruvd_decoder
*dec
,
161 struct ruvd_buffer
*buffer
,
164 buffer
->buf
= dec
->ws
->buffer_create(dec
->ws
, size
, 4096, false,
165 RADEON_DOMAIN_GTT
| RADEON_DOMAIN_VRAM
);
169 buffer
->cs_handle
= dec
->ws
->buffer_get_cs_handle(buffer
->buf
);
170 if (!buffer
->cs_handle
)
176 /* destroy a buffer */
177 static void destroy_buffer(struct ruvd_buffer
*buffer
)
179 pb_reference(&buffer
->buf
, NULL
);
180 buffer
->cs_handle
= NULL
;
183 /* reallocate a buffer, preserving its content */
184 static bool resize_buffer(struct ruvd_decoder
*dec
,
185 struct ruvd_buffer
*new_buf
,
188 unsigned bytes
= MIN2(new_buf
->buf
->size
, new_size
);
189 struct ruvd_buffer old_buf
= *new_buf
;
190 void *src
= NULL
, *dst
= NULL
;
192 if (!create_buffer(dec
, new_buf
, new_size
))
195 src
= dec
->ws
->buffer_map(old_buf
.cs_handle
, dec
->cs
, PIPE_TRANSFER_READ
);
199 dst
= dec
->ws
->buffer_map(new_buf
->cs_handle
, dec
->cs
, PIPE_TRANSFER_WRITE
);
203 memcpy(dst
, src
, bytes
);
204 if (new_size
> bytes
) {
207 memset(dst
, 0, new_size
);
209 dec
->ws
->buffer_unmap(new_buf
->cs_handle
);
210 dec
->ws
->buffer_unmap(old_buf
.cs_handle
);
211 destroy_buffer(&old_buf
);
215 if (src
) dec
->ws
->buffer_unmap(old_buf
.cs_handle
);
216 destroy_buffer(new_buf
);
221 /* clear the buffer with zeros */
222 static void clear_buffer(struct ruvd_decoder
*dec
,
223 struct ruvd_buffer
* buffer
)
225 //TODO: let the GPU do the job
226 void *ptr
= dec
->ws
->buffer_map(buffer
->cs_handle
, dec
->cs
,
227 PIPE_TRANSFER_WRITE
);
231 memset(ptr
, 0, buffer
->buf
->size
);
232 dec
->ws
->buffer_unmap(buffer
->cs_handle
);
235 /* cycle to the next set of buffers */
236 static void next_buffer(struct ruvd_decoder
*dec
)
239 dec
->cur_buffer
%= NUM_BUFFERS
;
242 /* convert the profile into something UVD understands */
243 static uint32_t profile2stream_type(enum pipe_video_profile profile
)
245 switch (u_reduce_video_profile(profile
)) {
246 case PIPE_VIDEO_CODEC_MPEG4_AVC
:
247 return RUVD_CODEC_H264
;
249 case PIPE_VIDEO_CODEC_VC1
:
250 return RUVD_CODEC_VC1
;
252 case PIPE_VIDEO_CODEC_MPEG12
:
253 return RUVD_CODEC_MPEG2
;
255 case PIPE_VIDEO_CODEC_MPEG4
:
256 return RUVD_CODEC_MPEG4
;
264 /* calculate size of reference picture buffer */
265 static unsigned calc_dpb_size(enum pipe_video_profile profile
,
266 unsigned width
, unsigned height
,
267 unsigned max_references
)
269 unsigned width_in_mb
, height_in_mb
, image_size
, dpb_size
;
271 // always align them to MB size for dpb calculation
272 width
= align(width
, VL_MACROBLOCK_WIDTH
);
273 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
275 // always one more for currently decoded picture
278 // aligned size of a single frame
279 image_size
= width
* height
;
280 image_size
+= image_size
/ 2;
281 image_size
= align(image_size
, 1024);
283 // picture width & height in 16 pixel units
284 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
285 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
287 switch (u_reduce_video_profile(profile
)) {
288 case PIPE_VIDEO_CODEC_MPEG4_AVC
:
289 // the firmware seems to allways assume a minimum of ref frames
290 max_references
= MAX2(NUM_H264_REFS
, max_references
);
292 // reference picture buffer
293 dpb_size
= image_size
* max_references
;
295 // macroblock context buffer
296 dpb_size
+= width_in_mb
* height_in_mb
* max_references
* 192;
299 dpb_size
+= width_in_mb
* height_in_mb
* 32;
302 case PIPE_VIDEO_CODEC_VC1
:
303 // reference picture buffer
304 dpb_size
= image_size
* max_references
;
307 dpb_size
+= width_in_mb
* height_in_mb
* 128;
310 dpb_size
+= width_in_mb
* 64;
313 dpb_size
+= width_in_mb
* 128;
316 dpb_size
+= align(MAX2(width_in_mb
, height_in_mb
) * 7 * 16, 64);
319 case PIPE_VIDEO_CODEC_MPEG12
:
320 // reference picture buffer, must be big enough for all frames
321 dpb_size
= image_size
* NUM_MPEG2_REFS
;
324 case PIPE_VIDEO_CODEC_MPEG4
:
325 // reference picture buffer
326 dpb_size
= image_size
* max_references
;
329 dpb_size
+= width_in_mb
* height_in_mb
* 64;
332 dpb_size
+= align(width_in_mb
* height_in_mb
* 32, 64);
336 // something is missing here
339 // at least use a sane default value
340 dpb_size
= 32 * 1024 * 1024;
346 /* get h264 specific message bits */
347 static struct ruvd_h264
get_h264_msg(struct ruvd_decoder
*dec
, struct pipe_h264_picture_desc
*pic
)
349 struct ruvd_h264 result
;
351 memset(&result
, 0, sizeof(result
));
352 switch (pic
->base
.profile
) {
353 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE
:
354 result
.profile
= RUVD_H264_PROFILE_BASELINE
;
357 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN
:
358 result
.profile
= RUVD_H264_PROFILE_MAIN
;
361 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
:
362 result
.profile
= RUVD_H264_PROFILE_HIGH
;
369 if (((dec
->base
.width
* dec
->base
.height
) >> 8) <= 1620)
374 result
.sps_info_flags
= 0;
375 result
.sps_info_flags
|= pic
->direct_8x8_inference_flag
<< 0;
376 result
.sps_info_flags
|= pic
->mb_adaptive_frame_field_flag
<< 1;
377 result
.sps_info_flags
|= pic
->frame_mbs_only_flag
<< 2;
378 result
.sps_info_flags
|= pic
->delta_pic_order_always_zero_flag
<< 3;
380 result
.pps_info_flags
= 0;
381 result
.pps_info_flags
|= pic
->transform_8x8_mode_flag
<< 0;
382 result
.pps_info_flags
|= pic
->redundant_pic_cnt_present_flag
<< 1;
383 result
.pps_info_flags
|= pic
->constrained_intra_pred_flag
<< 2;
384 result
.pps_info_flags
|= pic
->deblocking_filter_control_present_flag
<< 3;
385 result
.pps_info_flags
|= pic
->weighted_bipred_idc
<< 4;
386 result
.pps_info_flags
|= pic
->weighted_pred_flag
<< 6;
387 result
.pps_info_flags
|= pic
->pic_order_present_flag
<< 7;
388 result
.pps_info_flags
|= pic
->entropy_coding_mode_flag
<< 8;
390 result
.chroma_format
= 0x1;
391 result
.bit_depth_luma_minus8
= 0;
392 result
.bit_depth_chroma_minus8
= 0;
394 result
.log2_max_frame_num_minus4
= pic
->log2_max_frame_num_minus4
;
395 result
.pic_order_cnt_type
= pic
->pic_order_cnt_type
;
396 result
.log2_max_pic_order_cnt_lsb_minus4
= pic
->log2_max_pic_order_cnt_lsb_minus4
;
397 result
.num_ref_frames
= pic
->num_ref_frames
;
398 result
.pic_init_qp_minus26
= pic
->pic_init_qp_minus26
;
399 result
.chroma_qp_index_offset
= pic
->chroma_qp_index_offset
;
400 result
.second_chroma_qp_index_offset
= pic
->second_chroma_qp_index_offset
;
402 result
.num_slice_groups_minus1
= 0;
403 result
.slice_group_map_type
= 0;
405 result
.num_ref_idx_l0_active_minus1
= pic
->num_ref_idx_l0_active_minus1
;
406 result
.num_ref_idx_l1_active_minus1
= pic
->num_ref_idx_l1_active_minus1
;
408 result
.slice_group_change_rate_minus1
= 0;
410 memcpy(result
.scaling_list_4x4
, pic
->scaling_lists_4x4
, 6*64);
411 memcpy(result
.scaling_list_8x8
, pic
->scaling_lists_8x8
, 2*64);
413 result
.frame_num
= pic
->frame_num
;
414 memcpy(result
.frame_num_list
, pic
->frame_num_list
, 4*16);
415 result
.curr_field_order_cnt_list
[0] = pic
->field_order_cnt
[0];
416 result
.curr_field_order_cnt_list
[1] = pic
->field_order_cnt
[1];
417 memcpy(result
.field_order_cnt_list
, pic
->field_order_cnt_list
, 4*16*2);
419 result
.decoded_pic_idx
= pic
->frame_num
;
424 /* get vc1 specific message bits */
425 static struct ruvd_vc1
get_vc1_msg(struct pipe_vc1_picture_desc
*pic
)
427 struct ruvd_vc1 result
;
429 memset(&result
, 0, sizeof(result
));
430 switch(pic
->base
.profile
) {
431 case PIPE_VIDEO_PROFILE_VC1_SIMPLE
:
432 result
.profile
= RUVD_VC1_PROFILE_SIMPLE
;
435 case PIPE_VIDEO_PROFILE_VC1_MAIN
:
436 result
.profile
= RUVD_VC1_PROFILE_MAIN
;
439 case PIPE_VIDEO_PROFILE_VC1_ADVANCED
:
440 result
.profile
= RUVD_VC1_PROFILE_ADVANCED
;
446 if (pic
->base
.profile
== PIPE_VIDEO_PROFILE_VC1_ADVANCED
) {
449 result
.sps_info_flags
|= pic
->postprocflag
<< 7;
450 result
.sps_info_flags
|= pic
->pulldown
<< 6;
451 result
.sps_info_flags
|= pic
->interlace
<< 5;
452 result
.sps_info_flags
|= pic
->tfcntrflag
<< 4;
453 result
.sps_info_flags
|= pic
->psf
<< 1;
455 result
.pps_info_flags
|= pic
->panscan_flag
<< 7;
456 result
.pps_info_flags
|= pic
->refdist_flag
<< 6;
457 result
.pps_info_flags
|= pic
->extended_dmv
<< 8;
458 result
.pps_info_flags
|= pic
->range_mapy_flag
<< 31;
459 result
.pps_info_flags
|= pic
->range_mapy
<< 28;
460 result
.pps_info_flags
|= pic
->range_mapuv_flag
<< 27;
461 result
.pps_info_flags
|= pic
->range_mapuv
<< 24;
465 result
.pps_info_flags
|= pic
->multires
<< 21;
466 result
.pps_info_flags
|= pic
->syncmarker
<< 20;
467 result
.pps_info_flags
|= pic
->rangered
<< 19;
468 result
.pps_info_flags
|= pic
->maxbframes
<< 16;
471 result
.sps_info_flags
|= pic
->finterpflag
<< 3;
472 //(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
474 result
.pps_info_flags
|= pic
->loopfilter
<< 5;
475 result
.pps_info_flags
|= pic
->fastuvmc
<< 4;
476 result
.pps_info_flags
|= pic
->extended_mv
<< 3;
477 result
.pps_info_flags
|= pic
->dquant
<< 1;
478 result
.pps_info_flags
|= pic
->vstransform
<< 0;
479 result
.pps_info_flags
|= pic
->overlap
<< 11;
480 result
.pps_info_flags
|= pic
->quantizer
<< 9;
486 uint8_t frame_coding_mode
487 uint8_t deblockEnable
491 result
.chroma_format
= 1;
495 /* extract the frame number from a referenced video buffer */
496 static uint32_t get_ref_pic_idx(struct ruvd_decoder
*dec
, struct pipe_video_buffer
*ref
)
498 uint32_t min
= dec
->frame_number
- NUM_MPEG2_REFS
;
499 uint32_t max
= dec
->frame_number
- 1;
502 /* seems to be the most sane fallback */
506 /* get the frame number from the associated data */
507 frame
= (uintptr_t)vl_video_buffer_get_associated_data(ref
, &dec
->base
);
509 /* limit the frame number to a valid range */
510 return MAX2(MIN2(frame
, max
), min
);
513 /* get mpeg2 specific msg bits */
514 static struct ruvd_mpeg2
get_mpeg2_msg(struct ruvd_decoder
*dec
,
515 struct pipe_mpeg12_picture_desc
*pic
)
517 const int *zscan
= pic
->alternate_scan
? vl_zscan_alternate
: vl_zscan_normal
;
518 struct ruvd_mpeg2 result
;
521 memset(&result
, 0, sizeof(result
));
522 result
.decoded_pic_idx
= dec
->frame_number
;
523 for (i
= 0; i
< 2; ++i
)
524 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
526 result
.load_intra_quantiser_matrix
= 1;
527 result
.load_nonintra_quantiser_matrix
= 1;
529 for (i
= 0; i
< 64; ++i
) {
530 result
.intra_quantiser_matrix
[i
] = pic
->intra_matrix
[zscan
[i
]];
531 result
.nonintra_quantiser_matrix
[i
] = pic
->non_intra_matrix
[zscan
[i
]];
534 result
.profile_and_level_indication
= 0;
535 result
.chroma_format
= 0x1;
537 result
.picture_coding_type
= pic
->picture_coding_type
;
538 result
.f_code
[0][0] = pic
->f_code
[0][0] + 1;
539 result
.f_code
[0][1] = pic
->f_code
[0][1] + 1;
540 result
.f_code
[1][0] = pic
->f_code
[1][0] + 1;
541 result
.f_code
[1][1] = pic
->f_code
[1][1] + 1;
542 result
.intra_dc_precision
= pic
->intra_dc_precision
;
543 result
.pic_structure
= pic
->picture_structure
;
544 result
.top_field_first
= pic
->top_field_first
;
545 result
.frame_pred_frame_dct
= pic
->frame_pred_frame_dct
;
546 result
.concealment_motion_vectors
= pic
->concealment_motion_vectors
;
547 result
.q_scale_type
= pic
->q_scale_type
;
548 result
.intra_vlc_format
= pic
->intra_vlc_format
;
549 result
.alternate_scan
= pic
->alternate_scan
;
554 /* get mpeg4 specific msg bits */
555 static struct ruvd_mpeg4
get_mpeg4_msg(struct ruvd_decoder
*dec
,
556 struct pipe_mpeg4_picture_desc
*pic
)
558 struct ruvd_mpeg4 result
;
561 memset(&result
, 0, sizeof(result
));
562 result
.decoded_pic_idx
= dec
->frame_number
;
563 for (i
= 0; i
< 2; ++i
)
564 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
566 result
.variant_type
= 0;
567 result
.profile_and_level_indication
= 0xF0; // ASP Level0
569 result
.video_object_layer_verid
= 0x5; // advanced simple
570 result
.video_object_layer_shape
= 0x0; // rectangular
572 result
.video_object_layer_width
= dec
->base
.width
;
573 result
.video_object_layer_height
= dec
->base
.height
;
575 result
.vop_time_increment_resolution
= pic
->vop_time_increment_resolution
;
577 result
.flags
|= pic
->short_video_header
<< 0;
578 //result.flags |= obmc_disable << 1;
579 result
.flags
|= pic
->interlaced
<< 2;
580 result
.flags
|= 1 << 3; // load_intra_quant_mat
581 result
.flags
|= 1 << 4; // load_nonintra_quant_mat
582 result
.flags
|= pic
->quarter_sample
<< 5;
583 result
.flags
|= 1 << 6; // complexity_estimation_disable
584 result
.flags
|= pic
->resync_marker_disable
<< 7;
585 //result.flags |= data_partitioned << 8;
586 //result.flags |= reversible_vlc << 9;
587 result
.flags
|= 0 << 10; // newpred_enable
588 result
.flags
|= 0 << 11; // reduced_resolution_vop_enable
589 //result.flags |= scalability << 12;
590 //result.flags |= is_object_layer_identifier << 13;
591 //result.flags |= fixed_vop_rate << 14;
592 //result.flags |= newpred_segment_type << 15;
594 result
.quant_type
= pic
->quant_type
;
596 for (i
= 0; i
< 64; ++i
) {
597 result
.intra_quant_mat
[i
] = pic
->intra_matrix
[vl_zscan_normal
[i
]];
598 result
.nonintra_quant_mat
[i
] = pic
->non_intra_matrix
[vl_zscan_normal
[i
]];
604 uint8_t vop_coding_type
605 uint8_t vop_fcode_forward
606 uint8_t vop_fcode_backward
607 uint8_t rounding_control
608 uint8_t alternate_vertical_scan_flag
609 uint8_t top_field_first
616 * destroy this video decoder
618 static void ruvd_destroy(struct pipe_video_decoder
*decoder
)
620 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
626 memset(&msg
, 0, sizeof(msg
));
627 msg
.size
= sizeof(msg
);
628 msg
.msg_type
= RUVD_MSG_DESTROY
;
629 msg
.stream_handle
= dec
->stream_handle
;
634 dec
->ws
->cs_destroy(dec
->cs
);
636 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
637 destroy_buffer(&dec
->msg_fb_buffers
[i
]);
638 destroy_buffer(&dec
->bs_buffers
[i
]);
641 destroy_buffer(&dec
->dpb
);
/**
 * Callback freeing the data associated with a video buffer.
 *
 * @param data associated value; ignored
 */
static void ruvd_destroy_associated_data(void *data)
{
	/* NOOP, since we only use an intptr */
}
653 * start decoding of a new frame
655 static void ruvd_begin_frame(struct pipe_video_decoder
*decoder
,
656 struct pipe_video_buffer
*target
,
657 struct pipe_picture_desc
*picture
)
659 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
664 frame
= ++dec
->frame_number
;
665 vl_video_buffer_set_associated_data(target
, decoder
, (void *)frame
,
666 &ruvd_destroy_associated_data
);
669 dec
->bs_ptr
= dec
->ws
->buffer_map(
670 dec
->bs_buffers
[dec
->cur_buffer
].cs_handle
,
671 dec
->cs
, PIPE_TRANSFER_WRITE
);
675 * decode a macroblock
677 static void ruvd_decode_macroblock(struct pipe_video_decoder
*decoder
,
678 struct pipe_video_buffer
*target
,
679 struct pipe_picture_desc
*picture
,
680 const struct pipe_macroblock
*macroblocks
,
681 unsigned num_macroblocks
)
683 /* not supported (yet) */
690 static void ruvd_decode_bitstream(struct pipe_video_decoder
*decoder
,
691 struct pipe_video_buffer
*target
,
692 struct pipe_picture_desc
*picture
,
693 unsigned num_buffers
,
694 const void * const *buffers
,
695 const unsigned *sizes
)
697 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
705 for (i
= 0; i
< num_buffers
; ++i
) {
706 struct ruvd_buffer
*buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
707 unsigned new_size
= dec
->bs_size
+ sizes
[i
];
709 if (new_size
> buf
->buf
->size
) {
710 dec
->ws
->buffer_unmap(buf
->cs_handle
);
711 if (!resize_buffer(dec
, buf
, new_size
)) {
712 RUVD_ERR("Can't resize bitstream buffer!");
716 dec
->bs_ptr
= dec
->ws
->buffer_map(buf
->cs_handle
, dec
->cs
,
717 PIPE_TRANSFER_WRITE
);
721 dec
->bs_ptr
+= dec
->bs_size
;
724 memcpy(dec
->bs_ptr
, buffers
[i
], sizes
[i
]);
725 dec
->bs_size
+= sizes
[i
];
726 dec
->bs_ptr
+= sizes
[i
];
731 * end decoding of the current frame
733 static void ruvd_end_frame(struct pipe_video_decoder
*decoder
,
734 struct pipe_video_buffer
*target
,
735 struct pipe_picture_desc
*picture
)
737 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
738 struct radeon_winsys_cs_handle
*dt
;
739 struct ruvd_buffer
*msg_fb_buf
, *bs_buf
;
748 msg_fb_buf
= &dec
->msg_fb_buffers
[dec
->cur_buffer
];
749 bs_buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
751 bs_size
= align(dec
->bs_size
, 128);
752 memset(dec
->bs_ptr
, 0, bs_size
- dec
->bs_size
);
753 dec
->ws
->buffer_unmap(bs_buf
->cs_handle
);
755 memset(&msg
, 0, sizeof(msg
));
756 msg
.size
= sizeof(msg
);
757 msg
.msg_type
= RUVD_MSG_DECODE
;
758 msg
.stream_handle
= dec
->stream_handle
;
759 msg
.status_report_feedback_number
= dec
->frame_number
;
761 msg
.body
.decode
.stream_type
= profile2stream_type(dec
->base
.profile
);
762 msg
.body
.decode
.decode_flags
= 0x1;
763 msg
.body
.decode
.width_in_samples
= dec
->base
.width
;
764 msg
.body
.decode
.height_in_samples
= dec
->base
.height
;
766 msg
.body
.decode
.dpb_size
= dec
->dpb
.buf
->size
;
767 msg
.body
.decode
.bsd_size
= bs_size
;
769 dt
= dec
->set_dtb(&msg
, (struct vl_video_buffer
*)target
);
771 switch (u_reduce_video_profile(picture
->profile
)) {
772 case PIPE_VIDEO_CODEC_MPEG4_AVC
:
773 msg
.body
.decode
.codec
.h264
= get_h264_msg(dec
, (struct pipe_h264_picture_desc
*)picture
);
776 case PIPE_VIDEO_CODEC_VC1
:
777 msg
.body
.decode
.codec
.vc1
= get_vc1_msg((struct pipe_vc1_picture_desc
*)picture
);
780 case PIPE_VIDEO_CODEC_MPEG12
:
781 msg
.body
.decode
.codec
.mpeg2
= get_mpeg2_msg(dec
, (struct pipe_mpeg12_picture_desc
*)picture
);
784 case PIPE_VIDEO_CODEC_MPEG4
:
785 msg
.body
.decode
.codec
.mpeg4
= get_mpeg4_msg(dec
, (struct pipe_mpeg4_picture_desc
*)picture
);
793 msg
.body
.decode
.db_surf_tile_config
= msg
.body
.decode
.dt_surf_tile_config
;
794 msg
.body
.decode
.extension_support
= 0x1;
797 send_cmd(dec
, RUVD_CMD_DPB_BUFFER
, dec
->dpb
.cs_handle
, 0,
798 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
799 send_cmd(dec
, RUVD_CMD_BITSTREAM_BUFFER
, bs_buf
->cs_handle
,
800 0, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
801 send_cmd(dec
, RUVD_CMD_DECODING_TARGET_BUFFER
, dt
, 0,
802 RADEON_USAGE_WRITE
, RADEON_DOMAIN_VRAM
);
803 send_cmd(dec
, RUVD_CMD_FEEDBACK_BUFFER
, msg_fb_buf
->cs_handle
,
804 0x1000, RADEON_USAGE_WRITE
, RADEON_DOMAIN_VRAM
);
805 set_reg(dec
, RUVD_ENGINE_CNTL
, 1);
812 * flush any outstanding command buffers to the hardware
814 static void ruvd_flush(struct pipe_video_decoder
*decoder
)
/* create a UVD decoder */
821 struct pipe_video_decoder
*ruvd_create_decoder(struct pipe_context
*context
,
822 enum pipe_video_profile profile
,
823 enum pipe_video_entrypoint entrypoint
,
824 enum pipe_video_chroma_format chroma_format
,
825 unsigned width
, unsigned height
,
826 unsigned max_references
, bool expect_chunked_decode
,
827 struct radeon_winsys
* ws
,
828 ruvd_set_dtb set_dtb
)
830 unsigned dpb_size
= calc_dpb_size(profile
, width
, height
, max_references
);
831 struct ruvd_decoder
*dec
;
835 switch(u_reduce_video_profile(profile
)) {
836 case PIPE_VIDEO_CODEC_MPEG12
:
837 if (entrypoint
> PIPE_VIDEO_ENTRYPOINT_BITSTREAM
)
838 return vl_create_mpeg12_decoder(context
, profile
, entrypoint
,
839 chroma_format
, width
,
840 height
, max_references
, expect_chunked_decode
);
843 case PIPE_VIDEO_CODEC_MPEG4
:
844 case PIPE_VIDEO_CODEC_MPEG4_AVC
:
845 width
= align(width
, VL_MACROBLOCK_WIDTH
);
846 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
854 dec
= CALLOC_STRUCT(ruvd_decoder
);
859 dec
->base
.context
= context
;
860 dec
->base
.profile
= profile
;
861 dec
->base
.entrypoint
= entrypoint
;
862 dec
->base
.chroma_format
= chroma_format
;
863 dec
->base
.width
= width
;
864 dec
->base
.height
= height
;
866 dec
->base
.destroy
= ruvd_destroy
;
867 dec
->base
.begin_frame
= ruvd_begin_frame
;
868 dec
->base
.decode_macroblock
= ruvd_decode_macroblock
;
869 dec
->base
.decode_bitstream
= ruvd_decode_bitstream
;
870 dec
->base
.end_frame
= ruvd_end_frame
;
871 dec
->base
.flush
= ruvd_flush
;
873 dec
->set_dtb
= set_dtb
;
874 dec
->stream_handle
= alloc_stream_handle();
876 dec
->cs
= ws
->cs_create(ws
, RING_UVD
, NULL
);
878 RUVD_ERR("Can't get command submission context.\n");
882 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
883 unsigned msg_fb_size
= align(sizeof(struct ruvd_msg
), 0x1000) + 0x1000;
884 if (!create_buffer(dec
, &dec
->msg_fb_buffers
[i
], msg_fb_size
)) {
885 RUVD_ERR("Can't allocated message buffers.\n");
889 if (!create_buffer(dec
, &dec
->bs_buffers
[i
], 4096)) {
890 RUVD_ERR("Can't allocated bitstream buffers.\n");
894 clear_buffer(dec
, &dec
->msg_fb_buffers
[i
]);
895 clear_buffer(dec
, &dec
->bs_buffers
[i
]);
898 if (!create_buffer(dec
, &dec
->dpb
, dpb_size
)) {
899 RUVD_ERR("Can't allocated dpb.\n");
903 clear_buffer(dec
, &dec
->dpb
);
905 memset(&msg
, 0, sizeof(msg
));
906 msg
.size
= sizeof(msg
);
907 msg
.msg_type
= RUVD_MSG_CREATE
;
908 msg
.stream_handle
= dec
->stream_handle
;
909 msg
.body
.create
.stream_type
= profile2stream_type(dec
->base
.profile
);
910 msg
.body
.create
.width_in_samples
= dec
->base
.width
;
911 msg
.body
.create
.height_in_samples
= dec
->base
.height
;
912 msg
.body
.create
.dpb_size
= dec
->dpb
.buf
->size
;
920 if (dec
->cs
) dec
->ws
->cs_destroy(dec
->cs
);
922 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
923 destroy_buffer(&dec
->msg_fb_buffers
[i
]);
924 destroy_buffer(&dec
->bs_buffers
[i
]);
927 destroy_buffer(&dec
->dpb
);
/*
 * join surfaces into the same buffer with identical tiling params,
 * sum up their sizes and replace the backend buffers with a single bo
 */
938 void ruvd_join_surfaces(struct radeon_winsys
* ws
, unsigned bind
,
939 struct pb_buffer
** buffers
[VL_NUM_COMPONENTS
],
940 struct radeon_surface
*surfaces
[VL_NUM_COMPONENTS
])
942 unsigned best_tiling
, best_wh
, off
;
943 unsigned size
, alignment
;
944 struct pb_buffer
*pb
;
947 for (i
= 0, best_tiling
= 0, best_wh
= ~0; i
< VL_NUM_COMPONENTS
; ++i
) {
953 /* choose the smallest bank w/h for now */
954 wh
= surfaces
[i
]->bankw
* surfaces
[i
]->bankh
;
961 for (i
= 0, off
= 0; i
< VL_NUM_COMPONENTS
; ++i
) {
965 /* copy the tiling parameters */
966 surfaces
[i
]->bankw
= surfaces
[best_tiling
]->bankw
;
967 surfaces
[i
]->bankh
= surfaces
[best_tiling
]->bankh
;
968 surfaces
[i
]->mtilea
= surfaces
[best_tiling
]->mtilea
;
969 surfaces
[i
]->tile_split
= surfaces
[best_tiling
]->tile_split
;
971 /* adjust the texture layer offsets */
972 off
= align(off
, surfaces
[i
]->bo_alignment
);
973 for (j
= 0; j
< Elements(surfaces
[i
]->level
); ++j
)
974 surfaces
[i
]->level
[j
].offset
+= off
;
975 off
+= surfaces
[i
]->bo_size
;
978 for (i
= 0, size
= 0, alignment
= 0; i
< VL_NUM_COMPONENTS
; ++i
) {
979 if (!buffers
[i
] || !*buffers
[i
])
982 size
= align(size
, (*buffers
[i
])->alignment
);
983 size
+= (*buffers
[i
])->size
;
984 alignment
= MAX2(alignment
, (*buffers
[i
])->alignment
* 1);
990 /* TODO: 2D tiling workaround */
993 pb
= ws
->buffer_create(ws
, size
, alignment
, bind
, RADEON_DOMAIN_VRAM
);
997 for (i
= 0; i
< VL_NUM_COMPONENTS
; ++i
) {
998 if (!buffers
[i
] || !*buffers
[i
])
1001 pb_reference(buffers
[i
], pb
);
1004 pb_reference(&pb
, NULL
);
1007 /* calculate top/bottom offset */
1008 static unsigned texture_offset(struct radeon_surface
*surface
, unsigned layer
)
1010 return surface
->level
[0].offset
+
1011 layer
* surface
->level
[0].slice_size
;
/**
 * Hardware encode the aspect of macro tiles.
 *
 * Maps 1, 2, 4 and 8 to 0, 1, 2 and 3; any other value also encodes as 0.
 *
 * @param macro_tile_aspect macro tile aspect of the surface
 * @return the encoded value
 */
static unsigned macro_tile_aspect(unsigned macro_tile_aspect)
{
	switch (macro_tile_aspect) {
	default:
	case 1: macro_tile_aspect = 0;  break;
	case 2: macro_tile_aspect = 1;  break;
	case 4: macro_tile_aspect = 2;  break;
	case 8: macro_tile_aspect = 3;  break;
	}
	return macro_tile_aspect;
}
/**
 * Hardware encode the bank width and height.
 *
 * Maps 1, 2, 4 and 8 to 0, 1, 2 and 3; any other value also encodes as 0.
 *
 * @param bankwh bank width or bank height of the surface
 * @return the encoded value
 */
static unsigned bank_wh(unsigned bankwh)
{
	switch (bankwh) {
	default:
	case 1: bankwh = 0;     break;
	case 2: bankwh = 1;     break;
	case 4: bankwh = 2;     break;
	case 8: bankwh = 3;     break;
	}
	return bankwh;
}
1041 * fill decoding target field from the luma and chroma surfaces
1043 void ruvd_set_dt_surfaces(struct ruvd_msg
*msg
, struct radeon_surface
*luma
,
1044 struct radeon_surface
*chroma
)
1046 msg
->body
.decode
.dt_pitch
= luma
->level
[0].pitch_bytes
;
1047 switch (luma
->level
[0].mode
) {
1048 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
1049 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_LINEAR
;
1050 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_LINEAR
;
1052 case RADEON_SURF_MODE_1D
:
1053 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
1054 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_1D_THIN
;
1056 case RADEON_SURF_MODE_2D
:
1057 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
1058 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_2D_THIN
;
1065 msg
->body
.decode
.dt_luma_top_offset
= texture_offset(luma
, 0);
1066 msg
->body
.decode
.dt_chroma_top_offset
= texture_offset(chroma
, 0);
1067 if (msg
->body
.decode
.dt_field_mode
) {
1068 msg
->body
.decode
.dt_luma_bottom_offset
= texture_offset(luma
, 1);
1069 msg
->body
.decode
.dt_chroma_bottom_offset
= texture_offset(chroma
, 1);
1071 msg
->body
.decode
.dt_luma_bottom_offset
= msg
->body
.decode
.dt_luma_top_offset
;
1072 msg
->body
.decode
.dt_chroma_bottom_offset
= msg
->body
.decode
.dt_chroma_top_offset
;
1075 assert(luma
->bankw
== chroma
->bankw
);
1076 assert(luma
->bankh
== chroma
->bankh
);
1077 assert(luma
->mtilea
== chroma
->mtilea
);
1079 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_BANK_WIDTH(bank_wh(luma
->bankw
));
1080 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_BANK_HEIGHT(bank_wh(luma
->bankh
));
1081 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma
->mtilea
));
1084 int ruvd_get_video_param(struct pipe_screen
*screen
,
1085 enum pipe_video_profile profile
,
1086 enum pipe_video_cap param
)
1089 case PIPE_VIDEO_CAP_SUPPORTED
:
1090 switch (u_reduce_video_profile(profile
)) {
1091 case PIPE_VIDEO_CODEC_MPEG12
:
1092 case PIPE_VIDEO_CODEC_MPEG4
:
1093 case PIPE_VIDEO_CODEC_MPEG4_AVC
:
1094 case PIPE_VIDEO_CODEC_VC1
:
1099 case PIPE_VIDEO_CAP_NPOT_TEXTURES
:
1101 case PIPE_VIDEO_CAP_MAX_WIDTH
:
1103 case PIPE_VIDEO_CAP_MAX_HEIGHT
:
1105 case PIPE_VIDEO_CAP_PREFERED_FORMAT
:
1106 return PIPE_FORMAT_NV12
;
1107 case PIPE_VIDEO_CAP_PREFERS_INTERLACED
:
1109 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED
:
1111 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE
:
1118 boolean
ruvd_is_format_supported(struct pipe_screen
*screen
,
1119 enum pipe_format format
,
1120 enum pipe_video_profile profile
)
1122 /* we can only handle this one anyway */
1123 return format
== PIPE_FORMAT_NV12
;