1 /**************************************************************************
3 * Copyright 2011 Advanced Micro Devices, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include <sys/types.h>
34 #include "pipe/p_video_codec.h"
36 #include "util/u_memory.h"
37 #include "util/u_video.h"
39 #include "vl/vl_defines.h"
40 #include "vl/vl_mpeg12_decoder.h"
42 #include "radeonsi/si_pipe.h"
43 #include "radeon_video.h"
44 #include "radeon_uvd.h"
/* Reference-picture counts used by the buffer size calculations in this file. */
48 #define NUM_MPEG2_REFS 6
49 #define NUM_H264_REFS 17
50 #define NUM_VC1_REFS 5
/* Byte offset of the feedback area inside a mapped message/feedback/IT buffer
 * (applied in map_msg_fb_it_buf). */
52 #define FB_BUFFER_OFFSET 0x1000
/* Size constants; the code that consumes them is not visible in this part of
 * the file. */
53 #define FB_BUFFER_SIZE 2048
54 #define FB_BUFFER_SIZE_TONGA (2048 * 64)
55 #define IT_SCALING_TABLE_SIZE 992
56 #define UVD_SESSION_CONTEXT_SIZE (128 * 1024)
58 /* UVD decoder representation */
60 struct pipe_video_codec base
;
64 unsigned stream_handle
;
66 unsigned frame_number
;
68 struct pipe_screen
*screen
;
69 struct radeon_winsys
* ws
;
70 struct radeon_winsys_cs
* cs
;
74 struct rvid_buffer msg_fb_it_buffers
[NUM_BUFFERS
];
80 struct rvid_buffer bs_buffers
[NUM_BUFFERS
];
84 struct rvid_buffer dpb
;
86 struct rvid_buffer ctx
;
87 struct rvid_buffer sessionctx
;
95 void *render_pic_list
[16];
98 /* flush IB to the hardware */
99 static int flush(struct ruvd_decoder
*dec
, unsigned flags
)
101 return dec
->ws
->cs_flush(dec
->cs
, flags
, NULL
);
104 /* add a new set register command to the IB */
105 static void set_reg(struct ruvd_decoder
*dec
, unsigned reg
, uint32_t val
)
107 radeon_emit(dec
->cs
, RUVD_PKT0(reg
>> 2, 0));
108 radeon_emit(dec
->cs
, val
);
111 /* send a command to the VCPU through the GPCOM registers */
/*
 * The buffer is first added to the command stream through the winsys
 * cs_add_buffer() hook (usage is OR'ed with RADEON_USAGE_SYNCHRONIZED) and the
 * returned index is kept in reloc_idx.
 *
 * When dec->use_legacy is not set, the buffer's virtual address is written to
 * dec->reg.data0 and its upper 32 bits to dec->reg.data1.
 * The writes to RUVD_GPCOM_VCPU_DATA0/DATA1 use the relocation offset added to
 * `off` and reloc_idx * 4; presumably they form the legacy branch - the branch
 * keyword is not visible in this listing, confirm against the full file.
 *
 * In all cases cmd << 1 is finally written to dec->reg.cmd.
 */
112 static void send_cmd(struct ruvd_decoder
*dec
, unsigned cmd
,
113 struct pb_buffer
* buf
, uint32_t off
,
114 enum radeon_bo_usage usage
, enum radeon_bo_domain domain
)
118 reloc_idx
= dec
->ws
->cs_add_buffer(dec
->cs
, buf
, usage
| RADEON_USAGE_SYNCHRONIZED
,
121 if (!dec
->use_legacy
) {
123 addr
= dec
->ws
->buffer_get_virtual_address(buf
);
125 set_reg(dec
, dec
->reg
.data0
, addr
);
126 set_reg(dec
, dec
->reg
.data1
, addr
>> 32);
128 off
+= dec
->ws
->buffer_get_reloc_offset(buf
);
129 set_reg(dec
, RUVD_GPCOM_VCPU_DATA0
, off
);
130 set_reg(dec
, RUVD_GPCOM_VCPU_DATA1
, reloc_idx
* 4);
132 set_reg(dec
, dec
->reg
.cmd
, cmd
<< 1);
135 /* do the codec needs an IT buffer ?*/
136 static bool have_it(struct ruvd_decoder
*dec
)
138 return dec
->stream_type
== RUVD_CODEC_H264_PERF
||
139 dec
->stream_type
== RUVD_CODEC_H265
;
142 /* map the next available message/feedback/itscaling buffer */
/*
 * Maps msg_fb_it_buffers[cur_buffer] for writing and derives three pointers
 * into the mapping:
 *   dec->msg - start of the mapping, zeroed for sizeof(*dec->msg) bytes
 *   dec->fb  - FB_BUFFER_OFFSET bytes into the mapping
 *   dec->it  - FB_BUFFER_OFFSET + dec->fb_size bytes into the mapping
 * The result of buffer_map() is used without a visible NULL check.
 */
143 static void map_msg_fb_it_buf(struct ruvd_decoder
*dec
)
145 struct rvid_buffer
* buf
;
148 /* grab the current message/feedback buffer */
149 buf
= &dec
->msg_fb_it_buffers
[dec
->cur_buffer
];
151 /* and map it for CPU access */
152 ptr
= dec
->ws
->buffer_map(buf
->res
->buf
, dec
->cs
, PIPE_TRANSFER_WRITE
);
154 /* calc buffer offsets */
155 dec
->msg
= (struct ruvd_msg
*)ptr
;
156 memset(dec
->msg
, 0, sizeof(*dec
->msg
));
158 dec
->fb
= (uint32_t *)(ptr
+ FB_BUFFER_OFFSET
);
160 dec
->it
= (uint8_t *)(ptr
+ FB_BUFFER_OFFSET
+ dec
->fb_size
);
163 /* unmap and send a message command to the VCPU */
/*
 * Steps visible here: test that dec->msg and dec->fb are set, unmap
 * msg_fb_it_buffers[cur_buffer], send RUVD_CMD_SESSION_CONTEXT_BUFFER first
 * when a session context resource exists, then send the message buffer itself
 * with RUVD_CMD_MSG_BUFFER (read usage, GTT domain).
 */
164 static void send_msg_buf(struct ruvd_decoder
*dec
)
166 struct rvid_buffer
* buf
;
168 /* ignore the request if message/feedback buffer isn't mapped */
169 if (!dec
->msg
|| !dec
->fb
)
172 /* grab the current message buffer */
173 buf
= &dec
->msg_fb_it_buffers
[dec
->cur_buffer
];
175 /* unmap the buffer */
176 dec
->ws
->buffer_unmap(buf
->res
->buf
);
182 if (dec
->sessionctx
.res
)
183 send_cmd(dec
, RUVD_CMD_SESSION_CONTEXT_BUFFER
,
184 dec
->sessionctx
.res
->buf
, 0, RADEON_USAGE_READWRITE
,
187 /* and send it to the hardware */
188 send_cmd(dec
, RUVD_CMD_MSG_BUFFER
, buf
->res
->buf
, 0,
189 RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
192 /* cycle to the next set of buffers */
/* The visible statement wraps dec->cur_buffer into [0, NUM_BUFFERS); the step
 * that advances the index is not visible in this listing. */
193 static void next_buffer(struct ruvd_decoder
*dec
)
196 dec
->cur_buffer
%= NUM_BUFFERS
;
199 /* convert the profile into something UVD understands */
/*
 * Switches on the video format obtained from
 * u_reduce_video_profile(dec->base.profile) and returns the matching
 * RUVD_CODEC_* value. For MPEG4 AVC the chip family decides: CHIP_TONGA and
 * newer get RUVD_CODEC_H264_PERF, older families get RUVD_CODEC_H264.
 * Handling of formats not listed here is not visible in this listing.
 */
200 static uint32_t profile2stream_type(struct ruvd_decoder
*dec
, unsigned family
)
202 switch (u_reduce_video_profile(dec
->base
.profile
)) {
203 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
204 return (family
>= CHIP_TONGA
) ?
205 RUVD_CODEC_H264_PERF
: RUVD_CODEC_H264
;
207 case PIPE_VIDEO_FORMAT_VC1
:
208 return RUVD_CODEC_VC1
;
210 case PIPE_VIDEO_FORMAT_MPEG12
:
211 return RUVD_CODEC_MPEG2
;
213 case PIPE_VIDEO_FORMAT_MPEG4
:
214 return RUVD_CODEC_MPEG4
;
216 case PIPE_VIDEO_FORMAT_HEVC
:
217 return RUVD_CODEC_H265
;
219 case PIPE_VIDEO_FORMAT_JPEG
:
220 return RUVD_CODEC_MJPEG
;
/*
 * Compute ctx_size, the context buffer size for the H.264 "perf" stream type.
 *
 * Width and height are aligned to the macroblock size and converted to
 * macroblock units (the height in macroblocks is further aligned to 2).
 *
 * Non-legacy path (!dec->use_legacy): a DPB buffer count is derived from
 * dec->base.level as <per-level constant> / frame size in macroblocks (the
 * case labels selecting the constant are not visible in this listing), the
 * reference count becomes MAX2(MIN2(NUM_H264_REFS, count), max_references),
 * and each reference gets align(width_in_mb * height_in_mb * 192, 256) bytes.
 *
 * Other path: the reference count is raised to at least NUM_H264_REFS and the
 * total width_in_mb * height_in_mb * refs * 192 is aligned to 256.
 *
 * NOTE(review): fs_in_mb is used as a divisor; a zero-sized stream would
 * divide by zero - confirm callers never pass one.
 */
228 static unsigned calc_ctx_size_h264_perf(struct ruvd_decoder
*dec
)
230 unsigned width_in_mb
, height_in_mb
, ctx_size
;
231 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
232 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
234 unsigned max_references
= dec
->base
.max_references
+ 1;
236 // picture width & height in 16 pixel units
237 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
238 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
240 if (!dec
->use_legacy
) {
241 unsigned fs_in_mb
= width_in_mb
* height_in_mb
;
242 unsigned num_dpb_buffer
;
243 switch(dec
->base
.level
) {
245 num_dpb_buffer
= 8100 / fs_in_mb
;
248 num_dpb_buffer
= 18000 / fs_in_mb
;
251 num_dpb_buffer
= 20480 / fs_in_mb
;
254 num_dpb_buffer
= 32768 / fs_in_mb
;
257 num_dpb_buffer
= 34816 / fs_in_mb
;
260 num_dpb_buffer
= 110400 / fs_in_mb
;
263 num_dpb_buffer
= 184320 / fs_in_mb
;
266 num_dpb_buffer
= 184320 / fs_in_mb
;
270 max_references
= MAX2(MIN2(NUM_H264_REFS
, num_dpb_buffer
), max_references
);
271 ctx_size
= max_references
* align(width_in_mb
* height_in_mb
* 192, 256);
273 // the firmware seems to always assume a minimum of ref frames
274 max_references
= MAX2(NUM_H264_REFS
, max_references
);
275 // macroblock context buffer
276 ctx_size
= align(width_in_mb
* height_in_mb
* max_references
* 192, 256);
282 static unsigned calc_ctx_size_h265_main(struct ruvd_decoder
*dec
)
284 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
285 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
287 unsigned max_references
= dec
->base
.max_references
+ 1;
289 if (dec
->base
.width
* dec
->base
.height
>= 4096*2000)
290 max_references
= MAX2(max_references
, 8);
292 max_references
= MAX2(max_references
, 17);
294 width
= align (width
, 16);
295 height
= align (height
, 16);
296 return ((width
+ 255) / 16) * ((height
+ 255) / 16) * 16 * max_references
+ 52 * 1024;
299 static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder
*dec
, struct pipe_h265_picture_desc
*pic
)
301 unsigned block_size
, log2_ctb_size
, width_in_ctb
, height_in_ctb
, num_16x16_block_per_ctb
;
302 unsigned context_buffer_size_per_ctb_row
, cm_buffer_size
, max_mb_address
, db_left_tile_pxl_size
;
303 unsigned db_left_tile_ctx_size
= 4096 / 16 * (32 + 16 * 4);
305 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
306 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
307 unsigned coeff_10bit
= (pic
->pps
->sps
->bit_depth_luma_minus8
|| pic
->pps
->sps
->bit_depth_chroma_minus8
) ? 2 : 1;
309 unsigned max_references
= dec
->base
.max_references
+ 1;
311 if (dec
->base
.width
* dec
->base
.height
>= 4096*2000)
312 max_references
= MAX2(max_references
, 8);
314 max_references
= MAX2(max_references
, 17);
316 block_size
= (1 << (pic
->pps
->sps
->log2_min_luma_coding_block_size_minus3
+ 3));
317 log2_ctb_size
= block_size
+ pic
->pps
->sps
->log2_diff_max_min_luma_coding_block_size
;
319 width_in_ctb
= (width
+ ((1 << log2_ctb_size
) - 1)) >> log2_ctb_size
;
320 height_in_ctb
= (height
+ ((1 << log2_ctb_size
) - 1)) >> log2_ctb_size
;
322 num_16x16_block_per_ctb
= ((1 << log2_ctb_size
) >> 4) * ((1 << log2_ctb_size
) >> 4);
323 context_buffer_size_per_ctb_row
= align(width_in_ctb
* num_16x16_block_per_ctb
* 16, 256);
324 max_mb_address
= (unsigned) ceil(height
* 8 / 2048.0);
326 cm_buffer_size
= max_references
* context_buffer_size_per_ctb_row
* height_in_ctb
;
327 db_left_tile_pxl_size
= coeff_10bit
* (max_mb_address
* 2 * 2048 + 1024);
329 return cm_buffer_size
+ db_left_tile_ctx_size
+ db_left_tile_pxl_size
;
/*
 * Pitch alignment used by the DPB size calculations (see calc_dpb_size).
 * The choice depends on whether the chip family precedes CHIP_VEGA10; the
 * values returned for either case are not visible in this listing.
 */
332 static unsigned get_db_pitch_alignment(struct ruvd_decoder
*dec
)
334 if (((struct si_screen
*)dec
->screen
)->info
.family
< CHIP_VEGA10
)
340 /* calculate size of reference picture buffer */
/*
 * Common setup: width/height are aligned to the macroblock size, one extra
 * reference is reserved for the picture being decoded, and image_size is the
 * pitch-aligned width times height, plus half of that again, aligned to 1024.
 *
 * The size is then chosen per video format (switch on the reduced profile):
 *   MPEG4 AVC - image_size * references, where the reference count depends on
 *               dec->use_legacy and, in the non-legacy case, on the stream
 *               level; extra per-macroblock areas are added unless the stream
 *               type is H264_PERF on CHIP_POLARIS10 or newer.
 *   HEVC      - width/height re-aligned to 16; per-reference size uses a
 *               factor 9/4 for the Main10 profile and 3/2 otherwise.
 *   VC1       - at least NUM_VC1_REFS references plus several
 *               macroblock-derived areas.
 *   MPEG12    - image_size * NUM_MPEG2_REFS.
 *   MPEG4     - image_size * references plus macroblock areas, at least 30 MiB.
 * Several case labels, else keywords and break statements are not visible in
 * this listing, so the exact branch structure should be confirmed against the
 * full file.
 */
341 static unsigned calc_dpb_size(struct ruvd_decoder
*dec
)
343 unsigned width_in_mb
, height_in_mb
, image_size
, dpb_size
;
345 // always align them to MB size for dpb calculation
346 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
347 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
349 // always one more for currently decoded picture
350 unsigned max_references
= dec
->base
.max_references
+ 1;
352 // aligned size of a single frame
353 image_size
= align(width
, get_db_pitch_alignment(dec
)) * height
;
354 image_size
+= image_size
/ 2;
355 image_size
= align(image_size
, 1024);
357 // picture width & height in 16 pixel units
358 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
359 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
361 switch (u_reduce_video_profile(dec
->base
.profile
)) {
362 case PIPE_VIDEO_FORMAT_MPEG4_AVC
: {
363 if (!dec
->use_legacy
) {
364 unsigned fs_in_mb
= width_in_mb
* height_in_mb
;
365 unsigned alignment
= 64, num_dpb_buffer
;
367 if (dec
->stream_type
== RUVD_CODEC_H264_PERF
)
369 switch(dec
->base
.level
) {
371 num_dpb_buffer
= 8100 / fs_in_mb
;
374 num_dpb_buffer
= 18000 / fs_in_mb
;
377 num_dpb_buffer
= 20480 / fs_in_mb
;
380 num_dpb_buffer
= 32768 / fs_in_mb
;
383 num_dpb_buffer
= 34816 / fs_in_mb
;
386 num_dpb_buffer
= 110400 / fs_in_mb
;
389 num_dpb_buffer
= 184320 / fs_in_mb
;
392 num_dpb_buffer
= 184320 / fs_in_mb
;
396 max_references
= MAX2(MIN2(NUM_H264_REFS
, num_dpb_buffer
), max_references
);
397 dpb_size
= image_size
* max_references
;
398 if ((dec
->stream_type
!= RUVD_CODEC_H264_PERF
) ||
399 (((struct si_screen
*)dec
->screen
)->info
.family
< CHIP_POLARIS10
)) {
400 dpb_size
+= max_references
* align(width_in_mb
* height_in_mb
* 192, alignment
);
401 dpb_size
+= align(width_in_mb
* height_in_mb
* 32, alignment
);
404 // the firmware seems to always assume a minimum of ref frames
405 max_references
= MAX2(NUM_H264_REFS
, max_references
);
406 // reference picture buffer
407 dpb_size
= image_size
* max_references
;
408 if ((dec
->stream_type
!= RUVD_CODEC_H264_PERF
) ||
409 (((struct si_screen
*)dec
->screen
)->info
.family
< CHIP_POLARIS10
)) {
410 // macroblock context buffer
411 dpb_size
+= width_in_mb
* height_in_mb
* max_references
* 192;
413 dpb_size
+= width_in_mb
* height_in_mb
* 32;
419 case PIPE_VIDEO_FORMAT_HEVC
:
420 if (dec
->base
.width
* dec
->base
.height
>= 4096*2000)
421 max_references
= MAX2(max_references
, 8);
423 max_references
= MAX2(max_references
, 17);
425 width
= align (width
, 16);
426 height
= align (height
, 16);
427 if (dec
->base
.profile
== PIPE_VIDEO_PROFILE_HEVC_MAIN_10
)
428 dpb_size
= align((align(width
, get_db_pitch_alignment(dec
)) * height
* 9) / 4, 256) * max_references
;
430 dpb_size
= align((align(width
, get_db_pitch_alignment(dec
)) * height
* 3) / 2, 256) * max_references
;
433 case PIPE_VIDEO_FORMAT_VC1
:
434 // the firmware seems to always assume a minimum of ref frames
435 max_references
= MAX2(NUM_VC1_REFS
, max_references
);
437 // reference picture buffer
438 dpb_size
= image_size
* max_references
;
441 dpb_size
+= width_in_mb
* height_in_mb
* 128;
444 dpb_size
+= width_in_mb
* 64;
447 dpb_size
+= width_in_mb
* 128;
450 dpb_size
+= align(MAX2(width_in_mb
, height_in_mb
) * 7 * 16, 64);
453 case PIPE_VIDEO_FORMAT_MPEG12
:
454 // reference picture buffer, must be big enough for all frames
455 dpb_size
= image_size
* NUM_MPEG2_REFS
;
458 case PIPE_VIDEO_FORMAT_MPEG4
:
459 // reference picture buffer
460 dpb_size
= image_size
* max_references
;
463 dpb_size
+= width_in_mb
* height_in_mb
* 64;
466 dpb_size
+= align(width_in_mb
* height_in_mb
* 32, 64);
468 dpb_size
= MAX2(dpb_size
, 30 * 1024 * 1024);
471 case PIPE_VIDEO_FORMAT_JPEG
:
476 // something is missing here
479 // at least use a sane default value
480 dpb_size
= 32 * 1024 * 1024;
486 /* free associated data in the video buffer callback */
/* Registered as the destroy callback when get_h265_msg() attaches the picture
 * index to a video buffer; there is nothing to release. */
487 static void ruvd_destroy_associated_data(void *data
)
489 /* NOOP, since we only use an intptr */
492 /* get h264 specific message bits */
/*
 * Builds a zero-initialised struct ruvd_h264 from the picture description:
 *   - profile from pic->base.profile, level from dec->base.level
 *   - sps_info_flags / pps_info_flags packed bit by bit from the SPS/PPS flags
 *   - chroma_format from dec->base.chroma_format
 *   - remaining SPS/PPS scalars, the 4x4 and 8x8 scaling lists, reference
 *     counts, frame numbers and field order counts copied from `pic`
 * For the RUVD_CODEC_H264_PERF stream type the scaling lists are additionally
 * copied into dec->it (4x4 lists at offset 0, 8x8 lists at byte offset 96).
 * decoded_pic_idx is set from pic->frame_num.
 */
493 static struct ruvd_h264
get_h264_msg(struct ruvd_decoder
*dec
, struct pipe_h264_picture_desc
*pic
)
495 struct ruvd_h264 result
;
497 memset(&result
, 0, sizeof(result
));
498 switch (pic
->base
.profile
) {
499 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE
:
500 case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE
:
501 result
.profile
= RUVD_H264_PROFILE_BASELINE
;
504 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN
:
505 result
.profile
= RUVD_H264_PROFILE_MAIN
;
508 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
:
509 result
.profile
= RUVD_H264_PROFILE_HIGH
;
517 result
.level
= dec
->base
.level
;
519 result
.sps_info_flags
= 0;
520 result
.sps_info_flags
|= pic
->pps
->sps
->direct_8x8_inference_flag
<< 0;
521 result
.sps_info_flags
|= pic
->pps
->sps
->mb_adaptive_frame_field_flag
<< 1;
522 result
.sps_info_flags
|= pic
->pps
->sps
->frame_mbs_only_flag
<< 2;
523 result
.sps_info_flags
|= pic
->pps
->sps
->delta_pic_order_always_zero_flag
<< 3;
525 result
.bit_depth_luma_minus8
= pic
->pps
->sps
->bit_depth_luma_minus8
;
526 result
.bit_depth_chroma_minus8
= pic
->pps
->sps
->bit_depth_chroma_minus8
;
527 result
.log2_max_frame_num_minus4
= pic
->pps
->sps
->log2_max_frame_num_minus4
;
528 result
.pic_order_cnt_type
= pic
->pps
->sps
->pic_order_cnt_type
;
529 result
.log2_max_pic_order_cnt_lsb_minus4
= pic
->pps
->sps
->log2_max_pic_order_cnt_lsb_minus4
;
531 switch (dec
->base
.chroma_format
) {
532 case PIPE_VIDEO_CHROMA_FORMAT_NONE
:
535 case PIPE_VIDEO_CHROMA_FORMAT_400
:
536 result
.chroma_format
= 0;
538 case PIPE_VIDEO_CHROMA_FORMAT_420
:
539 result
.chroma_format
= 1;
541 case PIPE_VIDEO_CHROMA_FORMAT_422
:
542 result
.chroma_format
= 2;
544 case PIPE_VIDEO_CHROMA_FORMAT_444
:
545 result
.chroma_format
= 3;
549 result
.pps_info_flags
= 0;
550 result
.pps_info_flags
|= pic
->pps
->transform_8x8_mode_flag
<< 0;
551 result
.pps_info_flags
|= pic
->pps
->redundant_pic_cnt_present_flag
<< 1;
552 result
.pps_info_flags
|= pic
->pps
->constrained_intra_pred_flag
<< 2;
553 result
.pps_info_flags
|= pic
->pps
->deblocking_filter_control_present_flag
<< 3;
554 result
.pps_info_flags
|= pic
->pps
->weighted_bipred_idc
<< 4;
555 result
.pps_info_flags
|= pic
->pps
->weighted_pred_flag
<< 6;
556 result
.pps_info_flags
|= pic
->pps
->bottom_field_pic_order_in_frame_present_flag
<< 7;
557 result
.pps_info_flags
|= pic
->pps
->entropy_coding_mode_flag
<< 8;
559 result
.num_slice_groups_minus1
= pic
->pps
->num_slice_groups_minus1
;
560 result
.slice_group_map_type
= pic
->pps
->slice_group_map_type
;
561 result
.slice_group_change_rate_minus1
= pic
->pps
->slice_group_change_rate_minus1
;
562 result
.pic_init_qp_minus26
= pic
->pps
->pic_init_qp_minus26
;
563 result
.chroma_qp_index_offset
= pic
->pps
->chroma_qp_index_offset
;
564 result
.second_chroma_qp_index_offset
= pic
->pps
->second_chroma_qp_index_offset
;
566 memcpy(result
.scaling_list_4x4
, pic
->pps
->ScalingList4x4
, 6*16);
567 memcpy(result
.scaling_list_8x8
, pic
->pps
->ScalingList8x8
, 2*64);
569 if (dec
->stream_type
== RUVD_CODEC_H264_PERF
) {
570 memcpy(dec
->it
, result
.scaling_list_4x4
, 6*16);
571 memcpy((dec
->it
+ 96), result
.scaling_list_8x8
, 2*64);
574 result
.num_ref_frames
= pic
->num_ref_frames
;
576 result
.num_ref_idx_l0_active_minus1
= pic
->num_ref_idx_l0_active_minus1
;
577 result
.num_ref_idx_l1_active_minus1
= pic
->num_ref_idx_l1_active_minus1
;
579 result
.frame_num
= pic
->frame_num
;
580 memcpy(result
.frame_num_list
, pic
->frame_num_list
, 4*16);
581 result
.curr_field_order_cnt_list
[0] = pic
->field_order_cnt
[0];
582 result
.curr_field_order_cnt_list
[1] = pic
->field_order_cnt
[1];
583 memcpy(result
.field_order_cnt_list
, pic
->field_order_cnt_list
, 4*16*2);
585 result
.decoded_pic_idx
= pic
->frame_num
;
590 /* get h265 specific message bits */
/*
 * Builds a zero-initialised struct ruvd_h265 from the picture description:
 *   - sps_info_flags / pps_info_flags packed bit by bit from the SPS/PPS
 *     flags; bit 9 is set on CHIP_CARRIZO and bit 10 when pic->UseRefPicList
 *     is true
 *   - SPS/PPS scalars and the tile column/row size arrays copied field by field
 *   - dec->render_pic_list is updated and `target` is stored in a free slot;
 *     result.curr_idx is attached to `target` as associated data
 *   - poc_list / ref_pic_list filled per reference, the three ref_pic_set_*
 *     arrays preset to 0xFF and then filled from `pic`
 *   - scaling lists copied into dec->it at byte offsets 0, 96, 480 and 864
 *   - for the HEVC Main10 profile, p010_mode or the 10-to-8 conversion fields
 *     are set depending on target->buffer_format
 */
591 static struct ruvd_h265
get_h265_msg(struct ruvd_decoder
*dec
, struct pipe_video_buffer
*target
,
592 struct pipe_h265_picture_desc
*pic
)
594 struct ruvd_h265 result
;
597 memset(&result
, 0, sizeof(result
));
599 result
.sps_info_flags
= 0;
600 result
.sps_info_flags
|= pic
->pps
->sps
->scaling_list_enabled_flag
<< 0;
601 result
.sps_info_flags
|= pic
->pps
->sps
->amp_enabled_flag
<< 1;
602 result
.sps_info_flags
|= pic
->pps
->sps
->sample_adaptive_offset_enabled_flag
<< 2;
603 result
.sps_info_flags
|= pic
->pps
->sps
->pcm_enabled_flag
<< 3;
604 result
.sps_info_flags
|= pic
->pps
->sps
->pcm_loop_filter_disabled_flag
<< 4;
605 result
.sps_info_flags
|= pic
->pps
->sps
->long_term_ref_pics_present_flag
<< 5;
606 result
.sps_info_flags
|= pic
->pps
->sps
->sps_temporal_mvp_enabled_flag
<< 6;
607 result
.sps_info_flags
|= pic
->pps
->sps
->strong_intra_smoothing_enabled_flag
<< 7;
608 result
.sps_info_flags
|= pic
->pps
->sps
->separate_colour_plane_flag
<< 8;
609 if (((struct si_screen
*)dec
->screen
)->info
.family
== CHIP_CARRIZO
)
610 result
.sps_info_flags
|= 1 << 9;
611 if (pic
->UseRefPicList
== true)
612 result
.sps_info_flags
|= 1 << 10;
614 result
.chroma_format
= pic
->pps
->sps
->chroma_format_idc
;
615 result
.bit_depth_luma_minus8
= pic
->pps
->sps
->bit_depth_luma_minus8
;
616 result
.bit_depth_chroma_minus8
= pic
->pps
->sps
->bit_depth_chroma_minus8
;
617 result
.log2_max_pic_order_cnt_lsb_minus4
= pic
->pps
->sps
->log2_max_pic_order_cnt_lsb_minus4
;
618 result
.sps_max_dec_pic_buffering_minus1
= pic
->pps
->sps
->sps_max_dec_pic_buffering_minus1
;
619 result
.log2_min_luma_coding_block_size_minus3
= pic
->pps
->sps
->log2_min_luma_coding_block_size_minus3
;
620 result
.log2_diff_max_min_luma_coding_block_size
= pic
->pps
->sps
->log2_diff_max_min_luma_coding_block_size
;
621 result
.log2_min_transform_block_size_minus2
= pic
->pps
->sps
->log2_min_transform_block_size_minus2
;
622 result
.log2_diff_max_min_transform_block_size
= pic
->pps
->sps
->log2_diff_max_min_transform_block_size
;
623 result
.max_transform_hierarchy_depth_inter
= pic
->pps
->sps
->max_transform_hierarchy_depth_inter
;
624 result
.max_transform_hierarchy_depth_intra
= pic
->pps
->sps
->max_transform_hierarchy_depth_intra
;
625 result
.pcm_sample_bit_depth_luma_minus1
= pic
->pps
->sps
->pcm_sample_bit_depth_luma_minus1
;
626 result
.pcm_sample_bit_depth_chroma_minus1
= pic
->pps
->sps
->pcm_sample_bit_depth_chroma_minus1
;
627 result
.log2_min_pcm_luma_coding_block_size_minus3
= pic
->pps
->sps
->log2_min_pcm_luma_coding_block_size_minus3
;
628 result
.log2_diff_max_min_pcm_luma_coding_block_size
= pic
->pps
->sps
->log2_diff_max_min_pcm_luma_coding_block_size
;
629 result
.num_short_term_ref_pic_sets
= pic
->pps
->sps
->num_short_term_ref_pic_sets
;
631 result
.pps_info_flags
= 0;
632 result
.pps_info_flags
|= pic
->pps
->dependent_slice_segments_enabled_flag
<< 0;
633 result
.pps_info_flags
|= pic
->pps
->output_flag_present_flag
<< 1;
634 result
.pps_info_flags
|= pic
->pps
->sign_data_hiding_enabled_flag
<< 2;
635 result
.pps_info_flags
|= pic
->pps
->cabac_init_present_flag
<< 3;
636 result
.pps_info_flags
|= pic
->pps
->constrained_intra_pred_flag
<< 4;
637 result
.pps_info_flags
|= pic
->pps
->transform_skip_enabled_flag
<< 5;
638 result
.pps_info_flags
|= pic
->pps
->cu_qp_delta_enabled_flag
<< 6;
639 result
.pps_info_flags
|= pic
->pps
->pps_slice_chroma_qp_offsets_present_flag
<< 7;
640 result
.pps_info_flags
|= pic
->pps
->weighted_pred_flag
<< 8;
641 result
.pps_info_flags
|= pic
->pps
->weighted_bipred_flag
<< 9;
642 result
.pps_info_flags
|= pic
->pps
->transquant_bypass_enabled_flag
<< 10;
643 result
.pps_info_flags
|= pic
->pps
->tiles_enabled_flag
<< 11;
644 result
.pps_info_flags
|= pic
->pps
->entropy_coding_sync_enabled_flag
<< 12;
645 result
.pps_info_flags
|= pic
->pps
->uniform_spacing_flag
<< 13;
646 result
.pps_info_flags
|= pic
->pps
->loop_filter_across_tiles_enabled_flag
<< 14;
647 result
.pps_info_flags
|= pic
->pps
->pps_loop_filter_across_slices_enabled_flag
<< 15;
648 result
.pps_info_flags
|= pic
->pps
->deblocking_filter_override_enabled_flag
<< 16;
649 result
.pps_info_flags
|= pic
->pps
->pps_deblocking_filter_disabled_flag
<< 17;
650 result
.pps_info_flags
|= pic
->pps
->lists_modification_present_flag
<< 18;
651 result
.pps_info_flags
|= pic
->pps
->slice_segment_header_extension_present_flag
<< 19;
652 //result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ???
654 result
.num_extra_slice_header_bits
= pic
->pps
->num_extra_slice_header_bits
;
655 result
.num_long_term_ref_pic_sps
= pic
->pps
->sps
->num_long_term_ref_pics_sps
;
656 result
.num_ref_idx_l0_default_active_minus1
= pic
->pps
->num_ref_idx_l0_default_active_minus1
;
657 result
.num_ref_idx_l1_default_active_minus1
= pic
->pps
->num_ref_idx_l1_default_active_minus1
;
658 result
.pps_cb_qp_offset
= pic
->pps
->pps_cb_qp_offset
;
659 result
.pps_cr_qp_offset
= pic
->pps
->pps_cr_qp_offset
;
660 result
.pps_beta_offset_div2
= pic
->pps
->pps_beta_offset_div2
;
661 result
.pps_tc_offset_div2
= pic
->pps
->pps_tc_offset_div2
;
662 result
.diff_cu_qp_delta_depth
= pic
->pps
->diff_cu_qp_delta_depth
;
663 result
.num_tile_columns_minus1
= pic
->pps
->num_tile_columns_minus1
;
664 result
.num_tile_rows_minus1
= pic
->pps
->num_tile_rows_minus1
;
665 result
.log2_parallel_merge_level_minus2
= pic
->pps
->log2_parallel_merge_level_minus2
;
666 result
.init_qp_minus26
= pic
->pps
->init_qp_minus26
;
668 for (i
= 0; i
< 19; ++i
)
669 result
.column_width_minus1
[i
] = pic
->pps
->column_width_minus1
[i
];
671 for (i
= 0; i
< 21; ++i
)
672 result
.row_height_minus1
[i
] = pic
->pps
->row_height_minus1
[i
];
674 result
.num_delta_pocs_ref_rps_idx
= pic
->NumDeltaPocsOfRefRpsIdx
;
675 result
.curr_poc
= pic
->CurrPicOrderCntVal
;
/* NOTE(review): the inner loop condition reads pic->ref[j] before testing
 * j < 16, so the element at index 16 is read when every entry is non-NULL;
 * swapping the two operands would avoid that - confirm the size of pic->ref. */
677 for (i
= 0 ; i
< 16 ; i
++) {
678 for (j
= 0; (pic
->ref
[j
] != NULL
) && (j
< 16) ; j
++) {
679 if (dec
->render_pic_list
[i
] == pic
->ref
[j
])
682 dec
->render_pic_list
[i
] = NULL
;
683 else if (pic
->ref
[j
+1] == NULL
)
684 dec
->render_pic_list
[i
] = NULL
;
687 for (i
= 0 ; i
< 16 ; i
++) {
688 if (dec
->render_pic_list
[i
] == NULL
) {
689 dec
->render_pic_list
[i
] = target
;
695 vl_video_buffer_set_associated_data(target
, &dec
->base
,
696 (void *)(uintptr_t)result
.curr_idx
,
697 &ruvd_destroy_associated_data
);
699 for (i
= 0; i
< 16; ++i
) {
700 struct pipe_video_buffer
*ref
= pic
->ref
[i
];
701 uintptr_t ref_pic
= 0;
703 result
.poc_list
[i
] = pic
->PicOrderCntVal
[i
];
706 ref_pic
= (uintptr_t)vl_video_buffer_get_associated_data(ref
, &dec
->base
);
709 result
.ref_pic_list
[i
] = ref_pic
;
712 for (i
= 0; i
< 8; ++i
) {
713 result
.ref_pic_set_st_curr_before
[i
] = 0xFF;
714 result
.ref_pic_set_st_curr_after
[i
] = 0xFF;
715 result
.ref_pic_set_lt_curr
[i
] = 0xFF;
718 for (i
= 0; i
< pic
->NumPocStCurrBefore
; ++i
)
719 result
.ref_pic_set_st_curr_before
[i
] = pic
->RefPicSetStCurrBefore
[i
];
721 for (i
= 0; i
< pic
->NumPocStCurrAfter
; ++i
)
722 result
.ref_pic_set_st_curr_after
[i
] = pic
->RefPicSetStCurrAfter
[i
];
724 for (i
= 0; i
< pic
->NumPocLtCurr
; ++i
)
725 result
.ref_pic_set_lt_curr
[i
] = pic
->RefPicSetLtCurr
[i
];
727 for (i
= 0; i
< 6; ++i
)
728 result
.ucScalingListDCCoefSizeID2
[i
] = pic
->pps
->sps
->ScalingListDCCoeff16x16
[i
];
730 for (i
= 0; i
< 2; ++i
)
731 result
.ucScalingListDCCoefSizeID3
[i
] = pic
->pps
->sps
->ScalingListDCCoeff32x32
[i
];
733 memcpy(dec
->it
, pic
->pps
->sps
->ScalingList4x4
, 6 * 16);
734 memcpy(dec
->it
+ 96, pic
->pps
->sps
->ScalingList8x8
, 6 * 64);
735 memcpy(dec
->it
+ 480, pic
->pps
->sps
->ScalingList16x16
, 6 * 64);
736 memcpy(dec
->it
+ 864, pic
->pps
->sps
->ScalingList32x32
, 2 * 64);
738 for (i
= 0 ; i
< 2 ; i
++) {
739 for (j
= 0 ; j
< 15 ; j
++)
740 result
.direct_reflist
[i
][j
] = pic
->RefPicList
[i
][j
];
743 if (pic
->base
.profile
== PIPE_VIDEO_PROFILE_HEVC_MAIN_10
) {
744 if (target
->buffer_format
== PIPE_FORMAT_P016
) {
745 result
.p010_mode
= 1;
748 result
.luma_10to8
= 5;
749 result
.chroma_10to8
= 5;
750 result
.sclr_luma10to8
= 4;
751 result
.sclr_chroma10to8
= 4;
762 NumShortTermPictureSliceHeaderBits;
763 NumLongTermPictureSliceHeaderBits;
771 /* get vc1 specific message bits */
/*
 * Builds a zero-initialised struct ruvd_vc1 from the picture description:
 * the profile is translated from pic->base.profile, sps_info_flags and
 * pps_info_flags are packed from the individual fields of `pic` at fixed bit
 * positions, a further group of pps_info_flags bits is only set for profiles
 * other than VC1 simple, and chroma_format is fixed to 1.
 */
772 static struct ruvd_vc1
get_vc1_msg(struct pipe_vc1_picture_desc
*pic
)
774 struct ruvd_vc1 result
;
776 memset(&result
, 0, sizeof(result
));
778 switch(pic
->base
.profile
) {
779 case PIPE_VIDEO_PROFILE_VC1_SIMPLE
:
780 result
.profile
= RUVD_VC1_PROFILE_SIMPLE
;
784 case PIPE_VIDEO_PROFILE_VC1_MAIN
:
785 result
.profile
= RUVD_VC1_PROFILE_MAIN
;
789 case PIPE_VIDEO_PROFILE_VC1_ADVANCED
:
790 result
.profile
= RUVD_VC1_PROFILE_ADVANCED
;
798 /* fields common for all profiles */
799 result
.sps_info_flags
|= pic
->postprocflag
<< 7;
800 result
.sps_info_flags
|= pic
->pulldown
<< 6;
801 result
.sps_info_flags
|= pic
->interlace
<< 5;
802 result
.sps_info_flags
|= pic
->tfcntrflag
<< 4;
803 result
.sps_info_flags
|= pic
->finterpflag
<< 3;
804 result
.sps_info_flags
|= pic
->psf
<< 1;
/* NOTE(review): if range_mapy_flag promotes to (signed) int, shifting a set
 * bit into position 31 overflows the int; an unsigned cast before the shift
 * would avoid that - confirm the field's declared type. */
806 result
.pps_info_flags
|= pic
->range_mapy_flag
<< 31;
807 result
.pps_info_flags
|= pic
->range_mapy
<< 28;
808 result
.pps_info_flags
|= pic
->range_mapuv_flag
<< 27;
809 result
.pps_info_flags
|= pic
->range_mapuv
<< 24;
810 result
.pps_info_flags
|= pic
->multires
<< 21;
811 result
.pps_info_flags
|= pic
->maxbframes
<< 16;
812 result
.pps_info_flags
|= pic
->overlap
<< 11;
813 result
.pps_info_flags
|= pic
->quantizer
<< 9;
814 result
.pps_info_flags
|= pic
->panscan_flag
<< 7;
815 result
.pps_info_flags
|= pic
->refdist_flag
<< 6;
816 result
.pps_info_flags
|= pic
->vstransform
<< 0;
818 /* some fields only apply to main/advanced profile */
819 if (pic
->base
.profile
!= PIPE_VIDEO_PROFILE_VC1_SIMPLE
) {
820 result
.pps_info_flags
|= pic
->syncmarker
<< 20;
821 result
.pps_info_flags
|= pic
->rangered
<< 19;
822 result
.pps_info_flags
|= pic
->loopfilter
<< 5;
823 result
.pps_info_flags
|= pic
->fastuvmc
<< 4;
824 result
.pps_info_flags
|= pic
->extended_mv
<< 3;
825 result
.pps_info_flags
|= pic
->extended_dmv
<< 8;
826 result
.pps_info_flags
|= pic
->dquant
<< 1;
829 result
.chroma_format
= 1;
832 //(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
835 uint8_t frame_coding_mode
836 uint8_t deblockEnable
843 /* extract the frame number from a referenced video buffer */
/*
 * The frame number stored as associated data on `ref` is clamped to the
 * window [min, max], where
 *   min = MAX2(frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS
 *   max = MAX2(frame_number, 1) - 1
 * (the MAX2 guards keep both unsigned subtractions from wrapping below zero).
 * The fallback taken before the lookup is not visible in this listing.
 */
844 static uint32_t get_ref_pic_idx(struct ruvd_decoder
*dec
, struct pipe_video_buffer
*ref
)
846 uint32_t min
= MAX2(dec
->frame_number
, NUM_MPEG2_REFS
) - NUM_MPEG2_REFS
;
847 uint32_t max
= MAX2(dec
->frame_number
, 1) - 1;
850 /* seems to be the most sane fallback */
854 /* get the frame number from the associated data */
855 frame
= (uintptr_t)vl_video_buffer_get_associated_data(ref
, &dec
->base
);
857 /* limit the frame number to a valid range */
858 return MAX2(MIN2(frame
, max
), min
);
861 /* get mpeg2 specific msg bits */
/*
 * Builds a zero-initialised struct ruvd_mpeg2:
 *   - decoded_pic_idx is the decoder's current frame number and the two
 *     reference indices come from get_ref_pic_idx()
 *   - when pic->intra_matrix / pic->non_intra_matrix are set, the matching
 *     load flag is raised and the 64 entries are copied through the zscan
 *     table selected by pic->alternate_scan
 *   - each f_code entry is stored as the pipe value plus one
 *   - the remaining picture coding fields are copied unchanged;
 *     chroma_format is fixed to 0x1
 */
862 static struct ruvd_mpeg2
get_mpeg2_msg(struct ruvd_decoder
*dec
,
863 struct pipe_mpeg12_picture_desc
*pic
)
865 const int *zscan
= pic
->alternate_scan
? vl_zscan_alternate
: vl_zscan_normal
;
866 struct ruvd_mpeg2 result
;
869 memset(&result
, 0, sizeof(result
));
870 result
.decoded_pic_idx
= dec
->frame_number
;
871 for (i
= 0; i
< 2; ++i
)
872 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
874 if(pic
->intra_matrix
) {
875 result
.load_intra_quantiser_matrix
= 1;
876 for (i
= 0; i
< 64; ++i
) {
877 result
.intra_quantiser_matrix
[i
] = pic
->intra_matrix
[zscan
[i
]];
880 if(pic
->non_intra_matrix
) {
881 result
.load_nonintra_quantiser_matrix
= 1;
882 for (i
= 0; i
< 64; ++i
) {
883 result
.nonintra_quantiser_matrix
[i
] = pic
->non_intra_matrix
[zscan
[i
]];
887 result
.profile_and_level_indication
= 0;
888 result
.chroma_format
= 0x1;
890 result
.picture_coding_type
= pic
->picture_coding_type
;
891 result
.f_code
[0][0] = pic
->f_code
[0][0] + 1;
892 result
.f_code
[0][1] = pic
->f_code
[0][1] + 1;
893 result
.f_code
[1][0] = pic
->f_code
[1][0] + 1;
894 result
.f_code
[1][1] = pic
->f_code
[1][1] + 1;
895 result
.intra_dc_precision
= pic
->intra_dc_precision
;
896 result
.pic_structure
= pic
->picture_structure
;
897 result
.top_field_first
= pic
->top_field_first
;
898 result
.frame_pred_frame_dct
= pic
->frame_pred_frame_dct
;
899 result
.concealment_motion_vectors
= pic
->concealment_motion_vectors
;
900 result
.q_scale_type
= pic
->q_scale_type
;
901 result
.intra_vlc_format
= pic
->intra_vlc_format
;
902 result
.alternate_scan
= pic
->alternate_scan
;
907 /* get mpeg4 specific msg bits */
/*
 * Builds a zero-initialised struct ruvd_mpeg4:
 *   - decoded_pic_idx is the decoder's current frame number and the two
 *     reference indices come from get_ref_pic_idx()
 *   - several header fields are hard-coded (variant type, profile/level
 *     indication, object layer verid and shape); width and height come from
 *     dec->base
 *   - `flags` combines bits taken from `pic` with constant bits; the
 *     commented-out assignments mark flag bits that are left unset
 *   - both quantiser matrices are always copied, reordered through
 *     vl_zscan_normal; pic->intra_matrix and pic->non_intra_matrix are
 *     dereferenced without a visible NULL check
 */
908 static struct ruvd_mpeg4
get_mpeg4_msg(struct ruvd_decoder
*dec
,
909 struct pipe_mpeg4_picture_desc
*pic
)
911 struct ruvd_mpeg4 result
;
914 memset(&result
, 0, sizeof(result
));
915 result
.decoded_pic_idx
= dec
->frame_number
;
916 for (i
= 0; i
< 2; ++i
)
917 result
.ref_pic_idx
[i
] = get_ref_pic_idx(dec
, pic
->ref
[i
]);
919 result
.variant_type
= 0;
920 result
.profile_and_level_indication
= 0xF0; // ASP Level0
922 result
.video_object_layer_verid
= 0x5; // advanced simple
923 result
.video_object_layer_shape
= 0x0; // rectangular
925 result
.video_object_layer_width
= dec
->base
.width
;
926 result
.video_object_layer_height
= dec
->base
.height
;
928 result
.vop_time_increment_resolution
= pic
->vop_time_increment_resolution
;
930 result
.flags
|= pic
->short_video_header
<< 0;
931 //result.flags |= obmc_disable << 1;
932 result
.flags
|= pic
->interlaced
<< 2;
933 result
.flags
|= 1 << 3; // load_intra_quant_mat
934 result
.flags
|= 1 << 4; // load_nonintra_quant_mat
935 result
.flags
|= pic
->quarter_sample
<< 5;
936 result
.flags
|= 1 << 6; // complexity_estimation_disable
937 result
.flags
|= pic
->resync_marker_disable
<< 7;
938 //result.flags |= data_partitioned << 8;
939 //result.flags |= reversible_vlc << 9;
940 result
.flags
|= 0 << 10; // newpred_enable
941 result
.flags
|= 0 << 11; // reduced_resolution_vop_enable
942 //result.flags |= scalability << 12;
943 //result.flags |= is_object_layer_identifier << 13;
944 //result.flags |= fixed_vop_rate << 14;
945 //result.flags |= newpred_segment_type << 15;
947 result
.quant_type
= pic
->quant_type
;
949 for (i
= 0; i
< 64; ++i
) {
950 result
.intra_quant_mat
[i
] = pic
->intra_matrix
[vl_zscan_normal
[i
]];
951 result
.nonintra_quant_mat
[i
] = pic
->non_intra_matrix
[vl_zscan_normal
[i
]];
957 uint8_t vop_coding_type
958 uint8_t vop_fcode_forward
959 uint8_t vop_fcode_backward
960 uint8_t rounding_control
961 uint8_t alternate_vertical_scan_flag
962 uint8_t top_field_first
968 static void get_mjpeg_slice_header(struct ruvd_decoder
*dec
, struct pipe_mjpeg_picture_desc
*pic
)
970 int size
= 0, saved_size
, len_pos
, i
;
972 uint8_t *buf
= dec
->bs_ptr
;
985 for (i
= 0; i
< 4; ++i
) {
986 if (pic
->quantization_table
.load_quantiser_table
[i
] == 0)
990 memcpy((buf
+ size
), &pic
->quantization_table
.quantiser_table
[i
], 64);
994 bs
= (uint16_t*)&buf
[len_pos
];
995 *bs
= util_bswap16(size
- 4);
1006 for (i
= 0; i
< 2; ++i
) {
1007 if (pic
->huffman_table
.load_huffman_table
[i
] == 0)
1010 buf
[size
++] = 0x00 | i
;
1011 memcpy((buf
+ size
), &pic
->huffman_table
.table
[i
].num_dc_codes
, 16);
1013 memcpy((buf
+ size
), &pic
->huffman_table
.table
[i
].dc_values
, 12);
1017 for (i
= 0; i
< 2; ++i
) {
1018 if (pic
->huffman_table
.load_huffman_table
[i
] == 0)
1021 buf
[size
++] = 0x10 | i
;
1022 memcpy((buf
+ size
), &pic
->huffman_table
.table
[i
].num_ac_codes
, 16);
1024 memcpy((buf
+ size
), &pic
->huffman_table
.table
[i
].ac_values
, 162);
1028 bs
= (uint16_t*)&buf
[len_pos
];
1029 *bs
= util_bswap16(size
- saved_size
- 2);
1034 if (pic
->slice_parameter
.restart_interval
) {
1039 bs
= (uint16_t*)&buf
[size
++];
1040 *bs
= util_bswap16(pic
->slice_parameter
.restart_interval
);
1041 saved_size
= ++size
;
1053 bs
= (uint16_t*)&buf
[size
++];
1054 *bs
= util_bswap16(pic
->picture_parameter
.picture_height
);
1057 bs
= (uint16_t*)&buf
[size
++];
1058 *bs
= util_bswap16(pic
->picture_parameter
.picture_width
);
1061 buf
[size
++] = pic
->picture_parameter
.num_components
;
1063 for (i
= 0; i
< pic
->picture_parameter
.num_components
; ++i
) {
1064 buf
[size
++] = pic
->picture_parameter
.components
[i
].component_id
;
1065 buf
[size
++] = pic
->picture_parameter
.components
[i
].h_sampling_factor
<< 4 |
1066 pic
->picture_parameter
.components
[i
].v_sampling_factor
;
1067 buf
[size
++] = pic
->picture_parameter
.components
[i
].quantiser_table_selector
;
1070 bs
= (uint16_t*)&buf
[len_pos
];
1071 *bs
= util_bswap16(size
- saved_size
- 2);
1082 buf
[size
++] = pic
->slice_parameter
.num_components
;
1084 for (i
= 0; i
< pic
->slice_parameter
.num_components
; ++i
) {
1085 buf
[size
++] = pic
->slice_parameter
.components
[i
].component_selector
;
1086 buf
[size
++] = pic
->slice_parameter
.components
[i
].dc_table_selector
<< 4 |
1087 pic
->slice_parameter
.components
[i
].ac_table_selector
;
1094 bs
= (uint16_t*)&buf
[len_pos
];
1095 *bs
= util_bswap16(size
- saved_size
- 2);
1097 dec
->bs_ptr
+= size
;
1098 dec
->bs_size
+= size
;
1102 * destroy this video decoder
1104 static void ruvd_destroy(struct pipe_video_codec
*decoder
)
1106 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
1111 map_msg_fb_it_buf(dec
);
1112 dec
->msg
->size
= sizeof(*dec
->msg
);
1113 dec
->msg
->msg_type
= RUVD_MSG_DESTROY
;
1114 dec
->msg
->stream_handle
= dec
->stream_handle
;
1119 dec
->ws
->cs_destroy(dec
->cs
);
1121 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
1122 si_vid_destroy_buffer(&dec
->msg_fb_it_buffers
[i
]);
1123 si_vid_destroy_buffer(&dec
->bs_buffers
[i
]);
1126 si_vid_destroy_buffer(&dec
->dpb
);
1127 si_vid_destroy_buffer(&dec
->ctx
);
1128 si_vid_destroy_buffer(&dec
->sessionctx
);
1134 * start decoding of a new frame
1136 static void ruvd_begin_frame(struct pipe_video_codec
*decoder
,
1137 struct pipe_video_buffer
*target
,
1138 struct pipe_picture_desc
*picture
)
1140 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
1145 frame
= ++dec
->frame_number
;
1146 vl_video_buffer_set_associated_data(target
, decoder
, (void *)frame
,
1147 &ruvd_destroy_associated_data
);
1150 dec
->bs_ptr
= dec
->ws
->buffer_map(
1151 dec
->bs_buffers
[dec
->cur_buffer
].res
->buf
,
1152 dec
->cs
, PIPE_TRANSFER_WRITE
);
/**
 * decode a macroblock
 *
 * Macroblock level decoding is not implemented for this decoder; the hook
 * only exists to fill the pipe_video_codec interface and asserts when hit.
 */
static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,
				   struct pipe_video_buffer *target,
				   struct pipe_picture_desc *picture,
				   const struct pipe_macroblock *macroblocks,
				   unsigned num_macroblocks)
{
	/* not supported (yet) */
	assert(0);
}

1169 * decode a bitstream
1171 static void ruvd_decode_bitstream(struct pipe_video_codec
*decoder
,
1172 struct pipe_video_buffer
*target
,
1173 struct pipe_picture_desc
*picture
,
1174 unsigned num_buffers
,
1175 const void * const *buffers
,
1176 const unsigned *sizes
)
1178 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
1179 enum pipe_video_format format
= u_reduce_video_profile(picture
->profile
);
1187 if (format
== PIPE_VIDEO_FORMAT_JPEG
)
1188 get_mjpeg_slice_header(dec
, (struct pipe_mjpeg_picture_desc
*)picture
);
1190 for (i
= 0; i
< num_buffers
; ++i
) {
1191 struct rvid_buffer
*buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
1192 unsigned new_size
= dec
->bs_size
+ sizes
[i
];
1194 if (format
== PIPE_VIDEO_FORMAT_JPEG
)
1195 new_size
+= 2; /* save for EOI */
1197 if (new_size
> buf
->res
->buf
->size
) {
1198 dec
->ws
->buffer_unmap(buf
->res
->buf
);
1199 if (!si_vid_resize_buffer(dec
->screen
, dec
->cs
, buf
, new_size
)) {
1200 RVID_ERR("Can't resize bitstream buffer!");
1204 dec
->bs_ptr
= dec
->ws
->buffer_map(buf
->res
->buf
, dec
->cs
,
1205 PIPE_TRANSFER_WRITE
);
1209 dec
->bs_ptr
+= dec
->bs_size
;
1212 memcpy(dec
->bs_ptr
, buffers
[i
], sizes
[i
]);
1213 dec
->bs_size
+= sizes
[i
];
1214 dec
->bs_ptr
+= sizes
[i
];
1217 if (format
== PIPE_VIDEO_FORMAT_JPEG
) {
1218 ((uint8_t *)dec
->bs_ptr
)[0] = 0xff; /* EOI */
1219 ((uint8_t *)dec
->bs_ptr
)[1] = 0xd9;
1226 * end decoding of the current frame
1228 static void ruvd_end_frame(struct pipe_video_codec
*decoder
,
1229 struct pipe_video_buffer
*target
,
1230 struct pipe_picture_desc
*picture
)
1232 struct ruvd_decoder
*dec
= (struct ruvd_decoder
*)decoder
;
1233 struct pb_buffer
*dt
;
1234 struct rvid_buffer
*msg_fb_it_buf
, *bs_buf
;
1242 msg_fb_it_buf
= &dec
->msg_fb_it_buffers
[dec
->cur_buffer
];
1243 bs_buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
1245 bs_size
= align(dec
->bs_size
, 128);
1246 memset(dec
->bs_ptr
, 0, bs_size
- dec
->bs_size
);
1247 dec
->ws
->buffer_unmap(bs_buf
->res
->buf
);
1249 map_msg_fb_it_buf(dec
);
1250 dec
->msg
->size
= sizeof(*dec
->msg
);
1251 dec
->msg
->msg_type
= RUVD_MSG_DECODE
;
1252 dec
->msg
->stream_handle
= dec
->stream_handle
;
1253 dec
->msg
->status_report_feedback_number
= dec
->frame_number
;
1255 dec
->msg
->body
.decode
.stream_type
= dec
->stream_type
;
1256 dec
->msg
->body
.decode
.decode_flags
= 0x1;
1257 dec
->msg
->body
.decode
.width_in_samples
= dec
->base
.width
;
1258 dec
->msg
->body
.decode
.height_in_samples
= dec
->base
.height
;
1260 if ((picture
->profile
== PIPE_VIDEO_PROFILE_VC1_SIMPLE
) ||
1261 (picture
->profile
== PIPE_VIDEO_PROFILE_VC1_MAIN
)) {
1262 dec
->msg
->body
.decode
.width_in_samples
= align(dec
->msg
->body
.decode
.width_in_samples
, 16) / 16;
1263 dec
->msg
->body
.decode
.height_in_samples
= align(dec
->msg
->body
.decode
.height_in_samples
, 16) / 16;
1267 dec
->msg
->body
.decode
.dpb_size
= dec
->dpb
.res
->buf
->size
;
1268 dec
->msg
->body
.decode
.bsd_size
= bs_size
;
1269 dec
->msg
->body
.decode
.db_pitch
= align(dec
->base
.width
, get_db_pitch_alignment(dec
));
1271 if (dec
->stream_type
== RUVD_CODEC_H264_PERF
&&
1272 ((struct si_screen
*)dec
->screen
)->info
.family
>= CHIP_POLARIS10
)
1273 dec
->msg
->body
.decode
.dpb_reserved
= dec
->ctx
.res
->buf
->size
;
1275 dt
= dec
->set_dtb(dec
->msg
, (struct vl_video_buffer
*)target
);
1276 if (((struct si_screen
*)dec
->screen
)->info
.family
>= CHIP_STONEY
)
1277 dec
->msg
->body
.decode
.dt_wa_chroma_top_offset
= dec
->msg
->body
.decode
.dt_pitch
/ 2;
1279 switch (u_reduce_video_profile(picture
->profile
)) {
1280 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
1281 dec
->msg
->body
.decode
.codec
.h264
= get_h264_msg(dec
, (struct pipe_h264_picture_desc
*)picture
);
1284 case PIPE_VIDEO_FORMAT_HEVC
:
1285 dec
->msg
->body
.decode
.codec
.h265
= get_h265_msg(dec
, target
, (struct pipe_h265_picture_desc
*)picture
);
1286 if (dec
->ctx
.res
== NULL
) {
1288 if (dec
->base
.profile
== PIPE_VIDEO_PROFILE_HEVC_MAIN_10
)
1289 ctx_size
= calc_ctx_size_h265_main10(dec
, (struct pipe_h265_picture_desc
*)picture
);
1291 ctx_size
= calc_ctx_size_h265_main(dec
);
1292 if (!si_vid_create_buffer(dec
->screen
, &dec
->ctx
, ctx_size
, PIPE_USAGE_DEFAULT
)) {
1293 RVID_ERR("Can't allocated context buffer.\n");
1295 si_vid_clear_buffer(decoder
->context
, &dec
->ctx
);
1299 dec
->msg
->body
.decode
.dpb_reserved
= dec
->ctx
.res
->buf
->size
;
1302 case PIPE_VIDEO_FORMAT_VC1
:
1303 dec
->msg
->body
.decode
.codec
.vc1
= get_vc1_msg((struct pipe_vc1_picture_desc
*)picture
);
1306 case PIPE_VIDEO_FORMAT_MPEG12
:
1307 dec
->msg
->body
.decode
.codec
.mpeg2
= get_mpeg2_msg(dec
, (struct pipe_mpeg12_picture_desc
*)picture
);
1310 case PIPE_VIDEO_FORMAT_MPEG4
:
1311 dec
->msg
->body
.decode
.codec
.mpeg4
= get_mpeg4_msg(dec
, (struct pipe_mpeg4_picture_desc
*)picture
);
1314 case PIPE_VIDEO_FORMAT_JPEG
:
1322 dec
->msg
->body
.decode
.db_surf_tile_config
= dec
->msg
->body
.decode
.dt_surf_tile_config
;
1323 dec
->msg
->body
.decode
.extension_support
= 0x1;
1325 /* set at least the feedback buffer size */
1326 dec
->fb
[0] = dec
->fb_size
;
1331 send_cmd(dec
, RUVD_CMD_DPB_BUFFER
, dec
->dpb
.res
->buf
, 0,
1332 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
1335 send_cmd(dec
, RUVD_CMD_CONTEXT_BUFFER
, dec
->ctx
.res
->buf
, 0,
1336 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
1337 send_cmd(dec
, RUVD_CMD_BITSTREAM_BUFFER
, bs_buf
->res
->buf
,
1338 0, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
1339 send_cmd(dec
, RUVD_CMD_DECODING_TARGET_BUFFER
, dt
, 0,
1340 RADEON_USAGE_WRITE
, RADEON_DOMAIN_VRAM
);
1341 send_cmd(dec
, RUVD_CMD_FEEDBACK_BUFFER
, msg_fb_it_buf
->res
->buf
,
1342 FB_BUFFER_OFFSET
, RADEON_USAGE_WRITE
, RADEON_DOMAIN_GTT
);
1344 send_cmd(dec
, RUVD_CMD_ITSCALING_TABLE_BUFFER
, msg_fb_it_buf
->res
->buf
,
1345 FB_BUFFER_OFFSET
+ dec
->fb_size
, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
1346 set_reg(dec
, dec
->reg
.cntl
, 1);
1348 flush(dec
, PIPE_FLUSH_ASYNC
);
/**
 * flush any outstanding command buffers to the hardware
 *
 * Intentionally empty: nothing is queued up here between frames.
 */
static void ruvd_flush(struct pipe_video_codec *decoder)
{
}

1360 * create and UVD decoder
1362 struct pipe_video_codec
*si_common_uvd_create_decoder(struct pipe_context
*context
,
1363 const struct pipe_video_codec
*templ
,
1364 ruvd_set_dtb set_dtb
)
1366 struct si_context
*sctx
= (struct si_context
*)context
;
1367 struct radeon_winsys
*ws
= sctx
->ws
;
1369 unsigned width
= templ
->width
, height
= templ
->height
;
1370 unsigned bs_buf_size
;
1371 struct ruvd_decoder
*dec
;
1374 switch(u_reduce_video_profile(templ
->profile
)) {
1375 case PIPE_VIDEO_FORMAT_MPEG12
:
1376 if (templ
->entrypoint
> PIPE_VIDEO_ENTRYPOINT_BITSTREAM
)
1377 return vl_create_mpeg12_decoder(context
, templ
);
1380 case PIPE_VIDEO_FORMAT_MPEG4
:
1381 width
= align(width
, VL_MACROBLOCK_WIDTH
);
1382 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
1384 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
1385 width
= align(width
, VL_MACROBLOCK_WIDTH
);
1386 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
1394 dec
= CALLOC_STRUCT(ruvd_decoder
);
1399 if (sctx
->screen
->info
.drm_major
< 3)
1400 dec
->use_legacy
= true;
1403 dec
->base
.context
= context
;
1404 dec
->base
.width
= width
;
1405 dec
->base
.height
= height
;
1407 dec
->base
.destroy
= ruvd_destroy
;
1408 dec
->base
.begin_frame
= ruvd_begin_frame
;
1409 dec
->base
.decode_macroblock
= ruvd_decode_macroblock
;
1410 dec
->base
.decode_bitstream
= ruvd_decode_bitstream
;
1411 dec
->base
.end_frame
= ruvd_end_frame
;
1412 dec
->base
.flush
= ruvd_flush
;
1414 dec
->stream_type
= profile2stream_type(dec
, sctx
->family
);
1415 dec
->set_dtb
= set_dtb
;
1416 dec
->stream_handle
= si_vid_alloc_stream_handle();
1417 dec
->screen
= context
->screen
;
1419 dec
->cs
= ws
->cs_create(sctx
->ctx
, RING_UVD
, NULL
, NULL
);
1421 RVID_ERR("Can't get command submission context.\n");
1425 for (i
= 0; i
< 16; i
++)
1426 dec
->render_pic_list
[i
] = NULL
;
1427 dec
->fb_size
= (sctx
->family
== CHIP_TONGA
) ? FB_BUFFER_SIZE_TONGA
:
1429 bs_buf_size
= width
* height
* (512 / (16 * 16));
1430 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
1431 unsigned msg_fb_it_size
= FB_BUFFER_OFFSET
+ dec
->fb_size
;
1432 STATIC_ASSERT(sizeof(struct ruvd_msg
) <= FB_BUFFER_OFFSET
);
1434 msg_fb_it_size
+= IT_SCALING_TABLE_SIZE
;
1435 if (!si_vid_create_buffer(dec
->screen
, &dec
->msg_fb_it_buffers
[i
],
1436 msg_fb_it_size
, PIPE_USAGE_STAGING
)) {
1437 RVID_ERR("Can't allocated message buffers.\n");
1441 if (!si_vid_create_buffer(dec
->screen
, &dec
->bs_buffers
[i
],
1442 bs_buf_size
, PIPE_USAGE_STAGING
)) {
1443 RVID_ERR("Can't allocated bitstream buffers.\n");
1447 si_vid_clear_buffer(context
, &dec
->msg_fb_it_buffers
[i
]);
1448 si_vid_clear_buffer(context
, &dec
->bs_buffers
[i
]);
1451 dpb_size
= calc_dpb_size(dec
);
1453 if (!si_vid_create_buffer(dec
->screen
, &dec
->dpb
, dpb_size
, PIPE_USAGE_DEFAULT
)) {
1454 RVID_ERR("Can't allocated dpb.\n");
1457 si_vid_clear_buffer(context
, &dec
->dpb
);
1460 if (dec
->stream_type
== RUVD_CODEC_H264_PERF
&& sctx
->family
>= CHIP_POLARIS10
) {
1461 unsigned ctx_size
= calc_ctx_size_h264_perf(dec
);
1462 if (!si_vid_create_buffer(dec
->screen
, &dec
->ctx
, ctx_size
, PIPE_USAGE_DEFAULT
)) {
1463 RVID_ERR("Can't allocated context buffer.\n");
1466 si_vid_clear_buffer(context
, &dec
->ctx
);
1469 if (sctx
->family
>= CHIP_POLARIS10
&& sctx
->screen
->info
.drm_minor
>= 3) {
1470 if (!si_vid_create_buffer(dec
->screen
, &dec
->sessionctx
,
1471 UVD_SESSION_CONTEXT_SIZE
,
1472 PIPE_USAGE_DEFAULT
)) {
1473 RVID_ERR("Can't allocated session ctx.\n");
1476 si_vid_clear_buffer(context
, &dec
->sessionctx
);
1479 if (sctx
->family
>= CHIP_VEGA10
) {
1480 dec
->reg
.data0
= RUVD_GPCOM_VCPU_DATA0_SOC15
;
1481 dec
->reg
.data1
= RUVD_GPCOM_VCPU_DATA1_SOC15
;
1482 dec
->reg
.cmd
= RUVD_GPCOM_VCPU_CMD_SOC15
;
1483 dec
->reg
.cntl
= RUVD_ENGINE_CNTL_SOC15
;
1485 dec
->reg
.data0
= RUVD_GPCOM_VCPU_DATA0
;
1486 dec
->reg
.data1
= RUVD_GPCOM_VCPU_DATA1
;
1487 dec
->reg
.cmd
= RUVD_GPCOM_VCPU_CMD
;
1488 dec
->reg
.cntl
= RUVD_ENGINE_CNTL
;
1491 map_msg_fb_it_buf(dec
);
1492 dec
->msg
->size
= sizeof(*dec
->msg
);
1493 dec
->msg
->msg_type
= RUVD_MSG_CREATE
;
1494 dec
->msg
->stream_handle
= dec
->stream_handle
;
1495 dec
->msg
->body
.create
.stream_type
= dec
->stream_type
;
1496 dec
->msg
->body
.create
.width_in_samples
= dec
->base
.width
;
1497 dec
->msg
->body
.create
.height_in_samples
= dec
->base
.height
;
1498 dec
->msg
->body
.create
.dpb_size
= dpb_size
;
1509 if (dec
->cs
) dec
->ws
->cs_destroy(dec
->cs
);
1511 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
1512 si_vid_destroy_buffer(&dec
->msg_fb_it_buffers
[i
]);
1513 si_vid_destroy_buffer(&dec
->bs_buffers
[i
]);
1516 si_vid_destroy_buffer(&dec
->dpb
);
1517 si_vid_destroy_buffer(&dec
->ctx
);
1518 si_vid_destroy_buffer(&dec
->sessionctx
);
1525 /* calculate top/bottom offset */
1526 static unsigned texture_offset(struct radeon_surf
*surface
, unsigned layer
,
1527 enum ruvd_surface_type type
)
1531 case RUVD_SURFACE_TYPE_LEGACY
:
1532 return surface
->u
.legacy
.level
[0].offset
+
1533 layer
* (uint64_t)surface
->u
.legacy
.level
[0].slice_size_dw
* 4;
1535 case RUVD_SURFACE_TYPE_GFX9
:
1536 return surface
->u
.gfx9
.surf_offset
+
1537 layer
* surface
->u
.gfx9
.surf_slice_size
;
/*
 * hw encode the aspect of macro tiles
 *
 * Maps 1, 2, 4, 8 to 0, 1, 2, 3; any other value is encoded as 0.
 */
static unsigned macro_tile_aspect(unsigned macro_tile_aspect)
{
	if (macro_tile_aspect == 2)
		return 1;
	if (macro_tile_aspect == 4)
		return 2;
	if (macro_tile_aspect == 8)
		return 3;

	/* 1 and everything unknown */
	return 0;
}

/*
 * hw encode the bank width and height
 *
 * Maps 1, 2, 4, 8 to 0, 1, 2, 3; any other value is encoded as 0.
 */
static unsigned bank_wh(unsigned bankwh)
{
	if (bankwh == 2)
		return 1;
	if (bankwh == 4)
		return 2;
	if (bankwh == 8)
		return 3;

	/* 1 and everything unknown */
	return 0;
}

1569 * fill decoding target field from the luma and chroma surfaces
1571 void si_uvd_set_dt_surfaces(struct ruvd_msg
*msg
, struct radeon_surf
*luma
,
1572 struct radeon_surf
*chroma
, enum ruvd_surface_type type
)
1576 case RUVD_SURFACE_TYPE_LEGACY
:
1577 msg
->body
.decode
.dt_pitch
= luma
->u
.legacy
.level
[0].nblk_x
* luma
->blk_w
;
1578 switch (luma
->u
.legacy
.level
[0].mode
) {
1579 case RADEON_SURF_MODE_LINEAR_ALIGNED
:
1580 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_LINEAR
;
1581 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_LINEAR
;
1583 case RADEON_SURF_MODE_1D
:
1584 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
1585 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_1D_THIN
;
1587 case RADEON_SURF_MODE_2D
:
1588 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_8X8
;
1589 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_2D_THIN
;
1596 msg
->body
.decode
.dt_luma_top_offset
= texture_offset(luma
, 0, type
);
1598 msg
->body
.decode
.dt_chroma_top_offset
= texture_offset(chroma
, 0, type
);
1599 if (msg
->body
.decode
.dt_field_mode
) {
1600 msg
->body
.decode
.dt_luma_bottom_offset
= texture_offset(luma
, 1, type
);
1602 msg
->body
.decode
.dt_chroma_bottom_offset
= texture_offset(chroma
, 1, type
);
1604 msg
->body
.decode
.dt_luma_bottom_offset
= msg
->body
.decode
.dt_luma_top_offset
;
1605 msg
->body
.decode
.dt_chroma_bottom_offset
= msg
->body
.decode
.dt_chroma_top_offset
;
1609 assert(luma
->u
.legacy
.bankw
== chroma
->u
.legacy
.bankw
);
1610 assert(luma
->u
.legacy
.bankh
== chroma
->u
.legacy
.bankh
);
1611 assert(luma
->u
.legacy
.mtilea
== chroma
->u
.legacy
.mtilea
);
1614 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_BANK_WIDTH(bank_wh(luma
->u
.legacy
.bankw
));
1615 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_BANK_HEIGHT(bank_wh(luma
->u
.legacy
.bankh
));
1616 msg
->body
.decode
.dt_surf_tile_config
|= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma
->u
.legacy
.mtilea
));
1618 case RUVD_SURFACE_TYPE_GFX9
:
1619 msg
->body
.decode
.dt_pitch
= luma
->u
.gfx9
.surf_pitch
* luma
->blk_w
;
1620 /* SWIZZLE LINEAR MODE */
1621 msg
->body
.decode
.dt_tiling_mode
= RUVD_TILE_LINEAR
;
1622 msg
->body
.decode
.dt_array_mode
= RUVD_ARRAY_MODE_LINEAR
;
1623 msg
->body
.decode
.dt_luma_top_offset
= texture_offset(luma
, 0, type
);
1624 msg
->body
.decode
.dt_chroma_top_offset
= texture_offset(chroma
, 0, type
);
1625 if (msg
->body
.decode
.dt_field_mode
) {
1626 msg
->body
.decode
.dt_luma_bottom_offset
= texture_offset(luma
, 1, type
);
1627 msg
->body
.decode
.dt_chroma_bottom_offset
= texture_offset(chroma
, 1, type
);
1629 msg
->body
.decode
.dt_luma_bottom_offset
= msg
->body
.decode
.dt_luma_top_offset
;
1630 msg
->body
.decode
.dt_chroma_bottom_offset
= msg
->body
.decode
.dt_chroma_top_offset
;
1632 msg
->body
.decode
.dt_surf_tile_config
= 0;