1 /**************************************************************************
3 * Copyright 2017 Advanced Micro Devices, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 #include "pipe/p_video_codec.h"
33 #include "util/u_memory.h"
34 #include "util/u_video.h"
36 #include "vl/vl_mpeg12_decoder.h"
38 #include "r600_pipe_common.h"
39 #include "radeon_video.h"
40 #include "radeon_vcn_dec.h"
/*
 * Layout of the combined message/feedback/IT-scaling buffer: the feedback
 * area starts at FB_BUFFER_OFFSET and is FB_BUFFER_SIZE bytes long, the IT
 * scaling table (only for codecs that need one) follows directly after it.
 */
#define FB_BUFFER_OFFSET		0x1000
#define FB_BUFFER_SIZE			2048
#define IT_SCALING_TABLE_SIZE		992
#define RDECODE_SESSION_CONTEXT_SIZE	(128 * 1024)

/* registers used to hand commands and buffer addresses to the VCPU */
#define RDECODE_GPCOM_VCPU_CMD		0x2070c
#define RDECODE_GPCOM_VCPU_DATA0	0x20710
#define RDECODE_GPCOM_VCPU_DATA1	0x20714
#define RDECODE_ENGINE_CNTL		0x20718

/* reference frame counts used when sizing the reference picture buffer */
#define NUM_MPEG2_REFS			6
#define NUM_H264_REFS			17
#define NUM_VC1_REFS			5
57 struct radeon_decoder
{
58 struct pipe_video_codec base
;
60 unsigned stream_handle
;
62 unsigned frame_number
;
64 struct pipe_screen
*screen
;
65 struct radeon_winsys
*ws
;
66 struct radeon_winsys_cs
*cs
;
73 struct rvid_buffer msg_fb_it_buffers
[NUM_BUFFERS
];
74 struct rvid_buffer bs_buffers
[NUM_BUFFERS
];
75 struct rvid_buffer dpb
;
76 struct rvid_buffer ctx
;
77 struct rvid_buffer sessionctx
;
/* destructor callback for the data attached to a video buffer */
static void radeon_dec_destroy_associated_data(void *data)
{
	/* NOOP, since we only use an intptr */
}
/*
 * Placeholder for building the "create" message; called with the message
 * buffer mapped. Currently does nothing.
 */
static void rvcn_dec_message_create(struct radeon_decoder *dec)
{
	/* TODO */
}
/*
 * Placeholder for building the "decode" message; the caller uses the
 * returned buffer as the decoding target. Currently returns NULL.
 */
static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec)
{
	/* TODO */
	return NULL;
}
/*
 * Placeholder for building the "destroy" message; called with the message
 * buffer mapped. Currently does nothing.
 */
static void rvcn_dec_message_destroy(struct radeon_decoder *dec)
{
	/* TODO */
}
/*
 * Placeholder for preparing the feedback area; called with the feedback
 * buffer mapped. Currently does nothing.
 */
static void rvcn_dec_message_feedback(struct radeon_decoder *dec)
{
	/* TODO */
}
109 /* flush IB to the hardware */
110 static int flush(struct radeon_decoder
*dec
, unsigned flags
)
112 return dec
->ws
->cs_flush(dec
->cs
, flags
, NULL
);
115 /* add a new set register command to the IB */
116 static void set_reg(struct radeon_decoder
*dec
, unsigned reg
, uint32_t val
)
118 radeon_emit(dec
->cs
, RDECODE_PKT0(reg
>> 2, 0));
119 radeon_emit(dec
->cs
, val
);
122 /* send a command to the VCPU through the GPCOM registers */
123 static void send_cmd(struct radeon_decoder
*dec
, unsigned cmd
,
124 struct pb_buffer
* buf
, uint32_t off
,
125 enum radeon_bo_usage usage
, enum radeon_bo_domain domain
)
129 dec
->ws
->cs_add_buffer(dec
->cs
, buf
, usage
| RADEON_USAGE_SYNCHRONIZED
,
130 domain
, RADEON_PRIO_UVD
);
131 addr
= dec
->ws
->buffer_get_virtual_address(buf
);
134 set_reg(dec
, RDECODE_GPCOM_VCPU_DATA0
, addr
);
135 set_reg(dec
, RDECODE_GPCOM_VCPU_DATA1
, addr
>> 32);
136 set_reg(dec
, RDECODE_GPCOM_VCPU_CMD
, cmd
<< 1);
139 /* do the codec needs an IT buffer ?*/
140 static bool have_it(struct radeon_decoder
*dec
)
142 return dec
->stream_type
== RDECODE_CODEC_H264_PERF
||
143 dec
->stream_type
== RDECODE_CODEC_H265
;
146 /* map the next available message/feedback/itscaling buffer */
147 static void map_msg_fb_it_buf(struct radeon_decoder
*dec
)
149 struct rvid_buffer
* buf
;
152 /* grab the current message/feedback buffer */
153 buf
= &dec
->msg_fb_it_buffers
[dec
->cur_buffer
];
155 /* and map it for CPU access */
156 ptr
= dec
->ws
->buffer_map(buf
->res
->buf
, dec
->cs
, PIPE_TRANSFER_WRITE
);
158 /* calc buffer offsets */
161 dec
->fb
= (uint32_t *)(ptr
+ FB_BUFFER_OFFSET
);
163 dec
->it
= (uint8_t *)(ptr
+ FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
);
166 /* unmap and send a message command to the VCPU */
167 static void send_msg_buf(struct radeon_decoder
*dec
)
169 struct rvid_buffer
* buf
;
171 /* ignore the request if message/feedback buffer isn't mapped */
172 if (!dec
->msg
|| !dec
->fb
)
175 /* grab the current message buffer */
176 buf
= &dec
->msg_fb_it_buffers
[dec
->cur_buffer
];
178 /* unmap the buffer */
179 dec
->ws
->buffer_unmap(buf
->res
->buf
);
184 if (dec
->sessionctx
.res
)
185 send_cmd(dec
, RDECODE_CMD_SESSION_CONTEXT_BUFFER
,
186 dec
->sessionctx
.res
->buf
, 0, RADEON_USAGE_READWRITE
,
189 /* and send it to the hardware */
190 send_cmd(dec
, RDECODE_CMD_MSG_BUFFER
, buf
->res
->buf
, 0,
191 RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
194 /* cycle to the next set of buffers */
195 static void next_buffer(struct radeon_decoder
*dec
)
198 dec
->cur_buffer
%= NUM_BUFFERS
;
201 static unsigned calc_ctx_size_h264_perf(struct radeon_decoder
*dec
)
203 unsigned width_in_mb
, height_in_mb
, ctx_size
;
204 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
205 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
207 unsigned max_references
= dec
->base
.max_references
+ 1;
209 // picture width & height in 16 pixel units
210 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
211 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
213 unsigned fs_in_mb
= width_in_mb
* height_in_mb
;
214 unsigned num_dpb_buffer
;
215 switch(dec
->base
.level
) {
217 num_dpb_buffer
= 8100 / fs_in_mb
;
220 num_dpb_buffer
= 18000 / fs_in_mb
;
223 num_dpb_buffer
= 20480 / fs_in_mb
;
226 num_dpb_buffer
= 32768 / fs_in_mb
;
229 num_dpb_buffer
= 34816 / fs_in_mb
;
232 num_dpb_buffer
= 110400 / fs_in_mb
;
235 num_dpb_buffer
= 184320 / fs_in_mb
;
238 num_dpb_buffer
= 184320 / fs_in_mb
;
242 max_references
= MAX2(MIN2(NUM_H264_REFS
, num_dpb_buffer
), max_references
);
243 ctx_size
= max_references
* align(width_in_mb
* height_in_mb
* 192, 256);
248 /* calculate size of reference picture buffer */
249 static unsigned calc_dpb_size(struct radeon_decoder
*dec
)
251 unsigned width_in_mb
, height_in_mb
, image_size
, dpb_size
;
253 // always align them to MB size for dpb calculation
254 unsigned width
= align(dec
->base
.width
, VL_MACROBLOCK_WIDTH
);
255 unsigned height
= align(dec
->base
.height
, VL_MACROBLOCK_HEIGHT
);
257 // always one more for currently decoded picture
258 unsigned max_references
= dec
->base
.max_references
+ 1;
260 // aligned size of a single frame
261 image_size
= align(width
, 32) * height
;
262 image_size
+= image_size
/ 2;
263 image_size
= align(image_size
, 1024);
265 // picture width & height in 16 pixel units
266 width_in_mb
= width
/ VL_MACROBLOCK_WIDTH
;
267 height_in_mb
= align(height
/ VL_MACROBLOCK_HEIGHT
, 2);
269 switch (u_reduce_video_profile(dec
->base
.profile
)) {
270 case PIPE_VIDEO_FORMAT_MPEG4_AVC
: {
271 unsigned fs_in_mb
= width_in_mb
* height_in_mb
;
272 unsigned num_dpb_buffer
;
274 switch(dec
->base
.level
) {
276 num_dpb_buffer
= 8100 / fs_in_mb
;
279 num_dpb_buffer
= 18000 / fs_in_mb
;
282 num_dpb_buffer
= 20480 / fs_in_mb
;
285 num_dpb_buffer
= 32768 / fs_in_mb
;
288 num_dpb_buffer
= 34816 / fs_in_mb
;
291 num_dpb_buffer
= 110400 / fs_in_mb
;
294 num_dpb_buffer
= 184320 / fs_in_mb
;
297 num_dpb_buffer
= 184320 / fs_in_mb
;
301 max_references
= MAX2(MIN2(NUM_H264_REFS
, num_dpb_buffer
), max_references
);
302 dpb_size
= image_size
* max_references
;
306 case PIPE_VIDEO_FORMAT_HEVC
:
307 if (dec
->base
.width
* dec
->base
.height
>= 4096*2000)
308 max_references
= MAX2(max_references
, 8);
310 max_references
= MAX2(max_references
, 17);
312 width
= align (width
, 16);
313 height
= align (height
, 16);
314 if (dec
->base
.profile
== PIPE_VIDEO_PROFILE_HEVC_MAIN_10
)
315 dpb_size
= align((align(width
, 32) * height
* 9) / 4, 256) * max_references
;
317 dpb_size
= align((align(width
, 32) * height
* 3) / 2, 256) * max_references
;
320 case PIPE_VIDEO_FORMAT_VC1
:
321 // the firmware seems to allways assume a minimum of ref frames
322 max_references
= MAX2(NUM_VC1_REFS
, max_references
);
324 // reference picture buffer
325 dpb_size
= image_size
* max_references
;
328 dpb_size
+= width_in_mb
* height_in_mb
* 128;
331 dpb_size
+= width_in_mb
* 64;
334 dpb_size
+= width_in_mb
* 128;
337 dpb_size
+= align(MAX2(width_in_mb
, height_in_mb
) * 7 * 16, 64);
340 case PIPE_VIDEO_FORMAT_MPEG12
:
341 // reference picture buffer, must be big enough for all frames
342 dpb_size
= image_size
* NUM_MPEG2_REFS
;
345 case PIPE_VIDEO_FORMAT_MPEG4
:
346 // reference picture buffer
347 dpb_size
= image_size
* max_references
;
350 dpb_size
+= width_in_mb
* height_in_mb
* 64;
353 dpb_size
+= align(width_in_mb
* height_in_mb
* 32, 64);
355 dpb_size
= MAX2(dpb_size
, 30 * 1024 * 1024);
359 // something is missing here
362 // at least use a sane default value
363 dpb_size
= 32 * 1024 * 1024;
370 * destroy this video decoder
372 static void radeon_dec_destroy(struct pipe_video_codec
*decoder
)
374 struct radeon_decoder
*dec
= (struct radeon_decoder
*)decoder
;
379 map_msg_fb_it_buf(dec
);
380 rvcn_dec_message_destroy(dec
);
385 dec
->ws
->cs_destroy(dec
->cs
);
387 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
388 rvid_destroy_buffer(&dec
->msg_fb_it_buffers
[i
]);
389 rvid_destroy_buffer(&dec
->bs_buffers
[i
]);
392 rvid_destroy_buffer(&dec
->dpb
);
393 rvid_destroy_buffer(&dec
->ctx
);
394 rvid_destroy_buffer(&dec
->sessionctx
);
400 * start decoding of a new frame
402 static void radeon_dec_begin_frame(struct pipe_video_codec
*decoder
,
403 struct pipe_video_buffer
*target
,
404 struct pipe_picture_desc
*picture
)
406 struct radeon_decoder
*dec
= (struct radeon_decoder
*)decoder
;
411 frame
= ++dec
->frame_number
;
412 vl_video_buffer_set_associated_data(target
, decoder
, (void *)frame
,
413 &radeon_dec_destroy_associated_data
);
416 dec
->bs_ptr
= dec
->ws
->buffer_map(
417 dec
->bs_buffers
[dec
->cur_buffer
].res
->buf
,
418 dec
->cs
, PIPE_TRANSFER_WRITE
);
/**
 * decode a macroblock
 *
 * Macroblock-level decoding is not implemented by this decoder; the call
 * has no effect.
 */
static void radeon_dec_decode_macroblock(struct pipe_video_codec *decoder,
					 struct pipe_video_buffer *target,
					 struct pipe_picture_desc *picture,
					 const struct pipe_macroblock *macroblocks,
					 unsigned num_macroblocks)
{
	/* not supported (yet) */
}
437 static void radeon_dec_decode_bitstream(struct pipe_video_codec
*decoder
,
438 struct pipe_video_buffer
*target
,
439 struct pipe_picture_desc
*picture
,
440 unsigned num_buffers
,
441 const void * const *buffers
,
442 const unsigned *sizes
)
444 struct radeon_decoder
*dec
= (struct radeon_decoder
*)decoder
;
452 for (i
= 0; i
< num_buffers
; ++i
) {
453 struct rvid_buffer
*buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
454 unsigned new_size
= dec
->bs_size
+ sizes
[i
];
456 if (new_size
> buf
->res
->buf
->size
) {
457 dec
->ws
->buffer_unmap(buf
->res
->buf
);
458 if (!rvid_resize_buffer(dec
->screen
, dec
->cs
, buf
, new_size
)) {
459 RVID_ERR("Can't resize bitstream buffer!");
463 dec
->bs_ptr
= dec
->ws
->buffer_map(buf
->res
->buf
, dec
->cs
,
464 PIPE_TRANSFER_WRITE
);
468 dec
->bs_ptr
+= dec
->bs_size
;
471 memcpy(dec
->bs_ptr
, buffers
[i
], sizes
[i
]);
472 dec
->bs_size
+= sizes
[i
];
473 dec
->bs_ptr
+= sizes
[i
];
478 * end decoding of the current frame
480 static void radeon_dec_end_frame(struct pipe_video_codec
*decoder
,
481 struct pipe_video_buffer
*target
,
482 struct pipe_picture_desc
*picture
)
484 struct radeon_decoder
*dec
= (struct radeon_decoder
*)decoder
;
485 struct pb_buffer
*dt
;
486 struct rvid_buffer
*msg_fb_it_buf
, *bs_buf
;
493 msg_fb_it_buf
= &dec
->msg_fb_it_buffers
[dec
->cur_buffer
];
494 bs_buf
= &dec
->bs_buffers
[dec
->cur_buffer
];
496 memset(dec
->bs_ptr
, 0, align(dec
->bs_size
, 128) - dec
->bs_size
);
497 dec
->ws
->buffer_unmap(bs_buf
->res
->buf
);
499 map_msg_fb_it_buf(dec
);
500 dt
= rvcn_dec_message_decode(dec
);
501 rvcn_dec_message_feedback(dec
);
504 send_cmd(dec
, RDECODE_CMD_DPB_BUFFER
, dec
->dpb
.res
->buf
, 0,
505 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
507 send_cmd(dec
, RDECODE_CMD_CONTEXT_BUFFER
, dec
->ctx
.res
->buf
, 0,
508 RADEON_USAGE_READWRITE
, RADEON_DOMAIN_VRAM
);
509 send_cmd(dec
, RDECODE_CMD_BITSTREAM_BUFFER
, bs_buf
->res
->buf
,
510 0, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
511 send_cmd(dec
, RDECODE_CMD_DECODING_TARGET_BUFFER
, dt
, 0,
512 RADEON_USAGE_WRITE
, RADEON_DOMAIN_VRAM
);
513 send_cmd(dec
, RDECODE_CMD_FEEDBACK_BUFFER
, msg_fb_it_buf
->res
->buf
,
514 FB_BUFFER_OFFSET
, RADEON_USAGE_WRITE
, RADEON_DOMAIN_GTT
);
516 send_cmd(dec
, RDECODE_CMD_IT_SCALING_TABLE_BUFFER
, msg_fb_it_buf
->res
->buf
,
517 FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
, RADEON_USAGE_READ
, RADEON_DOMAIN_GTT
);
518 set_reg(dec
, RDECODE_ENGINE_CNTL
, 1);
520 flush(dec
, RADEON_FLUSH_ASYNC
);
/**
 * flush any outstanding command buffers to the hardware
 *
 * Nothing to do here: radeon_dec_end_frame() already flushes each frame.
 */
static void radeon_dec_flush(struct pipe_video_codec *decoder)
{
}
532 * create and HW decoder
534 struct pipe_video_codec
*radeon_create_decoder(struct pipe_context
*context
,
535 const struct pipe_video_codec
*templ
)
537 struct radeon_winsys
* ws
= ((struct r600_common_context
*)context
)->ws
;
538 struct r600_common_context
*rctx
= (struct r600_common_context
*)context
;
539 unsigned width
= templ
->width
, height
= templ
->height
;
540 unsigned dpb_size
, bs_buf_size
, stream_type
= 0;
541 struct radeon_decoder
*dec
;
544 switch(u_reduce_video_profile(templ
->profile
)) {
545 case PIPE_VIDEO_FORMAT_MPEG12
:
546 if (templ
->entrypoint
> PIPE_VIDEO_ENTRYPOINT_BITSTREAM
)
547 return vl_create_mpeg12_decoder(context
, templ
);
548 stream_type
= RDECODE_CODEC_MPEG2_VLD
;
550 case PIPE_VIDEO_FORMAT_MPEG4
:
551 width
= align(width
, VL_MACROBLOCK_WIDTH
);
552 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
553 stream_type
= RDECODE_CODEC_MPEG4
;
555 case PIPE_VIDEO_FORMAT_VC1
:
556 stream_type
= RDECODE_CODEC_VC1
;
558 case PIPE_VIDEO_FORMAT_MPEG4_AVC
:
559 width
= align(width
, VL_MACROBLOCK_WIDTH
);
560 height
= align(height
, VL_MACROBLOCK_HEIGHT
);
561 stream_type
= RDECODE_CODEC_H264_PERF
;
563 case PIPE_VIDEO_FORMAT_HEVC
:
564 stream_type
= RDECODE_CODEC_H265
;
571 dec
= CALLOC_STRUCT(radeon_decoder
);
577 dec
->base
.context
= context
;
578 dec
->base
.width
= width
;
579 dec
->base
.height
= height
;
581 dec
->base
.destroy
= radeon_dec_destroy
;
582 dec
->base
.begin_frame
= radeon_dec_begin_frame
;
583 dec
->base
.decode_macroblock
= radeon_dec_decode_macroblock
;
584 dec
->base
.decode_bitstream
= radeon_dec_decode_bitstream
;
585 dec
->base
.end_frame
= radeon_dec_end_frame
;
586 dec
->base
.flush
= radeon_dec_flush
;
588 dec
->stream_type
= stream_type
;
589 dec
->stream_handle
= rvid_alloc_stream_handle();
590 dec
->screen
= context
->screen
;
592 dec
->cs
= ws
->cs_create(rctx
->ctx
, RING_VCN_DEC
, NULL
, NULL
);
594 RVID_ERR("Can't get command submission context.\n");
598 bs_buf_size
= width
* height
* (512 / (16 * 16));
599 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
600 unsigned msg_fb_it_size
= FB_BUFFER_OFFSET
+ FB_BUFFER_SIZE
;
602 msg_fb_it_size
+= IT_SCALING_TABLE_SIZE
;
603 if (!rvid_create_buffer(dec
->screen
, &dec
->msg_fb_it_buffers
[i
],
604 msg_fb_it_size
, PIPE_USAGE_STAGING
)) {
605 RVID_ERR("Can't allocated message buffers.\n");
609 if (!rvid_create_buffer(dec
->screen
, &dec
->bs_buffers
[i
],
610 bs_buf_size
, PIPE_USAGE_STAGING
)) {
611 RVID_ERR("Can't allocated bitstream buffers.\n");
615 rvid_clear_buffer(context
, &dec
->msg_fb_it_buffers
[i
]);
616 rvid_clear_buffer(context
, &dec
->bs_buffers
[i
]);
619 dpb_size
= calc_dpb_size(dec
);
621 if (!rvid_create_buffer(dec
->screen
, &dec
->dpb
, dpb_size
, PIPE_USAGE_DEFAULT
)) {
622 RVID_ERR("Can't allocated dpb.\n");
626 rvid_clear_buffer(context
, &dec
->dpb
);
628 if (dec
->stream_type
== RDECODE_CODEC_H264_PERF
) {
629 unsigned ctx_size
= calc_ctx_size_h264_perf(dec
);
630 if (!rvid_create_buffer(dec
->screen
, &dec
->ctx
, ctx_size
, PIPE_USAGE_DEFAULT
)) {
631 RVID_ERR("Can't allocated context buffer.\n");
634 rvid_clear_buffer(context
, &dec
->ctx
);
637 if (!rvid_create_buffer(dec
->screen
, &dec
->sessionctx
,
638 RDECODE_SESSION_CONTEXT_SIZE
,
639 PIPE_USAGE_DEFAULT
)) {
640 RVID_ERR("Can't allocated session ctx.\n");
643 rvid_clear_buffer(context
, &dec
->sessionctx
);
645 map_msg_fb_it_buf(dec
);
646 rvcn_dec_message_create(dec
);
657 if (dec
->cs
) dec
->ws
->cs_destroy(dec
->cs
);
659 for (i
= 0; i
< NUM_BUFFERS
; ++i
) {
660 rvid_destroy_buffer(&dec
->msg_fb_it_buffers
[i
]);
661 rvid_destroy_buffer(&dec
->bs_buffers
[i
]);
664 rvid_destroy_buffer(&dec
->dpb
);
665 rvid_destroy_buffer(&dec
->ctx
);
666 rvid_destroy_buffer(&dec
->sessionctx
);