/**************************************************************************
 *
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
#include "radeon_vce.h"

#include "pipe/p_video_codec.h"
#include "radeon_video.h"
#include "radeonsi/si_pipe.h"
#include "util/u_memory.h"
#include "util/u_video.h"
#include "vl/vl_video_buffer.h"

#include <stdio.h>
/*
 * VCE firmware version codes, compared against info.vce_fw_version.
 * FW_<a>_<b>_<c> packs a into bits 31..24, b into bits 23..16 and c into
 * bits 15..8; FW_53 carries only the top byte.
 */
#define FW_VERSION(a, b, c) (((a) << 24) | ((b) << 16) | ((c) << 8))

#define FW_40_2_2  FW_VERSION(40, 2, 2)
#define FW_50_0_1  FW_VERSION(50, 0, 1)
#define FW_50_1_2  FW_VERSION(50, 1, 2)
#define FW_50_10_2 FW_VERSION(50, 10, 2)
#define FW_50_17_3 FW_VERSION(50, 17, 3)
#define FW_52_0_3  FW_VERSION(52, 0, 3)
#define FW_52_4_3  FW_VERSION(52, 4, 3)
#define FW_52_8_3  FW_VERSION(52, 8, 3)
#define FW_53      (53 << 24)
50 * flush commands to the hardware
52 static void flush(struct rvce_encoder
*enc
)
54 enc
->ws
->cs_flush(enc
->cs
, PIPE_FLUSH_ASYNC
, NULL
);
55 enc
->task_info_idx
= 0;
60 static void dump_feedback(struct rvce_encoder
*enc
, struct rvid_buffer
*fb
)
62 uint32_t *ptr
= enc
->ws
->buffer_map(fb
->res
->buf
, enc
->cs
, PIPE_TRANSFER_READ_WRITE
);
64 fprintf(stderr
, "\n");
65 fprintf(stderr
, "encStatus:\t\t\t%08x\n", ptr
[i
++]);
66 fprintf(stderr
, "encHasBitstream:\t\t%08x\n", ptr
[i
++]);
67 fprintf(stderr
, "encHasAudioBitstream:\t\t%08x\n", ptr
[i
++]);
68 fprintf(stderr
, "encBitstreamOffset:\t\t%08x\n", ptr
[i
++]);
69 fprintf(stderr
, "encBitstreamSize:\t\t%08x\n", ptr
[i
++]);
70 fprintf(stderr
, "encAudioBitstreamOffset:\t%08x\n", ptr
[i
++]);
71 fprintf(stderr
, "encAudioBitstreamSize:\t\t%08x\n", ptr
[i
++]);
72 fprintf(stderr
, "encExtrabytes:\t\t\t%08x\n", ptr
[i
++]);
73 fprintf(stderr
, "encAudioExtrabytes:\t\t%08x\n", ptr
[i
++]);
74 fprintf(stderr
, "videoTimeStamp:\t\t\t%08x\n", ptr
[i
++]);
75 fprintf(stderr
, "audioTimeStamp:\t\t\t%08x\n", ptr
[i
++]);
76 fprintf(stderr
, "videoOutputType:\t\t%08x\n", ptr
[i
++]);
77 fprintf(stderr
, "attributeFlags:\t\t\t%08x\n", ptr
[i
++]);
78 fprintf(stderr
, "seiPrivatePackageOffset:\t%08x\n", ptr
[i
++]);
79 fprintf(stderr
, "seiPrivatePackageSize:\t\t%08x\n", ptr
[i
++]);
80 fprintf(stderr
, "\n");
81 enc
->ws
->buffer_unmap(fb
->res
->buf
);
86 * reset the CPB handling
88 static void reset_cpb(struct rvce_encoder
*enc
)
92 list_inithead(&enc
->cpb_slots
);
93 for (i
= 0; i
< enc
->cpb_num
; ++i
) {
94 struct rvce_cpb_slot
*slot
= &enc
->cpb_array
[i
];
96 slot
->picture_type
= PIPE_H264_ENC_PICTURE_TYPE_SKIP
;
98 slot
->pic_order_cnt
= 0;
99 list_addtail(&slot
->list
, &enc
->cpb_slots
);
104 * sort l0 and l1 to the top of the list
106 static void sort_cpb(struct rvce_encoder
*enc
)
108 struct rvce_cpb_slot
*i
, *l0
= NULL
, *l1
= NULL
;
110 LIST_FOR_EACH_ENTRY (i
, &enc
->cpb_slots
, list
) {
111 if (i
->frame_num
== enc
->pic
.ref_idx_l0
)
114 if (i
->frame_num
== enc
->pic
.ref_idx_l1
)
117 if (enc
->pic
.picture_type
== PIPE_H264_ENC_PICTURE_TYPE_P
&& l0
)
120 if (enc
->pic
.picture_type
== PIPE_H264_ENC_PICTURE_TYPE_B
&& l0
&& l1
)
126 list_add(&l1
->list
, &enc
->cpb_slots
);
131 list_add(&l0
->list
, &enc
->cpb_slots
);
136 * get number of cpbs based on dpb
138 static unsigned get_cpb_num(struct rvce_encoder
*enc
)
140 unsigned w
= align(enc
->base
.width
, 16) / 16;
141 unsigned h
= align(enc
->base
.height
, 16) / 16;
144 switch (enc
->base
.level
) {
186 return MIN2(dpb
/ (w
* h
), 16);
190 * Get the slot for the currently encoded frame
192 struct rvce_cpb_slot
*si_current_slot(struct rvce_encoder
*enc
)
194 return LIST_ENTRY(struct rvce_cpb_slot
, enc
->cpb_slots
.prev
, list
);
198 * Get the slot for L0
200 struct rvce_cpb_slot
*si_l0_slot(struct rvce_encoder
*enc
)
202 return LIST_ENTRY(struct rvce_cpb_slot
, enc
->cpb_slots
.next
, list
);
206 * Get the slot for L1
208 struct rvce_cpb_slot
*si_l1_slot(struct rvce_encoder
*enc
)
210 return LIST_ENTRY(struct rvce_cpb_slot
, enc
->cpb_slots
.next
->next
, list
);
214 * Calculate the offsets into the CPB
216 void si_vce_frame_offset(struct rvce_encoder
*enc
, struct rvce_cpb_slot
*slot
, signed *luma_offset
,
217 signed *chroma_offset
)
219 struct si_screen
*sscreen
= (struct si_screen
*)enc
->screen
;
220 unsigned pitch
, vpitch
, fsize
;
222 if (sscreen
->info
.chip_class
< GFX9
) {
223 pitch
= align(enc
->luma
->u
.legacy
.level
[0].nblk_x
* enc
->luma
->bpe
, 128);
224 vpitch
= align(enc
->luma
->u
.legacy
.level
[0].nblk_y
, 16);
226 pitch
= align(enc
->luma
->u
.gfx9
.surf_pitch
* enc
->luma
->bpe
, 256);
227 vpitch
= align(enc
->luma
->u
.gfx9
.surf_height
, 16);
229 fsize
= pitch
* (vpitch
+ vpitch
/ 2);
231 *luma_offset
= slot
->index
* fsize
;
232 *chroma_offset
= *luma_offset
+ pitch
* vpitch
;
236 * destroy this video encoder
238 static void rvce_destroy(struct pipe_video_codec
*encoder
)
240 struct rvce_encoder
*enc
= (struct rvce_encoder
*)encoder
;
241 if (enc
->stream_handle
) {
242 struct rvid_buffer fb
;
243 si_vid_create_buffer(enc
->screen
, &fb
, 512, PIPE_USAGE_STAGING
);
248 si_vid_destroy_buffer(&fb
);
250 si_vid_destroy_buffer(&enc
->cpb
);
251 enc
->ws
->cs_destroy(enc
->cs
);
252 FREE(enc
->cpb_array
);
256 static void rvce_begin_frame(struct pipe_video_codec
*encoder
, struct pipe_video_buffer
*source
,
257 struct pipe_picture_desc
*picture
)
259 struct rvce_encoder
*enc
= (struct rvce_encoder
*)encoder
;
260 struct vl_video_buffer
*vid_buf
= (struct vl_video_buffer
*)source
;
261 struct pipe_h264_enc_picture_desc
*pic
= (struct pipe_h264_enc_picture_desc
*)picture
;
263 bool need_rate_control
=
264 enc
->pic
.rate_ctrl
.rate_ctrl_method
!= pic
->rate_ctrl
.rate_ctrl_method
||
265 enc
->pic
.quant_i_frames
!= pic
->quant_i_frames
||
266 enc
->pic
.quant_p_frames
!= pic
->quant_p_frames
||
267 enc
->pic
.quant_b_frames
!= pic
->quant_b_frames
||
268 enc
->pic
.rate_ctrl
.target_bitrate
!= pic
->rate_ctrl
.target_bitrate
;
271 enc
->si_get_pic_param(enc
, pic
);
273 enc
->get_buffer(vid_buf
->resources
[0], &enc
->handle
, &enc
->luma
);
274 enc
->get_buffer(vid_buf
->resources
[1], NULL
, &enc
->chroma
);
276 if (pic
->picture_type
== PIPE_H264_ENC_PICTURE_TYPE_IDR
)
278 else if (pic
->picture_type
== PIPE_H264_ENC_PICTURE_TYPE_P
||
279 pic
->picture_type
== PIPE_H264_ENC_PICTURE_TYPE_B
)
282 if (!enc
->stream_handle
) {
283 struct rvid_buffer fb
;
284 enc
->stream_handle
= si_vid_alloc_stream_handle();
285 si_vid_create_buffer(enc
->screen
, &fb
, 512, PIPE_USAGE_STAGING
);
292 // dump_feedback(enc, &fb);
293 si_vid_destroy_buffer(&fb
);
294 need_rate_control
= false;
297 if (need_rate_control
) {
304 static void rvce_encode_bitstream(struct pipe_video_codec
*encoder
,
305 struct pipe_video_buffer
*source
,
306 struct pipe_resource
*destination
, void **fb
)
308 struct rvce_encoder
*enc
= (struct rvce_encoder
*)encoder
;
309 enc
->get_buffer(destination
, &enc
->bs_handle
, NULL
);
310 enc
->bs_size
= destination
->width0
;
312 *fb
= enc
->fb
= CALLOC_STRUCT(rvid_buffer
);
313 if (!si_vid_create_buffer(enc
->screen
, enc
->fb
, 512, PIPE_USAGE_STAGING
)) {
314 RVID_ERR("Can't create feedback buffer.\n");
317 if (!radeon_emitted(enc
->cs
, 0))
323 static void rvce_end_frame(struct pipe_video_codec
*encoder
, struct pipe_video_buffer
*source
,
324 struct pipe_picture_desc
*picture
)
326 struct rvce_encoder
*enc
= (struct rvce_encoder
*)encoder
;
327 struct rvce_cpb_slot
*slot
= LIST_ENTRY(struct rvce_cpb_slot
, enc
->cpb_slots
.prev
, list
);
329 if (!enc
->dual_inst
|| enc
->bs_idx
> 1)
332 /* update the CPB backtrack with the just encoded frame */
333 slot
->picture_type
= enc
->pic
.picture_type
;
334 slot
->frame_num
= enc
->pic
.frame_num
;
335 slot
->pic_order_cnt
= enc
->pic
.pic_order_cnt
;
336 if (!enc
->pic
.not_referenced
) {
337 list_del(&slot
->list
);
338 list_add(&slot
->list
, &enc
->cpb_slots
);
342 static void rvce_get_feedback(struct pipe_video_codec
*encoder
, void *feedback
, unsigned *size
)
344 struct rvce_encoder
*enc
= (struct rvce_encoder
*)encoder
;
345 struct rvid_buffer
*fb
= feedback
;
348 uint32_t *ptr
= enc
->ws
->buffer_map(fb
->res
->buf
, enc
->cs
,
349 PIPE_TRANSFER_READ_WRITE
| RADEON_TRANSFER_TEMPORARY
);
352 *size
= ptr
[4] - ptr
[9];
357 enc
->ws
->buffer_unmap(fb
->res
->buf
);
359 // dump_feedback(enc, fb);
360 si_vid_destroy_buffer(fb
);
/**
 * Flush any outstanding command buffers to the hardware.
 */
static void rvce_flush(struct pipe_video_codec *encoder)
{
   struct rvce_encoder *enc = (struct rvce_encoder *)encoder;

   flush(enc);
}
/**
 * Flush callback handed to cs_create(); requests from the winsys are
 * intentionally ignored.
 */
static void rvce_cs_flush(void *ctx, unsigned flags, struct pipe_fence_handle **fence)
{
   /* just ignored */
}
379 struct pipe_video_codec
*si_vce_create_encoder(struct pipe_context
*context
,
380 const struct pipe_video_codec
*templ
,
381 struct radeon_winsys
*ws
, rvce_get_buffer get_buffer
)
383 struct si_screen
*sscreen
= (struct si_screen
*)context
->screen
;
384 struct si_context
*sctx
= (struct si_context
*)context
;
385 struct rvce_encoder
*enc
;
386 struct pipe_video_buffer
*tmp_buf
, templat
= {};
387 struct radeon_surf
*tmp_surf
;
390 if (!sscreen
->info
.vce_fw_version
) {
391 RVID_ERR("Kernel doesn't supports VCE!\n");
394 } else if (!si_vce_is_fw_version_supported(sscreen
)) {
395 RVID_ERR("Unsupported VCE fw version loaded!\n");
399 enc
= CALLOC_STRUCT(rvce_encoder
);
403 if (sscreen
->info
.is_amdgpu
)
405 if ((!sscreen
->info
.is_amdgpu
&& sscreen
->info
.drm_minor
>= 42) || sscreen
->info
.is_amdgpu
)
407 if (sscreen
->info
.family
>= CHIP_TONGA
&& sscreen
->info
.family
!= CHIP_STONEY
&&
408 sscreen
->info
.family
!= CHIP_POLARIS11
&& sscreen
->info
.family
!= CHIP_POLARIS12
&&
409 sscreen
->info
.family
!= CHIP_VEGAM
)
410 enc
->dual_pipe
= true;
411 /* TODO enable B frame with dual instance */
412 if ((sscreen
->info
.family
>= CHIP_TONGA
) && (templ
->max_references
== 1) &&
413 (sscreen
->info
.vce_harvest_config
== 0))
414 enc
->dual_inst
= true;
417 enc
->base
.context
= context
;
419 enc
->base
.destroy
= rvce_destroy
;
420 enc
->base
.begin_frame
= rvce_begin_frame
;
421 enc
->base
.encode_bitstream
= rvce_encode_bitstream
;
422 enc
->base
.end_frame
= rvce_end_frame
;
423 enc
->base
.flush
= rvce_flush
;
424 enc
->base
.get_feedback
= rvce_get_feedback
;
425 enc
->get_buffer
= get_buffer
;
427 enc
->screen
= context
->screen
;
429 enc
->cs
= ws
->cs_create(sctx
->ctx
, RING_VCE
, rvce_cs_flush
, enc
, false);
431 RVID_ERR("Can't get command submission context.\n");
435 templat
.buffer_format
= PIPE_FORMAT_NV12
;
436 templat
.width
= enc
->base
.width
;
437 templat
.height
= enc
->base
.height
;
438 templat
.interlaced
= false;
439 if (!(tmp_buf
= context
->create_video_buffer(context
, &templat
))) {
440 RVID_ERR("Can't create video buffer.\n");
444 enc
->cpb_num
= get_cpb_num(enc
);
448 get_buffer(((struct vl_video_buffer
*)tmp_buf
)->resources
[0], NULL
, &tmp_surf
);
450 cpb_size
= (sscreen
->info
.chip_class
< GFX9
)
451 ? align(tmp_surf
->u
.legacy
.level
[0].nblk_x
* tmp_surf
->bpe
, 128) *
452 align(tmp_surf
->u
.legacy
.level
[0].nblk_y
, 32)
455 align(tmp_surf
->u
.gfx9
.surf_pitch
* tmp_surf
->bpe
, 256) *
456 align(tmp_surf
->u
.gfx9
.surf_height
, 32);
458 cpb_size
= cpb_size
* 3 / 2;
459 cpb_size
= cpb_size
* enc
->cpb_num
;
461 cpb_size
+= RVCE_MAX_AUX_BUFFER_NUM
* RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE
* 2;
462 tmp_buf
->destroy(tmp_buf
);
463 if (!si_vid_create_buffer(enc
->screen
, &enc
->cpb
, cpb_size
, PIPE_USAGE_DEFAULT
)) {
464 RVID_ERR("Can't create CPB buffer.\n");
468 enc
->cpb_array
= CALLOC(enc
->cpb_num
, sizeof(struct rvce_cpb_slot
));
474 switch (sscreen
->info
.vce_fw_version
) {
476 si_vce_40_2_2_init(enc
);
493 if ((sscreen
->info
.vce_fw_version
& (0xff << 24)) >= FW_53
) {
503 enc
->ws
->cs_destroy(enc
->cs
);
505 si_vid_destroy_buffer(&enc
->cpb
);
507 FREE(enc
->cpb_array
);
513 * check if kernel has the right fw version loaded
515 bool si_vce_is_fw_version_supported(struct si_screen
*sscreen
)
517 switch (sscreen
->info
.vce_fw_version
) {
528 if ((sscreen
->info
.vce_fw_version
& (0xff << 24)) >= FW_53
)
536 * Add the buffer as relocation to the current command submission
538 void si_vce_add_buffer(struct rvce_encoder
*enc
, struct pb_buffer
*buf
, enum radeon_bo_usage usage
,
539 enum radeon_bo_domain domain
, signed offset
)
543 reloc_idx
= enc
->ws
->cs_add_buffer(enc
->cs
, buf
, usage
| RADEON_USAGE_SYNCHRONIZED
, domain
, 0);
546 addr
= enc
->ws
->buffer_get_virtual_address(buf
);
547 addr
= addr
+ offset
;
551 offset
+= enc
->ws
->buffer_get_reloc_offset(buf
);
552 RVCE_CS(reloc_idx
* 4);