#include "nouveau_vp3_video.h"
struct mpeg12_picparm_vp {
- uint16_t width; // 00 in mb units
- uint16_t height; // 02 in mb units
-
- uint32_t unk04; // 04 stride for Y?
- uint32_t unk08; // 08 stride for CbCr?
-
- uint32_t ofs[6]; // 1c..20 ofs
- uint32_t bucket_size; // 24
- uint32_t inter_ring_data_size; // 28
- uint16_t unk2c; // 2c
- uint16_t alternate_scan; // 2e
- uint16_t unk30; // 30 not seen set yet
- uint16_t picture_structure; // 32
- uint16_t pad2[3];
- uint16_t unk3a; // 3a set on I frame?
-
- uint32_t f_code[4]; // 3c
- uint32_t picture_coding_type; // 4c
- uint32_t intra_dc_precision; // 50
- uint32_t q_scale_type; // 54
- uint32_t top_field_first; // 58
- uint32_t full_pel_forward_vector; // 5c
- uint32_t full_pel_backward_vector; // 60
- uint8_t intra_quantizer_matrix[0x40]; // 64
- uint8_t non_intra_quantizer_matrix[0x40]; // a4
+ uint16_t width; // 00 in mb units
+ uint16_t height; // 02 in mb units
+
+ uint32_t unk04; // 04 stride for Y?
+ uint32_t unk08; // 08 stride for CbCr?
+
+ uint32_t ofs[6]; // 0c..20 ofs
+ uint32_t bucket_size; // 24
+ uint32_t inter_ring_data_size; // 28
+ uint16_t unk2c; // 2c
+ uint16_t alternate_scan; // 2e
+ uint16_t unk30; // 30 not seen set yet
+ uint16_t picture_structure; // 32
+ uint16_t pad2[3]; // 34, 36, 38
+ uint16_t unk3a; // 3a set on I frame?
+
+ uint32_t f_code[4]; // 3c
+ uint32_t picture_coding_type; // 4c
+ uint32_t intra_dc_precision; // 50
+ uint32_t q_scale_type; // 54
+ uint32_t top_field_first; // 58
+ uint32_t full_pel_forward_vector; // 5c
+ uint32_t full_pel_backward_vector; // 60
+ uint8_t intra_quantizer_matrix[0x40]; // 64
+ uint8_t non_intra_quantizer_matrix[0x40]; // a4
};
struct mpeg4_picparm_vp {
- uint32_t width; // 00 in normal units
- uint32_t height; // 04 in normal units
- uint32_t unk08; // stride 1
- uint32_t unk0c; // stride 2
- uint32_t ofs[6]; // 10..24 ofs
- uint32_t bucket_size; // 28
- uint32_t pad1; // 2c, pad
- uint32_t pad2; // 30
- uint32_t inter_ring_data_size; // 34
-
- uint32_t trd[2]; // 38, 3c
- uint32_t trb[2]; // 40, 44
- uint32_t u48; // XXX codec selection? Should test with different values of VdpDecoderProfile
- uint16_t f_code_fw; // 4c
- uint16_t f_code_bw; // 4e
- uint8_t interlaced; // 50
-
- uint8_t quant_type; // bool, written to 528
- uint8_t quarter_sample; // bool, written to 548
- uint8_t short_video_header; // bool, negated written to 528 shifted by 1
- uint8_t u54; // bool, written to 0x740
- uint8_t vop_coding_type; // 55
- uint8_t rounding_control; // 56
- uint8_t alternate_vertical_scan_flag; // 57 bool
- uint8_t top_field_first; // bool, written to vuc
-
- uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob
- uint32_t pad5[0x10]; // 5c...9c non-inclusive, but WHY?
-
- uint32_t intra[0x10]; // 9c
- uint32_t non_intra[0x10]; // bc
- // udc..uff pad?
+ uint32_t width; // 00 in normal units
+ uint32_t height; // 04 in normal units
+ uint32_t unk08; // 08 stride 1
+ uint32_t unk0c; // 0c stride 2
+ uint32_t ofs[6]; // 10..24 ofs
+ uint32_t bucket_size; // 28
+ uint32_t pad1; // 2c, pad
+ uint32_t pad2; // 30
+ uint32_t inter_ring_data_size; // 34
+
+ uint32_t trd[2]; // 38, 3c
+ uint32_t trb[2]; // 40, 44
+ uint32_t u48; // 48 XXX codec selection? Should test with different values of VdpDecoderProfile
+ uint16_t f_code_fw; // 4c
+ uint16_t f_code_bw; // 4e
+ uint8_t interlaced; // 50
+
+ uint8_t quant_type; // 51 bool, written to 528
+ uint8_t quarter_sample; // 52 bool, written to 548
+ uint8_t short_video_header; // 53 bool, negated written to 528 shifted by 1
+ uint8_t u54; // 54 bool, written to 0x740
+ uint8_t vop_coding_type; // 55
+ uint8_t rounding_control; // 56
+ uint8_t alternate_vertical_scan_flag; // 57 bool
+ uint8_t top_field_first; // 58 bool, written to vuc
+
+ uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob
+
+ uint32_t intra[0x10]; // 5c
+ uint32_t non_intra[0x10]; // 9c
+ uint32_t pad5[0x10]; // dc what does this do?
+ // udc..uff pad?
};
// Full version, with data pumped from BSP
struct vc1_picparm_vp {
- uint32_t bucket_size; // 00
- uint32_t pad; // 04
-
- uint32_t inter_ring_data_size; // 08
- uint32_t unk0c; // stride 1
- uint32_t unk10; // stride 2
- uint32_t ofs[6]; // 14..28 ofs
-
- uint16_t width; // 2c
- uint16_t height; // 2e
-
- uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced
- uint8_t loopfilter; // 31 written into vuc
- uint8_t fastuvmc; // 32, written into vuc
- uint8_t dquant; // 33
-
- uint8_t overlap; // 34
- uint8_t quantizer; // 35
- uint8_t u36; // 36, bool
- uint8_t pad2; // 37, to align to 0x38
+ uint32_t bucket_size; // 00
+ uint32_t pad; // 04
+
+ uint32_t inter_ring_data_size; // 08
+ uint32_t unk0c; // 0c stride 1
+ uint32_t unk10; // 10 stride 2
+ uint32_t ofs[6]; // 14..28 ofs
+
+ uint16_t width; // 2c
+ uint16_t height; // 2e
+
+ uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced
+ uint8_t loopfilter; // 31 written into vuc
+ uint8_t fastuvmc; // 32, written into vuc
+ uint8_t dquant; // 33
+
+ uint8_t overlap; // 34
+ uint8_t quantizer; // 35
+ uint8_t u36; // 36, bool
+ uint8_t pad2; // 37, to align to 0x38
};
struct h264_picparm_vp { // 700..a00
- uint16_t width, height;
- uint32_t stride1, stride2; // 04 08
- uint32_t ofs[6]; // 0c..24 in-image offset
-
- uint32_t u24; // nfi ac8 ?
- uint32_t bucket_size; // 28 bucket size
- uint32_t inter_ring_data_size; // 2c
-
- unsigned f0 : 1; // 0 0x01: into 640 shifted by 3, 540 shifted by 5, half size something?
- unsigned f1 : 1; // 1 0x02: into vuc ofs 56
- unsigned weighted_pred_flag : 1; // 2 0x04
- unsigned f3 : 1; // 3 0x08: into vuc ofs 68
- unsigned is_reference : 1; // 4
- unsigned interlace : 1; // 5 field_pic_flag
- unsigned bottom_field_flag : 1; // 6
- unsigned f7 : 1; // 7 0x80: nfi yet
-
- signed log2_max_frame_num_minus4 : 4; // 31 0..3
- unsigned u31_45 : 2; // 31 4..5
- unsigned pic_order_cnt_type : 2; // 31 6..7
- signed pic_init_qp_minus26 : 6; // 32 0..5
- signed chroma_qp_index_offset : 5; // 32 6..10
- signed second_chroma_qp_index_offset : 5; // 32 11..15
-
- unsigned weighted_bipred_idc : 2; // 34 0..1
- unsigned fifo_dec_index : 7; // 34 2..8
- unsigned tmp_idx : 5; // 34 9..13
- unsigned frame_number : 16; // 34 14..29
- unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30]
- unsigned u34_3131 : 1; // 34 31..31 pad?
-
- uint32_t field_order_cnt[2]; // 38, 3c
-
- struct { // 40
- // 0x00223102
- // nfi (needs: top_is_reference, bottom_is_reference, is_long_term, maybe some other state that was saved..
- unsigned fifo_idx : 7; // 00 0..6
- unsigned tmp_idx : 5; // 00 7..11
- unsigned unk12 : 1; // 00 12 not seen yet, but set, maybe top_is_reference
- unsigned unk13 : 1; // 00 13 not seen yet, but set, maybe bottom_is_reference?
- unsigned unk14 : 1; // 00 14 skipped?
- unsigned notseenyet : 1; // 00 15 pad?
- unsigned unk16 : 1; // 00 16
- unsigned unk17 : 4; // 00 17..20
- unsigned unk21 : 4; // 00 21..24
- unsigned pad : 7; // 00 d25..31
-
- uint32_t field_order_cnt[2]; // 04,08
- uint32_t frame_idx; // 0c
- } refs[0x10];
-
- uint8_t m4x4[6][16]; // 140
- uint8_t m8x8[2][64]; // 1a0
- uint32_t u220; // 220 number of extra reorder_list to append?
- uint8_t u224[0x20]; // 224..244 reorder_list append ?
- uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read
+ uint16_t width, height; // 00 02
+ uint32_t stride1, stride2; // 04 08
+ uint32_t ofs[6]; // 0c..20 in-image offset
+
+ uint32_t tmp_stride; // 24
+ uint32_t bucket_size; // 28 bucket size
+ uint32_t inter_ring_data_size; // 2c
+
+ unsigned mb_adaptive_frame_field_flag : 1; // 0
+ unsigned direct_8x8_inference_flag : 1; // 1 0x02: into vuc ofs 56
+ unsigned weighted_pred_flag : 1; // 2 0x04
+ unsigned constrained_intra_pred_flag : 1; // 3 0x08: into vuc ofs 68
+ unsigned is_reference : 1; // 4
+ unsigned interlace : 1; // 5 field_pic_flag
+ unsigned bottom_field_flag : 1; // 6
+ unsigned second_field : 1; // 7 0x80: set when the target already has a decoded field of the other parity
+
+ signed log2_max_frame_num_minus4 : 4; // 31 0..3
+ unsigned chroma_format_idc : 2; // 31 4..5
+ unsigned pic_order_cnt_type : 2; // 31 6..7
+ signed pic_init_qp_minus26 : 6; // 32 0..5
+ signed chroma_qp_index_offset : 5; // 32 6..10
+ signed second_chroma_qp_index_offset : 5; // 32 11..15
+
+ unsigned weighted_bipred_idc : 2; // 34 0..1
+ unsigned fifo_dec_index : 7; // 34 2..8
+ unsigned tmp_idx : 5; // 34 9..13
+ unsigned frame_number : 16; // 34 14..29
+ unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30]
+ unsigned u34_3131 : 1; // 34 31..31 pad?
+
+ uint32_t field_order_cnt[2]; // 38, 3c
+
+ struct { // 40
+ unsigned fifo_idx : 7; // 00 0..6
+ unsigned tmp_idx : 5; // 00 7..11
+ unsigned top_is_reference : 1; // 00 12
+ unsigned bottom_is_reference : 1; // 00 13
+ unsigned is_long_term : 1; // 00 14
+ unsigned notseenyet : 1; // 00 15 pad?
+ unsigned field_pic_flag : 1; // 00 16
+ unsigned top_field_marking : 4; // 00 17..20
+ unsigned bottom_field_marking : 4; // 00 21..24
+ unsigned pad : 7; // 00 25..31
+
+ uint32_t field_order_cnt[2]; // 04,08
+ uint32_t frame_idx; // 0c
+ } refs[0x10];
+
+ uint8_t m4x4[6][16]; // 140
+ uint8_t m8x8[2][64]; // 1a0
+ uint32_t u220; // 220 number of extra reorder_list to append?
+ uint8_t u224[0x20]; // 224..244 reorder_list append ?
+ uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read
};
static void
nouveau_vp3_handle_references(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *refs[16], unsigned seq, struct nouveau_vp3_video_buffer *target)
{
- unsigned h264 = u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC;
- unsigned i, idx, empty_spot = dec->base.max_references + 1;
+ unsigned i, idx, empty_spot = ~0;
+
for (i = 0; i < dec->base.max_references; ++i) {
if (!refs[i])
continue;
idx = refs[i]->valid_ref;
//debug_printf("ref[%i] %p in slot %i\n", i, refs[i], idx);
- assert(target != refs[i] ||
- (h264 && empty_spot &&
- (!dec->refs[idx].decoded_bottom || !dec->refs[idx].decoded_top)));
- if (target == refs[i])
- empty_spot = 0;
if (dec->refs[idx].vidbuf != refs[i]) {
debug_printf("%p is not a real ref\n", refs[i]);
assert(dec->refs[idx].vidbuf == refs[i]);
dec->refs[idx].last_used = seq;
}
- if (!empty_spot)
+
+ if (dec->refs[target->valid_ref].vidbuf == target) {
+ dec->refs[target->valid_ref].last_used = seq;
return;
+ }
/* Try to find a real empty spot first, there should be one..
*/
for (i = 0; i < dec->base.max_references + 1; ++i) {
- if (dec->refs[i].last_used < seq) {
- if (!dec->refs[i].vidbuf) {
- empty_spot = i;
- break;
- }
- if (empty_spot < dec->base.max_references+1 &&
- dec->refs[empty_spot].last_used < dec->refs[i].last_used)
- continue;
+ if (dec->refs[i].vidbuf == target) {
+ empty_spot = i;
+ break;
+ } else if (!dec->refs[i].last_used) {
+ empty_spot = i;
+ } else if (empty_spot == ~0U && dec->refs[i].last_used != seq)
empty_spot = i;
- }
}
+
assert(empty_spot < dec->base.max_references+1);
dec->refs[empty_spot].last_used = seq;
-// debug_printf("Kicked %p to add %p to slot %i\n", dec->refs[empty_spot].vidbuf, target, i);
+// debug_printf("Kicked %p to add %p to slot %i\n", dec->refs[empty_spot].vidbuf, target, empty_spot);
dec->refs[empty_spot].vidbuf = target;
dec->refs[empty_spot].decoded_bottom = dec->refs[empty_spot].decoded_top = 0;
target->valid_ref = empty_spot;
{
struct mpeg4_picparm_vp pic_vp_stub = {}, *pic_vp = &pic_vp_stub;
uint32_t ring, ret = 0x01014; // !async_shutdown << 16 | watchdog << 12 | irq_record << 4 | unk;
- assert(!(dec->base.width & 0xf));
*is_ref = desc->vop_coding_type <= 1;
pic_vp->width = dec->base.width;
nouveau_vp3_ycbcr_offsets(dec, &h->ofs[1], &h->ofs[3], &h->ofs[4]);
h->ofs[5] = h->ofs[3];
h->ofs[0] = h->ofs[2] = 0;
- h->u24 = dec->tmp_stride >> 8;
- assert(h->u24);
- nouveau_vp3_inter_sizes(dec, 1, &ring, &h->bucket_size, &h->inter_ring_data_size);
+ h->tmp_stride = dec->tmp_stride >> 8;
+ assert(h->tmp_stride);
+ nouveau_vp3_inter_sizes(dec, d->slice_count, &ring, &h->bucket_size, &h->inter_ring_data_size);
h->u220 = 0;
- h->f0 = d->pps->sps->mb_adaptive_frame_field_flag;
- h->f1 = d->pps->sps->direct_8x8_inference_flag;
+ h->mb_adaptive_frame_field_flag = d->pps->sps->mb_adaptive_frame_field_flag;
+ h->direct_8x8_inference_flag = d->pps->sps->direct_8x8_inference_flag;
h->weighted_pred_flag = d->pps->weighted_pred_flag;
- h->f3 = d->pps->constrained_intra_pred_flag;
+ h->constrained_intra_pred_flag = d->pps->constrained_intra_pred_flag;
h->is_reference = d->is_reference;
h->interlace = d->field_pic_flag;
h->bottom_field_flag = d->bottom_field_flag;
- h->f7 = 0; // TODO: figure out when set..
+ h->second_field = 0; // set in nouveau_vp3_fill_picparm_h264_vp_refs
+
h->log2_max_frame_num_minus4 = d->pps->sps->log2_max_frame_num_minus4;
- h->u31_45 = 1;
+ h->chroma_format_idc = 1;
h->pic_order_cnt_type = d->pps->sps->pic_order_cnt_type;
h->pic_init_qp_minus26 = d->pps->pic_init_qp_minus26;
h->u34_3030 = h->u34_3131 = 0;
h->field_order_cnt[0] = d->field_order_cnt[0];
h->field_order_cnt[1] = d->field_order_cnt[1];
- memset(h->refs, 0, sizeof(h->refs));
memcpy(h->m4x4, d->pps->ScalingList4x4, sizeof(h->m4x4));
memcpy(h->m8x8, d->pps->ScalingList8x8, sizeof(h->m8x8));
h->u220 = 0;
refs[j] = (struct nouveau_vp3_video_buffer *)d->ref[i];
h->refs[j].fifo_idx = j + 1;
h->refs[j].tmp_idx = refs[j]->valid_ref;
+ assert(dec->refs[refs[j]->valid_ref].vidbuf == refs[j]);
h->refs[j].field_order_cnt[0] = d->field_order_cnt_list[i][0];
h->refs[j].field_order_cnt[1] = d->field_order_cnt_list[i][1];
h->refs[j].frame_idx = d->frame_num_list[i];
if (!dec->refs[refs[j]->valid_ref].field_pic_flag) {
- h->refs[j].unk12 = d->top_is_reference[i];
- h->refs[j].unk13 = d->bottom_is_reference[i];
+ h->refs[j].top_is_reference = d->top_is_reference[i];
+ h->refs[j].bottom_is_reference = d->bottom_is_reference[i];
}
- h->refs[j].unk14 = 0;
+ h->refs[j].is_long_term = d->is_long_term[i];
h->refs[j].notseenyet = 0;
- h->refs[j].unk16 = dec->refs[refs[j]->valid_ref].field_pic_flag;
- h->refs[j].unk17 = dec->refs[refs[j]->valid_ref].decoded_top &&
- d->top_is_reference[i];
- h->refs[j].unk21 = dec->refs[refs[j]->valid_ref].decoded_bottom &&
- d->bottom_is_reference[i];
+ h->refs[j].field_pic_flag = dec->refs[refs[j]->valid_ref].field_pic_flag;
+ h->refs[j].top_field_marking =
+ dec->refs[refs[j]->valid_ref].decoded_top && d->top_is_reference[i] ?
+ 1 + d->is_long_term[i] : 0;
+ h->refs[j].bottom_field_marking =
+ dec->refs[refs[j]->valid_ref].decoded_bottom && d->bottom_is_reference[i] ?
+ 1 + d->is_long_term[i] : 0;
h->refs[j].pad = 0;
- assert(!d->is_long_term[i]);
j++;
}
for (; i < 16; ++i)
assert(d->num_ref_frames <= dec->base.max_references);
for (; i < d->num_ref_frames; ++i)
- h->refs[j].unk16 = d->field_pic_flag;
+ h->refs[j].field_pic_flag = d->field_pic_flag;
*(struct h264_picparm_vp *)map = *h;
return 0x1113;
assert(dec->refs[target->valid_ref].vidbuf == target);
// debug_printf("Target: %p\n", target);
+ if (!dec->refs[target->valid_ref].decoded_top &&
+ !dec->refs[target->valid_ref].decoded_bottom)
+ dec->refs[target->valid_ref].decoded_first = d->bottom_field_flag;
+ else if (dec->refs[target->valid_ref].decoded_first != d->bottom_field_flag)
+ h->second_field = 1;
+
h->tmp_idx = target->valid_ref;
dec->refs[target->valid_ref].field_pic_flag = d->field_pic_flag;
if (!d->field_pic_flag || d->bottom_field_flag)
case PIPE_VIDEO_FORMAT_MPEG12:
*caps = nouveau_vp3_fill_picparm_mpeg12_vp(dec, desc.mpeg12, refs, is_ref, vp);
nouveau_vp3_handle_references(dec, refs, dec->fence_seq, target);
+ switch (desc.mpeg12->picture_structure) {
+ case PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_TOP:
+ dec->refs[target->valid_ref].decoded_top = 1;
+ break;
+ case PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_BOTTOM:
+ dec->refs[target->valid_ref].decoded_bottom = 1;
+ break;
+ default:
+ dec->refs[target->valid_ref].decoded_top = 1;
+ dec->refs[target->valid_ref].decoded_bottom = 1;
+ break;
+ }
return;
case PIPE_VIDEO_FORMAT_MPEG4:
*caps = nouveau_vp3_fill_picparm_mpeg4_vp(dec, desc.mpeg4, refs, is_ref, vp);
nouveau_vp3_handle_references(dec, refs, dec->fence_seq, target);
+ // XXX: Correct?
+ if (!desc.mpeg4->interlaced) {
+ dec->refs[target->valid_ref].decoded_top = 1;
+ dec->refs[target->valid_ref].decoded_bottom = 1;
+ } else if (desc.mpeg4->top_field_first) {
+ if (!dec->refs[target->valid_ref].decoded_top)
+ dec->refs[target->valid_ref].decoded_top = 1;
+ else
+ dec->refs[target->valid_ref].decoded_bottom = 1;
+ } else {
+ if (!dec->refs[target->valid_ref].decoded_bottom)
+ dec->refs[target->valid_ref].decoded_bottom = 1;
+ else
+ dec->refs[target->valid_ref].decoded_top = 1;
+ }
return;
case PIPE_VIDEO_FORMAT_VC1: {
*caps = nouveau_vp3_fill_picparm_vc1_vp(dec, desc.vc1, refs, is_ref, vp);
nouveau_vp3_handle_references(dec, refs, dec->fence_seq, target);
+ if (desc.vc1->frame_coding_mode == 3)
+ debug_printf("Field-Interlaced possibly incorrectly handled\n");
+ dec->refs[target->valid_ref].decoded_top = 1;
+ dec->refs[target->valid_ref].decoded_bottom = 1;
return;
}
case PIPE_VIDEO_FORMAT_MPEG4_AVC: {