/*
 * Copyright 2013 Ilia Mirkin
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
23 #include "nv50/nv84_video.h"
27 uint32_t chroma_format_idc
; // 00
28 uint32_t pad
[(0x128 - 0x4) / 4];
29 uint32_t log2_max_frame_num_minus4
; // 128
30 uint32_t pic_order_cnt_type
; // 12c
31 uint32_t log2_max_pic_order_cnt_lsb_minus4
; // 130
32 uint32_t delta_pic_order_always_zero_flag
; // 134
33 uint32_t num_ref_frames
; // 138
34 uint32_t pic_width_in_mbs_minus1
; // 13c
35 uint32_t pic_height_in_map_units_minus1
; // 140
36 uint32_t frame_mbs_only_flag
; // 144
37 uint32_t mb_adaptive_frame_field_flag
; // 148
38 uint32_t direct_8x8_inference_flag
; // 14c
41 uint32_t entropy_coding_mode_flag
; // 00
42 uint32_t pic_order_present_flag
; // 04
43 uint32_t num_slice_groups_minus1
; // 08
44 uint32_t slice_group_map_type
; // 0c
45 uint32_t pad1
[0x60 / 4];
49 uint32_t num_ref_idx_l0_active_minus1
; // 7c
50 uint32_t num_ref_idx_l1_active_minus1
; // 80
51 uint32_t weighted_pred_flag
; // 84
52 uint32_t weighted_bipred_idc
; // 88
53 uint32_t pic_init_qp_minus26
; // 8c
54 uint32_t chroma_qp_index_offset
; // 90
55 uint32_t deblocking_filter_control_present_flag
; // 94
56 uint32_t constrained_intra_pred_flag
; // 98
57 uint32_t redundant_pic_cnt_present_flag
; // 9c
58 uint32_t transform_8x8_mode_flag
; // a0
59 uint32_t pad2
[(0x1c8 - 0xa0 - 4) / 4];
60 uint32_t second_chroma_qp_index_offset
; // 1c8
62 uint32_t curr_pic_order_cnt
; // 1d0
63 uint32_t field_order_cnt
[2]; // 1d4
64 uint32_t curr_mvidx
; // 1dc
67 uint32_t field_is_ref
; // 04 // bit0: top, bit1: bottom
68 uint8_t is_long_term
; // 08
69 uint8_t non_existing
; // 09
72 uint32_t frame_idx
; // 0c
73 uint32_t field_order_cnt
[2]; // 10
75 uint8_t field_pic_flag
; // 1c
85 nv84_decoder_bsp(struct nv84_decoder
*dec
,
86 struct pipe_h264_picture_desc
*desc
,
88 const void *const *data
,
89 const unsigned *num_bytes
,
90 struct nv84_video_buffer
*dest
)
93 uint32_t more_params
[0x44 / 4] = {0};
94 unsigned total_bytes
= 0;
96 static const uint32_t end
[] = {0x0b010000, 0, 0x0b010000, 0};
97 char indexes
[17] = {0};
98 struct nouveau_pushbuf
*push
= dec
->bsp_pushbuf
;
99 struct nouveau_pushbuf_refn bo_refs
[] = {
100 { dec
->vpring
, NOUVEAU_BO_RDWR
| NOUVEAU_BO_VRAM
},
101 { dec
->mbring
, NOUVEAU_BO_RDWR
| NOUVEAU_BO_VRAM
},
102 { dec
->bitstream
, NOUVEAU_BO_RDWR
| NOUVEAU_BO_GART
},
103 { dec
->fence
, NOUVEAU_BO_RDWR
| NOUVEAU_BO_VRAM
},
106 nouveau_bo_wait(dec
->fence
, NOUVEAU_BO_RDWR
, dec
->client
);
108 STATIC_ASSERT(sizeof(struct iparm
) == 0x530);
110 memset(¶ms
, 0, sizeof(params
));
112 dest
->frame_num
= dest
->frame_num_max
= desc
->frame_num
;
114 for (i
= 0; i
< 16; i
++) {
115 struct iref
*ref
= ¶ms
.ipicparm
.refs
[i
];
116 struct nv84_video_buffer
*frame
= (struct nv84_video_buffer
*)desc
->ref
[i
];
/* The frame index is relative to the last IDR frame. So once the frame
 * num goes back to 0, previous reference frames need to have a negative
 * index.
 */
122 if (desc
->frame_num
>= frame
->frame_num_max
) {
123 frame
->frame_num_max
= desc
->frame_num
;
125 frame
->frame_num
-= frame
->frame_num_max
+ 1;
126 frame
->frame_num_max
= desc
->frame_num
;
128 ref
->non_existing
= 0;
129 ref
->field_is_ref
= (desc
->top_is_reference
[i
] ? 1 : 0) |
130 (desc
->bottom_is_reference
[i
] ? 2 : 0);
131 ref
->is_long_term
= desc
->is_long_term
[i
];
132 ref
->field_order_cnt
[0] = desc
->field_order_cnt_list
[i
][0];
133 ref
->field_order_cnt
[1] = desc
->field_order_cnt_list
[i
][1];
134 ref
->frame_idx
= frame
->frame_num
;
135 ref
->u00
= ref
->mvidx
= frame
->mvidx
;
136 ref
->field_pic_flag
= desc
->field_pic_flag
;
137 indexes
[frame
->mvidx
] = 1;
140 /* Needs to be adjusted if we ever support non-4:2:0 videos */
141 params
.iseqparm
.chroma_format_idc
= 1;
143 params
.iseqparm
.pic_width_in_mbs_minus1
= mb(dec
->base
.width
) - 1;
144 if (desc
->field_pic_flag
|| desc
->pps
->sps
->mb_adaptive_frame_field_flag
)
145 params
.iseqparm
.pic_height_in_map_units_minus1
= mb_half(dec
->base
.height
) - 1;
147 params
.iseqparm
.pic_height_in_map_units_minus1
= mb(dec
->base
.height
) - 1;
149 if (desc
->bottom_field_flag
)
150 params
.ipicparm
.curr_pic_order_cnt
= desc
->field_order_cnt
[1];
152 params
.ipicparm
.curr_pic_order_cnt
= desc
->field_order_cnt
[0];
153 params
.ipicparm
.field_order_cnt
[0] = desc
->field_order_cnt
[0];
154 params
.ipicparm
.field_order_cnt
[1] = desc
->field_order_cnt
[1];
155 if (desc
->is_reference
) {
156 if (dest
->mvidx
< 0) {
157 for (i
= 0; i
< desc
->num_ref_frames
+ 1; i
++) {
163 assert(i
!= desc
->num_ref_frames
+ 1);
166 params
.ipicparm
.u1cc
= params
.ipicparm
.curr_mvidx
= dest
->mvidx
;
169 params
.iseqparm
.num_ref_frames
= desc
->num_ref_frames
;
170 params
.iseqparm
.mb_adaptive_frame_field_flag
= desc
->pps
->sps
->mb_adaptive_frame_field_flag
;
171 params
.ipicparm
.constrained_intra_pred_flag
= desc
->pps
->constrained_intra_pred_flag
;
172 params
.ipicparm
.weighted_pred_flag
= desc
->pps
->weighted_pred_flag
;
173 params
.ipicparm
.weighted_bipred_idc
= desc
->pps
->weighted_bipred_idc
;
174 params
.iseqparm
.frame_mbs_only_flag
= desc
->pps
->sps
->frame_mbs_only_flag
;
175 params
.ipicparm
.transform_8x8_mode_flag
= desc
->pps
->transform_8x8_mode_flag
;
176 params
.ipicparm
.chroma_qp_index_offset
= desc
->pps
->chroma_qp_index_offset
;
177 params
.ipicparm
.second_chroma_qp_index_offset
= desc
->pps
->second_chroma_qp_index_offset
;
178 params
.ipicparm
.pic_init_qp_minus26
= desc
->pps
->pic_init_qp_minus26
;
179 params
.ipicparm
.num_ref_idx_l0_active_minus1
= desc
->num_ref_idx_l0_active_minus1
;
180 params
.ipicparm
.num_ref_idx_l1_active_minus1
= desc
->num_ref_idx_l1_active_minus1
;
181 params
.iseqparm
.log2_max_frame_num_minus4
= desc
->pps
->sps
->log2_max_frame_num_minus4
;
182 params
.iseqparm
.pic_order_cnt_type
= desc
->pps
->sps
->pic_order_cnt_type
;
183 params
.iseqparm
.log2_max_pic_order_cnt_lsb_minus4
= desc
->pps
->sps
->log2_max_pic_order_cnt_lsb_minus4
;
184 params
.iseqparm
.delta_pic_order_always_zero_flag
= desc
->pps
->sps
->delta_pic_order_always_zero_flag
;
185 params
.iseqparm
.direct_8x8_inference_flag
= desc
->pps
->sps
->direct_8x8_inference_flag
;
186 params
.ipicparm
.entropy_coding_mode_flag
= desc
->pps
->entropy_coding_mode_flag
;
187 params
.ipicparm
.pic_order_present_flag
= desc
->pps
->bottom_field_pic_order_in_frame_present_flag
;
188 params
.ipicparm
.deblocking_filter_control_present_flag
= desc
->pps
->deblocking_filter_control_present_flag
;
189 params
.ipicparm
.redundant_pic_cnt_present_flag
= desc
->pps
->redundant_pic_cnt_present_flag
;
191 memcpy(dec
->bitstream
->map
, ¶ms
, sizeof(params
));
192 for (i
= 0; i
< num_buffers
; i
++) {
193 assert(total_bytes
+ num_bytes
[i
] < dec
->bitstream
->size
/ 2 - 0x700);
194 memcpy(dec
->bitstream
->map
+ 0x700 + total_bytes
, data
[i
], num_bytes
[i
]);
195 total_bytes
+= num_bytes
[i
];
197 memcpy(dec
->bitstream
->map
+ 0x700 + total_bytes
, end
, sizeof(end
));
198 total_bytes
+= sizeof(end
);
199 more_params
[1] = total_bytes
;
200 memcpy(dec
->bitstream
->map
+ 0x600, more_params
, sizeof(more_params
));
202 PUSH_SPACE(push
, 5 + 21 + 3 + 2 + 4 + 2);
203 nouveau_pushbuf_refn(push
, bo_refs
, ARRAY_SIZE(bo_refs
));
205 /* Wait for the fence = 1 */
206 BEGIN_NV04(push
, SUBC_BSP(0x10), 4);
207 PUSH_DATAh(push
, dec
->fence
->offset
);
208 PUSH_DATA (push
, dec
->fence
->offset
);
212 /* TODO: Use both halves of bitstream/vpring for alternating frames */
214 /* Kick off the BSP */
215 BEGIN_NV04(push
, SUBC_BSP(0x400), 20);
216 PUSH_DATA (push
, dec
->bitstream
->offset
>> 8);
217 PUSH_DATA (push
, (dec
->bitstream
->offset
>> 8) + 7);
218 PUSH_DATA (push
, dec
->bitstream
->size
/ 2 - 0x700);
219 PUSH_DATA (push
, (dec
->bitstream
->offset
>> 8) + 6);
221 PUSH_DATA (push
, dec
->mbring
->offset
>> 8);
222 PUSH_DATA (push
, dec
->frame_size
);
223 PUSH_DATA (push
, (dec
->mbring
->offset
+ dec
->frame_size
) >> 8);
224 PUSH_DATA (push
, dec
->vpring
->offset
>> 8);
225 PUSH_DATA (push
, dec
->vpring
->size
/ 2);
226 PUSH_DATA (push
, dec
->vpring_residual
);
227 PUSH_DATA (push
, dec
->vpring_ctrl
);
229 PUSH_DATA (push
, dec
->vpring_residual
);
230 PUSH_DATA (push
, dec
->vpring_residual
+ dec
->vpring_ctrl
);
231 PUSH_DATA (push
, dec
->vpring_deblock
);
232 PUSH_DATA (push
, (dec
->vpring
->offset
+ dec
->vpring_ctrl
+
233 dec
->vpring_residual
+ dec
->vpring_deblock
) >> 8);
234 PUSH_DATA (push
, 0x654321);
236 PUSH_DATA (push
, 0x100008);
238 BEGIN_NV04(push
, SUBC_BSP(0x620), 2);
242 BEGIN_NV04(push
, SUBC_BSP(0x300), 1);
245 /* Write fence = 2, intr */
246 BEGIN_NV04(push
, SUBC_BSP(0x610), 3);
247 PUSH_DATAh(push
, dec
->fence
->offset
);
248 PUSH_DATA (push
, dec
->fence
->offset
);
251 BEGIN_NV04(push
, SUBC_BSP(0x304), 1);
252 PUSH_DATA (push
, 0x101);