2 * Copyright 2013 Ilia Mirkin
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include "nv50/nv84_video.h"
27 uint32_t chroma_format_idc
; // 00
28 uint32_t pad
[(0x128 - 0x4) / 4];
29 uint32_t log2_max_frame_num_minus4
; // 128
30 uint32_t pic_order_cnt_type
; // 12c
31 uint32_t log2_max_pic_order_cnt_lsb_minus4
; // 130
32 uint32_t delta_pic_order_always_zero_flag
; // 134
33 uint32_t num_ref_frames
; // 138
34 uint32_t pic_width_in_mbs_minus1
; // 13c
35 uint32_t pic_height_in_map_units_minus1
; // 140
36 uint32_t frame_mbs_only_flag
; // 144
37 uint32_t mb_adaptive_frame_field_flag
; // 148
38 uint32_t direct_8x8_inference_flag
; // 14c
41 uint32_t entropy_coding_mode_flag
; // 00
42 uint32_t pic_order_present_flag
; // 04
43 uint32_t num_slice_groups_minus1
; // 08
44 uint32_t slice_group_map_type
; // 0c
45 uint32_t pad1
[0x60 / 4];
49 uint32_t num_ref_idx_l0_active_minus1
; // 7c
50 uint32_t num_ref_idx_l1_active_minus1
; // 80
51 uint32_t weighted_pred_flag
; // 84
52 uint32_t weighted_bipred_idc
; // 88
53 uint32_t pic_init_qp_minus26
; // 8c
54 uint32_t chroma_qp_index_offset
; // 90
55 uint32_t deblocking_filter_control_present_flag
; // 94
56 uint32_t constrained_intra_pred_flag
; // 98
57 uint32_t redundant_pic_cnt_present_flag
; // 9c
58 uint32_t transform_8x8_mode_flag
; // a0
59 uint32_t pad2
[(0x1c8 - 0xa0 - 4) / 4];
60 uint32_t second_chroma_qp_index_offset
; // 1c8
62 uint32_t curr_pic_order_cnt
; // 1d0
63 uint32_t field_order_cnt
[2]; // 1d4
64 uint32_t curr_mvidx
; // 1dc
67 uint32_t field_is_ref
; // 04 // bit0: top, bit1: bottom
68 uint8_t is_long_term
; // 08
69 uint8_t non_existing
; // 09
70 uint32_t frame_idx
; // 0c
71 uint32_t field_order_cnt
[2]; // 10
73 uint8_t field_pic_flag
; // 1c
80 nv84_decoder_bsp(struct nv84_decoder
*dec
,
81 struct pipe_h264_picture_desc
*desc
,
83 const void *const *data
,
84 const unsigned *num_bytes
,
85 struct nv84_video_buffer
*dest
)
88 uint32_t more_params
[0x44 / 4] = {0};
89 unsigned total_bytes
= 0;
91 static const uint32_t end
[] = {0x0b010000, 0, 0x0b010000, 0};
92 char indexes
[17] = {0};
93 struct nouveau_pushbuf
*push
= dec
->bsp_pushbuf
;
94 struct nouveau_pushbuf_refn bo_refs
[] = {
95 { dec
->vpring
, NOUVEAU_BO_RDWR
| NOUVEAU_BO_VRAM
},
96 { dec
->mbring
, NOUVEAU_BO_RDWR
| NOUVEAU_BO_VRAM
},
97 { dec
->bitstream
, NOUVEAU_BO_RDWR
| NOUVEAU_BO_GART
},
98 { dec
->fence
, NOUVEAU_BO_RDWR
| NOUVEAU_BO_VRAM
},
101 nouveau_bo_wait(dec
->fence
, NOUVEAU_BO_RDWR
, dec
->client
);
103 STATIC_ASSERT(sizeof(struct iparm
) == 0x530);
105 memset(&params
, 0, sizeof(params
));
107 dest
->frame_num
= dest
->frame_num_max
= desc
->frame_num
;
109 for (i
= 0; i
< 16; i
++) {
110 struct iref
*ref
= &params
.ipicparm
.refs
[i
];
111 struct nv84_video_buffer
*frame
= (struct nv84_video_buffer
*)desc
->ref
[i
];
113 /* The frame index is relative to the last IDR frame. So once the frame
114 * num goes back to 0, previous reference frames need to have a negative index. */
117 if (desc
->frame_num
>= frame
->frame_num_max
) {
118 frame
->frame_num_max
= desc
->frame_num
;
120 frame
->frame_num
-= frame
->frame_num_max
+ 1;
121 frame
->frame_num_max
= desc
->frame_num
;
123 ref
->non_existing
= 0;
124 ref
->field_is_ref
= (desc
->top_is_reference
[i
] ? 1 : 0) |
125 (desc
->bottom_is_reference
[i
] ? 2 : 0);
126 ref
->is_long_term
= desc
->is_long_term
[i
];
127 ref
->field_order_cnt
[0] = desc
->field_order_cnt_list
[i
][0];
128 ref
->field_order_cnt
[1] = desc
->field_order_cnt_list
[i
][1];
129 ref
->frame_idx
= frame
->frame_num
;
130 ref
->u00
= ref
->mvidx
= frame
->mvidx
;
131 ref
->field_pic_flag
= desc
->field_pic_flag
;
132 indexes
[frame
->mvidx
] = 1;
135 /* Needs to be adjusted if we ever support non-4:2:0 videos */
136 params
.iseqparm
.chroma_format_idc
= 1;
138 params
.iseqparm
.pic_width_in_mbs_minus1
= mb(dec
->base
.width
) - 1;
139 if (desc
->field_pic_flag
|| desc
->mb_adaptive_frame_field_flag
)
140 params
.iseqparm
.pic_height_in_map_units_minus1
= mb_half(dec
->base
.height
) - 1;
142 params
.iseqparm
.pic_height_in_map_units_minus1
= mb(dec
->base
.height
) - 1;
144 if (desc
->bottom_field_flag
)
145 params
.ipicparm
.curr_pic_order_cnt
= desc
->field_order_cnt
[1];
147 params
.ipicparm
.curr_pic_order_cnt
= desc
->field_order_cnt
[0];
148 params
.ipicparm
.field_order_cnt
[0] = desc
->field_order_cnt
[0];
149 params
.ipicparm
.field_order_cnt
[1] = desc
->field_order_cnt
[1];
150 if (desc
->is_reference
) {
151 if (dest
->mvidx
< 0) {
152 for (i
= 0; i
< desc
->num_ref_frames
+ 1; i
++) {
158 assert(i
!= desc
->num_ref_frames
+ 1);
161 params
.ipicparm
.u1cc
= params
.ipicparm
.curr_mvidx
= dest
->mvidx
;
164 params
.iseqparm
.num_ref_frames
= desc
->num_ref_frames
;
165 params
.iseqparm
.mb_adaptive_frame_field_flag
= desc
->mb_adaptive_frame_field_flag
;
166 params
.ipicparm
.constrained_intra_pred_flag
= desc
->constrained_intra_pred_flag
;
167 params
.ipicparm
.weighted_pred_flag
= desc
->weighted_pred_flag
;
168 params
.ipicparm
.weighted_bipred_idc
= desc
->weighted_bipred_idc
;
169 params
.iseqparm
.frame_mbs_only_flag
= desc
->frame_mbs_only_flag
;
170 params
.ipicparm
.transform_8x8_mode_flag
= desc
->transform_8x8_mode_flag
;
171 params
.ipicparm
.chroma_qp_index_offset
= desc
->chroma_qp_index_offset
;
172 params
.ipicparm
.second_chroma_qp_index_offset
= desc
->second_chroma_qp_index_offset
;
173 params
.ipicparm
.pic_init_qp_minus26
= desc
->pic_init_qp_minus26
;
174 params
.ipicparm
.num_ref_idx_l0_active_minus1
= desc
->num_ref_idx_l0_active_minus1
;
175 params
.ipicparm
.num_ref_idx_l1_active_minus1
= desc
->num_ref_idx_l1_active_minus1
;
176 params
.iseqparm
.log2_max_frame_num_minus4
= desc
->log2_max_frame_num_minus4
;
177 params
.iseqparm
.pic_order_cnt_type
= desc
->pic_order_cnt_type
;
178 params
.iseqparm
.log2_max_pic_order_cnt_lsb_minus4
= desc
->log2_max_pic_order_cnt_lsb_minus4
;
179 params
.iseqparm
.delta_pic_order_always_zero_flag
= desc
->delta_pic_order_always_zero_flag
;
180 params
.iseqparm
.direct_8x8_inference_flag
= desc
->direct_8x8_inference_flag
;
181 params
.ipicparm
.entropy_coding_mode_flag
= desc
->entropy_coding_mode_flag
;
182 params
.ipicparm
.pic_order_present_flag
= desc
->pic_order_present_flag
;
183 params
.ipicparm
.deblocking_filter_control_present_flag
= desc
->deblocking_filter_control_present_flag
;
184 params
.ipicparm
.redundant_pic_cnt_present_flag
= desc
->redundant_pic_cnt_present_flag
;
186 memcpy(dec
->bitstream
->map
, &params
, sizeof(params
));
187 for (i
= 0; i
< num_buffers
; i
++) {
188 assert(total_bytes
+ num_bytes
[i
] < dec
->bitstream
->size
/ 2 - 0x700);
189 memcpy(dec
->bitstream
->map
+ 0x700 + total_bytes
, data
[i
], num_bytes
[i
]);
190 total_bytes
+= num_bytes
[i
];
192 memcpy(dec
->bitstream
->map
+ 0x700 + total_bytes
, end
, sizeof(end
));
193 total_bytes
+= sizeof(end
);
194 more_params
[1] = total_bytes
;
195 memcpy(dec
->bitstream
->map
+ 0x600, more_params
, sizeof(more_params
));
197 PUSH_SPACE(push
, 5 + 21 + 3 + 2 + 4 + 2);
198 nouveau_pushbuf_refn(push
, bo_refs
, sizeof(bo_refs
)/sizeof(bo_refs
[0]));
200 /* Wait for the fence = 1 */
201 BEGIN_NV04(push
, SUBC_BSP(0x10), 4);
202 PUSH_DATAh(push
, dec
->fence
->offset
);
203 PUSH_DATA (push
, dec
->fence
->offset
);
207 /* TODO: Use both halves of bitstream/vpring for alternating frames */
209 /* Kick off the BSP */
210 BEGIN_NV04(push
, SUBC_BSP(0x400), 20);
211 PUSH_DATA (push
, dec
->bitstream
->offset
>> 8);
212 PUSH_DATA (push
, (dec
->bitstream
->offset
>> 8) + 7);
213 PUSH_DATA (push
, dec
->bitstream
->size
/ 2 - 0x700);
214 PUSH_DATA (push
, (dec
->bitstream
->offset
>> 8) + 6);
216 PUSH_DATA (push
, dec
->mbring
->offset
>> 8);
217 PUSH_DATA (push
, dec
->frame_size
);
218 PUSH_DATA (push
, (dec
->mbring
->offset
+ dec
->frame_size
) >> 8);
219 PUSH_DATA (push
, dec
->vpring
->offset
>> 8);
220 PUSH_DATA (push
, dec
->vpring
->size
/ 2);
221 PUSH_DATA (push
, dec
->vpring_residual
);
222 PUSH_DATA (push
, dec
->vpring_ctrl
);
224 PUSH_DATA (push
, dec
->vpring_residual
);
225 PUSH_DATA (push
, dec
->vpring_residual
+ dec
->vpring_ctrl
);
226 PUSH_DATA (push
, dec
->vpring_deblock
);
227 PUSH_DATA (push
, (dec
->vpring
->offset
+ dec
->vpring_ctrl
+
228 dec
->vpring_residual
+ dec
->vpring_deblock
) >> 8);
229 PUSH_DATA (push
, 0x654321);
231 PUSH_DATA (push
, 0x100008);
233 BEGIN_NV04(push
, SUBC_BSP(0x620), 2);
237 BEGIN_NV04(push
, SUBC_BSP(0x300), 1);
240 /* Write fence = 2, intr */
241 BEGIN_NV04(push
, SUBC_BSP(0x610), 3);
242 PUSH_DATAh(push
, dec
->fence
->offset
);
243 PUSH_DATA (push
, dec
->fence
->offset
);
246 BEGIN_NV04(push
, SUBC_BSP(0x304), 1);
247 PUSH_DATA (push
, 0x101);