nvc0: add support for accelerated video decoding through the dedicated engines
[mesa.git] / src / gallium / drivers / nvc0 / nvc0_video_vp.c
1 /*
2 * Copyright 2011-2013 Maarten Lankhorst
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nvc0_video.h"
24 #include <sys/mman.h>
25
/*
 * MPEG-1/2 picture parameter block, in the layout that
 * nvc0_decoder_fill_picparm_mpeg12_vp() copies into the VP parameter area.
 *
 * The trailing comment on each member is its byte offset (hex) inside the
 * struct.  Members named "unk.." have not been identified.
 */
struct mpeg12_picparm_vp {
   uint16_t width;                           // 00, in macroblock units
   uint16_t height;                          // 02, in macroblock units

   uint32_t unk04;                           // 04, stride for Y?
   uint32_t unk08;                           // 08, stride for CbCr?

   uint32_t ofs[6];                          // 0c..20, offsets
   uint32_t bucket_size;                     // 24
   uint32_t inter_ring_data_size;            // 28
   uint16_t unk2c;                           // 2c
   uint16_t alternate_scan;                  // 2e
   uint16_t unk30;                           // 30, not seen set yet
   uint16_t picture_structure;               // 32
   uint16_t pad2[3];                         // 34, 36, 38
   uint16_t unk3a;                           // 3a, set on I frame?

   uint32_t f_code[4];                       // 3c..48
   uint32_t picture_coding_type;             // 4c
   uint32_t intra_dc_precision;              // 50
   uint32_t q_scale_type;                    // 54
   uint32_t top_field_first;                 // 58
   uint32_t full_pel_forward_vector;         // 5c
   uint32_t full_pel_backward_vector;        // 60
   uint8_t intra_quantizer_matrix[0x40];     // 64
   uint8_t non_intra_quantizer_matrix[0x40]; // a4
};
53
/*
 * MPEG-4 part 2 picture parameter block, in the layout that
 * nvc0_decoder_fill_picparm_mpeg4_vp() copies into the VP parameter area.
 *
 * Trailing comments give byte offsets (hex) inside the struct; "written
 * to ..." remarks are the original reverse-engineering notes.
 */
struct mpeg4_picparm_vp {
   uint32_t width;                       // 00, in normal units
   uint32_t height;                      // 04, in normal units
   uint32_t unk08;                       // 08, stride 1
   uint32_t unk0c;                       // 0c, stride 2
   uint32_t ofs[6];                      // 10..24, offsets
   uint32_t bucket_size;                 // 28
   uint32_t pad1;                        // 2c, pad
   uint32_t pad2;                        // 30
   uint32_t inter_ring_data_size;        // 34

   uint32_t trd[2];                      // 38, 3c
   uint32_t trb[2];                      // 40, 44
   uint32_t u48;                         // 48, XXX codec selection? Should test with
                                         //     different values of VdpDecoderProfile
   uint16_t f_code_fw;                   // 4c
   uint16_t f_code_bw;                   // 4e
   uint8_t interlaced;                   // 50

   uint8_t quant_type;                   // 51, bool, written to 528
   uint8_t quarter_sample;               // 52, bool, written to 548
   uint8_t short_video_header;           // 53, bool, negated written to 528 shifted by 1
   uint8_t u54;                          // 54, bool, written to 0x740
   uint8_t vop_coding_type;              // 55
   uint8_t rounding_control;             // 56
   uint8_t alternate_vertical_scan_flag; // 57, bool
   uint8_t top_field_first;              // 58, bool, written to vuc

   uint8_t pad4[3];                      // 59, 5a, 5b, contains garbage on blob
   uint32_t pad5[0x10];                  // 5c...9c non-inclusive, but WHY?

   uint32_t intra[0x10];                 // 9c
   uint32_t non_intra[0x10];             // dc
   // NOTE(review): the earlier notes placed non_intra at bc and guessed at
   // padding over dc..ff.  With 0x10 uint32_t entries per matrix the members
   // above end at 0x11c; confirm which layout the hardware expects.
};
88
/*
 * VC-1 picture parameter block ("full version, with data pumped from BSP"
 * per the original note).  nvc0_decoder_fill_picparm_vc1_vp() writes these
 * members directly into the VP parameter area.
 *
 * Trailing comments give byte offsets (hex) inside the struct.
 */
struct vc1_picparm_vp {
   uint32_t bucket_size;          // 00
   uint32_t pad;                  // 04

   uint32_t inter_ring_data_size; // 08
   uint32_t unk0c;                // 0c, stride 1
   uint32_t unk10;                // 10, stride 2
   uint32_t ofs[6];               // 14..28, offsets

   uint16_t width;                // 2c
   uint16_t height;               // 2e

   uint8_t profile;               // 30, 0 = simple, 1 = main, 2 = advanced
   uint8_t loopfilter;            // 31, written into vuc
   uint8_t fastuvmc;              // 32, written into vuc
   uint8_t dquant;                // 33

   uint8_t overlap;               // 34
   uint8_t quantizer;             // 35
   uint8_t u36;                   // 36, bool
   uint8_t pad2;                  // 37, to align to 0x38
};
112
/*
 * H.264 picture parameter block, in the layout that
 * nvc0_decoder_fill_picparm_h264_vp() stores into the VP parameter area
 * (original note on this struct: 700..a00).
 *
 * Trailing comments give the byte offset (hex) of each member; for bitfields
 * they give the byte/word followed by the bit range.  Members named "u..",
 * "unk..", "f<n>" or "nfi.." are not understood yet.  The order and width of
 * every bitfield is part of the layout and must not be changed.
 */
struct h264_picparm_vp {
   uint16_t width;                               // 00
   uint16_t height;                              // 02
   uint32_t stride1;                             // 04
   uint32_t stride2;                             // 08
   uint32_t ofs[6];                              // 0c..24, in-image offset

   uint32_t u24;                                 // 24, nfi ac8 ?
   uint32_t bucket_size;                         // 28, bucket size
   uint32_t inter_ring_data_size;                // 2c

   /* byte 30: one flag per bit */
   unsigned f0 : 1;                              // 0 0x01: into 640 shifted by 3, 540 shifted by 5, half size something?
   unsigned f1 : 1;                              // 1 0x02: into vuc ofs 56
   unsigned weighted_pred_flag : 1;              // 2 0x04
   unsigned f3 : 1;                              // 3 0x08: into vuc ofs 68
   unsigned is_reference : 1;                    // 4
   unsigned interlace : 1;                       // 5 field_pic_flag
   unsigned bottom_field_flag : 1;               // 6
   unsigned f7 : 1;                              // 7 0x80: nfi yet

   /* bytes 31..33 */
   signed log2_max_frame_num_minus4 : 4;         // 31 0..3
   unsigned u31_45 : 2;                          // 31 4..5
   unsigned pic_order_cnt_type : 2;              // 31 6..7
   signed pic_init_qp_minus26 : 6;               // 32 0..5
   signed chroma_qp_index_offset : 5;            // 32 6..10
   signed second_chroma_qp_index_offset : 5;     // 32 11..15

   /* word 34 */
   unsigned weighted_bipred_idc : 2;             // 34 0..1
   unsigned fifo_dec_index : 7;                  // 34 2..8
   unsigned tmp_idx : 5;                         // 34 9..13
   unsigned frame_number : 16;                   // 34 14..29
   unsigned u34_3030 : 1;                        // 34 30..30 pp.u34[30:30]
   unsigned u34_3131 : 1;                        // 34 31..31 pad?

   uint32_t field_order_cnt[2];                  // 38, 3c

   /* 40: one 0x10-byte record per reference picture */
   struct {
      // 0x00223102
      // nfi (needs: top_is_reference, bottom_is_reference, is_long_term, maybe some other state that was saved..
      unsigned fifo_idx : 7;                     // 00 0..6
      unsigned tmp_idx : 5;                      // 00 7..11
      unsigned unk12 : 1;                        // 00 12 not seen yet, but set, maybe top_is_reference
      unsigned unk13 : 1;                        // 00 13 not seen yet, but set, maybe bottom_is_reference?
      unsigned unk14 : 1;                        // 00 14 skipped?
      unsigned notseenyet : 1;                   // 00 15 pad?
      unsigned unk16 : 1;                        // 00 16
      unsigned unk17 : 4;                        // 00 17..20
      unsigned unk21 : 4;                        // 00 21..24
      unsigned pad : 7;                          // 00 25..31

      uint32_t field_order_cnt[2];               // 04, 08
      uint32_t frame_idx;                        // 0c
   } refs[0x10];

   uint8_t m4x4[6][16];                          // 140
   uint8_t m8x8[2][64];                          // 1a0
   uint32_t u220;                                // 220, number of extra reorder_list to append?
   uint8_t u224[0x20];                           // 224..244, reorder_list append ?
   uint8_t nfi244[0xb0];                         // 244, add some pad to make sure nulls are read
};
171
172 static void
173 nvc0_decoder_handle_references(struct nvc0_decoder *dec, struct nvc0_video_buffer *refs[16], unsigned seq, struct nvc0_video_buffer *target)
174 {
175 unsigned h264 = u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_CODEC_MPEG4_AVC;
176 unsigned i, idx, empty_spot = dec->base.max_references + 1;
177 for (i = 0; i < dec->base.max_references; ++i) {
178 if (!refs[i])
179 continue;
180
181 idx = refs[i]->valid_ref;
182 //debug_printf("ref[%i] %p in slot %i\n", i, refs[i], idx);
183 assert(target != refs[i] ||
184 (h264 && empty_spot &&
185 (!dec->refs[idx].decoded_bottom || !dec->refs[idx].decoded_top)));
186 if (target == refs[i])
187 empty_spot = 0;
188 assert(!h264 ||
189 dec->refs[idx].last_used == seq - 1);
190
191 if (dec->refs[idx].vidbuf != refs[i]) {
192 debug_printf("%p is not a real ref\n", refs[i]);
193 // FIXME: Maybe do m2mf copy here if a application really depends on it?
194 continue;
195 }
196
197 assert(dec->refs[idx].vidbuf == refs[i]);
198 dec->refs[idx].last_used = seq;
199 }
200 if (!empty_spot)
201 return;
202
203 /* Try to find a real empty spot first, there should be one..
204 */
205 for (i = 0; i < dec->base.max_references + 1; ++i) {
206 if (dec->refs[i].last_used < seq) {
207 if (!dec->refs[i].vidbuf) {
208 empty_spot = i;
209 break;
210 }
211 if (empty_spot < dec->base.max_references+1 &&
212 dec->refs[empty_spot].last_used < dec->refs[i].last_used)
213 continue;
214 empty_spot = i;
215 }
216 }
217 assert(empty_spot < dec->base.max_references+1);
218 dec->refs[empty_spot].last_used = seq;
219 // debug_printf("Kicked %p to add %p to slot %i\n", dec->refs[empty_spot].vidbuf, target, i);
220 dec->refs[empty_spot].vidbuf = target;
221 dec->refs[empty_spot].decoded_bottom = dec->refs[empty_spot].decoded_top = 0;
222 target->valid_ref = empty_spot;
223 }
224
225 static void
226 nvc0_decoder_kick_ref(struct nvc0_decoder *dec, struct nvc0_video_buffer *target)
227 {
228 dec->refs[target->valid_ref].vidbuf = NULL;
229 dec->refs[target->valid_ref].last_used = 0;
230 // debug_printf("Unreffed %p\n", target);
231 }
232
233 static uint32_t
234 nvc0_decoder_fill_picparm_mpeg12_vp(struct nvc0_decoder *dec,
235 struct pipe_mpeg12_picture_desc *desc,
236 struct nvc0_video_buffer *refs[16],
237 unsigned *is_ref,
238 char *map)
239 {
240 struct mpeg12_picparm_vp pic_vp_stub = {}, *pic_vp = &pic_vp_stub;
241 uint32_t i, ret = 0x01010, ring; // !async_shutdown << 16 | watchdog << 12 | irq_record << 4 | unk;
242 assert(!(dec->base.width & 0xf));
243 *is_ref = desc->picture_coding_type <= 2;
244
245 if (dec->base.profile == PIPE_VIDEO_PROFILE_MPEG1)
246 pic_vp->picture_structure = 3;
247 else
248 pic_vp->picture_structure = desc->picture_structure;
249
250 assert(desc->picture_structure != 4);
251 if (desc->picture_structure == 4) // Untested, but should work
252 ret |= 0x100;
253 pic_vp->width = mb(dec->base.width);
254 pic_vp->height = mb(dec->base.height);
255 pic_vp->unk08 = pic_vp->unk04 = (dec->base.width+0xf)&~0xf; // Stride
256
257 nvc0_decoder_ycbcr_offsets(dec, &pic_vp->ofs[1], &pic_vp->ofs[3], &pic_vp->ofs[4]);
258 pic_vp->ofs[5] = pic_vp->ofs[3];
259 pic_vp->ofs[0] = pic_vp->ofs[2] = 0;
260 nvc0_decoder_inter_sizes(dec, 1, &ring, &pic_vp->bucket_size, &pic_vp->inter_ring_data_size);
261
262 pic_vp->alternate_scan = desc->alternate_scan;
263 pic_vp->pad2[0] = pic_vp->pad2[1] = pic_vp->pad2[2] = 0;
264 pic_vp->unk30 = desc->picture_structure < 3 && (desc->picture_structure == 2 - desc->top_field_first);
265 pic_vp->unk3a = (desc->picture_coding_type == 1);
266 for (i = 0; i < 4; ++i)
267 pic_vp->f_code[i] = desc->f_code[i/2][i%2] + 1; // FU
268 pic_vp->picture_coding_type = desc->picture_coding_type;
269 pic_vp->intra_dc_precision = desc->intra_dc_precision;
270 pic_vp->q_scale_type = desc->q_scale_type;
271 pic_vp->top_field_first = desc->top_field_first;
272 pic_vp->full_pel_forward_vector = desc->full_pel_forward_vector;
273 pic_vp->full_pel_backward_vector = desc->full_pel_backward_vector;
274 memcpy(pic_vp->intra_quantizer_matrix, desc->intra_matrix, 0x40);
275 memcpy(pic_vp->non_intra_quantizer_matrix, desc->non_intra_matrix, 0x40);
276 memcpy(map, pic_vp, sizeof(*pic_vp));
277 refs[0] = (struct nvc0_video_buffer *)desc->ref[0];
278 refs[!!refs[0]] = (struct nvc0_video_buffer *)desc->ref[1];
279 return ret | (dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1);
280 }
281
282 static uint32_t
283 nvc0_decoder_fill_picparm_mpeg4_vp(struct nvc0_decoder *dec,
284 struct pipe_mpeg4_picture_desc *desc,
285 struct nvc0_video_buffer *refs[16],
286 unsigned *is_ref,
287 char *map)
288 {
289 struct mpeg4_picparm_vp pic_vp_stub = {}, *pic_vp = &pic_vp_stub;
290 uint32_t ring, ret = 0x01014; // !async_shutdown << 16 | watchdog << 12 | irq_record << 4 | unk;
291 assert(!(dec->base.width & 0xf));
292 *is_ref = desc->vop_coding_type <= 1;
293
294 pic_vp->width = dec->base.width;
295 pic_vp->height = mb(dec->base.height)<<4;
296 pic_vp->unk0c = pic_vp->unk08 = mb(dec->base.width)<<4; // Stride
297
298 nvc0_decoder_ycbcr_offsets(dec, &pic_vp->ofs[1], &pic_vp->ofs[3], &pic_vp->ofs[4]);
299 pic_vp->ofs[5] = pic_vp->ofs[3];
300 pic_vp->ofs[0] = pic_vp->ofs[2] = 0;
301 pic_vp->pad1 = pic_vp->pad2 = 0;
302 nvc0_decoder_inter_sizes(dec, 1, &ring, &pic_vp->bucket_size, &pic_vp->inter_ring_data_size);
303
304 pic_vp->trd[0] = desc->trd[0];
305 pic_vp->trd[1] = desc->trd[1];
306 pic_vp->trb[0] = desc->trb[0];
307 pic_vp->trb[1] = desc->trb[1];
308 pic_vp->u48 = 0; // Codec?
309 pic_vp->pad1 = pic_vp->pad2 = 0;
310 pic_vp->f_code_fw = desc->vop_fcode_forward;
311 pic_vp->f_code_bw = desc->vop_fcode_backward;
312 pic_vp->interlaced = desc->interlaced;
313 pic_vp->quant_type = desc->quant_type;
314 pic_vp->quarter_sample = desc->quarter_sample;
315 pic_vp->short_video_header = desc->short_video_header;
316 pic_vp->u54 = 0;
317 pic_vp->vop_coding_type = desc->vop_coding_type;
318 pic_vp->rounding_control = desc->rounding_control;
319 pic_vp->alternate_vertical_scan_flag = desc->alternate_vertical_scan_flag;
320 pic_vp->top_field_first = desc->top_field_first;
321
322 memcpy(pic_vp->intra, desc->intra_matrix, 0x40);
323 memcpy(pic_vp->non_intra, desc->non_intra_matrix, 0x40);
324 memcpy(map, pic_vp, sizeof(*pic_vp));
325 refs[0] = (struct nvc0_video_buffer *)desc->ref[0];
326 refs[!!refs[0]] = (struct nvc0_video_buffer *)desc->ref[1];
327 return ret;
328 }
329
330 static uint32_t
331 nvc0_decoder_fill_picparm_h264_vp(struct nvc0_decoder *dec,
332 const struct pipe_h264_picture_desc *d,
333 struct nvc0_video_buffer *refs[16],
334 unsigned *is_ref,
335 char *map)
336 {
337 struct h264_picparm_vp stub_h = {}, *h = &stub_h;
338 unsigned ring, i, j = 0;
339 assert(offsetof(struct h264_picparm_vp, u224) == 0x224);
340 *is_ref = d->is_reference;
341 assert(!d->frame_num || dec->last_frame_num + 1 == d->frame_num || dec->last_frame_num == d->frame_num);
342 dec->last_frame_num = d->frame_num;
343
344 h->width = mb(dec->base.width);
345 h->height = mb(dec->base.height);
346 h->stride1 = h->stride2 = mb(dec->base.width)*16;
347 nvc0_decoder_ycbcr_offsets(dec, &h->ofs[1], &h->ofs[3], &h->ofs[4]);
348 h->ofs[5] = h->ofs[3];
349 h->ofs[0] = h->ofs[2] = 0;
350 h->u24 = dec->tmp_stride >> 8;
351 assert(h->u24);
352 nvc0_decoder_inter_sizes(dec, 1, &ring, &h->bucket_size, &h->inter_ring_data_size);
353
354 h->u220 = 0;
355 h->f0 = d->mb_adaptive_frame_field_flag;
356 h->f1 = d->direct_8x8_inference_flag;
357 h->weighted_pred_flag = d->weighted_pred_flag;
358 h->f3 = d->constrained_intra_pred_flag;
359 h->is_reference = d->is_reference;
360 h->interlace = d->field_pic_flag;
361 h->bottom_field_flag = d->bottom_field_flag;
362 h->f7 = 0; // TODO: figure out when set..
363 h->log2_max_frame_num_minus4 = d->log2_max_frame_num_minus4;
364 h->u31_45 = 1;
365
366 h->pic_order_cnt_type = d->pic_order_cnt_type;
367 h->pic_init_qp_minus26 = d->pic_init_qp_minus26;
368 h->chroma_qp_index_offset = d->chroma_qp_index_offset;
369 h->second_chroma_qp_index_offset = d->second_chroma_qp_index_offset;
370 h->weighted_bipred_idc = d->weighted_bipred_idc;
371 h->tmp_idx = 0; // set in h264_vp_refs below
372 h->fifo_dec_index = 0; // always set to 0 to be fifo compatible with other codecs
373 h->frame_number = d->frame_num;
374 h->u34_3030 = h->u34_3131 = 0;
375 h->field_order_cnt[0] = d->field_order_cnt[0];
376 h->field_order_cnt[1] = d->field_order_cnt[1];
377 memset(h->refs, 0, sizeof(h->refs));
378 memcpy(h->m4x4, d->scaling_lists_4x4, sizeof(h->m4x4) + sizeof(h->m8x8));
379 h->u220 = 0;
380 for (i = 0; i < d->num_ref_frames; ++i) {
381 if (!d->ref[i])
382 break;
383 refs[j] = (struct nvc0_video_buffer *)d->ref[i];
384 h->refs[j].fifo_idx = j + 1;
385 h->refs[j].tmp_idx = refs[j]->valid_ref;
386 h->refs[j].field_order_cnt[0] = d->field_order_cnt_list[i][0];
387 h->refs[j].field_order_cnt[1] = d->field_order_cnt_list[i][1];
388 h->refs[j].frame_idx = d->frame_num_list[i];
389 if (!dec->refs[refs[j]->valid_ref].field_pic_flag) {
390 h->refs[j].unk12 = d->top_is_reference[i];
391 h->refs[j].unk13 = d->bottom_is_reference[i];
392 }
393 h->refs[j].unk14 = 0;
394 h->refs[j].notseenyet = 0;
395 h->refs[j].unk16 = dec->refs[refs[j]->valid_ref].field_pic_flag;
396 h->refs[j].unk17 = dec->refs[refs[j]->valid_ref].decoded_top &&
397 d->top_is_reference[i];
398 h->refs[j].unk21 = dec->refs[refs[j]->valid_ref].decoded_bottom &&
399 d->bottom_is_reference[i];
400 h->refs[j].pad = 0;
401 assert(!d->is_long_term[i]);
402 j++;
403 }
404 for (; i < 16; ++i)
405 assert(!d->ref[i]);
406 assert(d->num_ref_frames <= dec->base.max_references);
407
408 for (; i < d->num_ref_frames; ++i)
409 h->refs[j].unk16 = d->field_pic_flag;
410 *(struct h264_picparm_vp *)map = *h;
411
412 return 0x1113;
413 }
414
415 static void
416 nvc0_decoder_fill_picparm_h264_vp_refs(struct nvc0_decoder *dec,
417 struct pipe_h264_picture_desc *d,
418 struct nvc0_video_buffer *refs[16],
419 struct nvc0_video_buffer *target,
420 char *map)
421 {
422 struct h264_picparm_vp *h = (struct h264_picparm_vp *)map;
423 assert(dec->refs[target->valid_ref].vidbuf == target);
424 // debug_printf("Target: %p\n", target);
425
426 h->tmp_idx = target->valid_ref;
427 dec->refs[target->valid_ref].field_pic_flag = d->field_pic_flag;
428 if (!d->field_pic_flag || d->bottom_field_flag)
429 dec->refs[target->valid_ref].decoded_bottom = 1;
430 if (!d->field_pic_flag || !d->bottom_field_flag)
431 dec->refs[target->valid_ref].decoded_top = 1;
432 }
433
434 static uint32_t
435 nvc0_decoder_fill_picparm_vc1_vp(struct nvc0_decoder *dec,
436 struct pipe_vc1_picture_desc *d,
437 struct nvc0_video_buffer *refs[16],
438 unsigned *is_ref,
439 char *map)
440 {
441 struct vc1_picparm_vp *vc = (struct vc1_picparm_vp *)map;
442 unsigned ring;
443 assert(dec->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE);
444 *is_ref = d->picture_type <= 1;
445
446 nvc0_decoder_ycbcr_offsets(dec, &vc->ofs[1], &vc->ofs[3], &vc->ofs[4]);
447 vc->ofs[5] = vc->ofs[3];
448 vc->ofs[0] = vc->ofs[2] = 0;
449 vc->width = dec->base.width;
450 vc->height = mb(dec->base.height)<<4;
451 vc->unk0c = vc->unk10 = mb(dec->base.width)<<4; // Stride
452 vc->pad = vc->pad2 = 0;
453 nvc0_decoder_inter_sizes(dec, 1, &ring, &vc->bucket_size, &vc->inter_ring_data_size);
454 vc->profile = dec->base.profile - PIPE_VIDEO_PROFILE_VC1_SIMPLE;
455 vc->loopfilter = d->loopfilter;
456 vc->fastuvmc = d->fastuvmc;
457 vc->dquant = d->dquant;
458 vc->overlap = d->overlap;
459 vc->quantizer = d->quantizer;
460 vc->u36 = 0; // ? No idea what this one is..
461 refs[0] = (struct nvc0_video_buffer *)d->ref[0];
462 refs[!!refs[0]] = (struct nvc0_video_buffer *)d->ref[1];
463 return 0x12;
464 }
465
#if NVC0_DEBUG_FENCE
/*
 * Debug helper (only built with NVC0_DEBUG_FENCE): print the VP status and
 * stage words from comm and the mb_y value for the current index.  Unless
 * the status is 1, a stage whose low byte is not 0xff additionally gets the
 * first comm->byte_ofs + slice_size bytes of inter_bo dumped, after which
 * the final assert fires.
 *
 * comm_seq is not used.
 */
static void dump_comm_vp(struct nvc0_decoder *dec, struct comm *comm, u32 comm_seq,
                         struct nouveau_bo *inter_bo, unsigned slice_size)
{
   unsigned i, idx = comm->pvp_cur_index & 0xf;
   debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
#if 0
   debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
   debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);

   for (i = 0; i != comm->irq_index; ++i)
      debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
   for (i = 0; i != comm->parse_endpos_index; ++i)
      debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
#endif
   debug_printf("mb_y = %u\n", comm->mb_y[idx]);
   if (comm->status_vp[idx] == 1)
      return;

   if ((comm->pvp_stage & 0xff) != 0xff) {
      /* Map outside of assert(): the call must still happen when asserts
       * are compiled out, otherwise inter_bo->map is read without a mapping.
       */
      int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client);
      assert(ret >= 0);
      if (ret >= 0) {
         unsigned *map = inter_bo->map;
         for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
            debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
         }
         /* Drop the mapping again by hand and forget the pointer */
         munmap(inter_bo->map, inter_bo->size);
         inter_bo->map = NULL;
      }
   }
   assert((comm->pvp_stage & 0xff) == 0xff);
}
#endif
498
499 void nvc0_decoder_vp_caps(struct nvc0_decoder *dec, union pipe_desc desc,
500 struct nvc0_video_buffer *target, unsigned comm_seq,
501 unsigned *caps, unsigned *is_ref,
502 struct nvc0_video_buffer *refs[16])
503 {
504 struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NVC0_VIDEO_QDEPTH];
505 enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile);
506 char *vp = bsp_bo->map + VP_OFFSET;
507
508 switch (codec){
509 case PIPE_VIDEO_CODEC_MPEG12:
510 *caps = nvc0_decoder_fill_picparm_mpeg12_vp(dec, desc.mpeg12, refs, is_ref, vp);
511 nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target);
512 return;
513 case PIPE_VIDEO_CODEC_MPEG4:
514 *caps = nvc0_decoder_fill_picparm_mpeg4_vp(dec, desc.mpeg4, refs, is_ref, vp);
515 nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target);
516 return;
517 case PIPE_VIDEO_CODEC_VC1: {
518 *caps = nvc0_decoder_fill_picparm_vc1_vp(dec, desc.vc1, refs, is_ref, vp);
519 nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target);
520 return;
521 }
522 case PIPE_VIDEO_CODEC_MPEG4_AVC: {
523 *caps = nvc0_decoder_fill_picparm_h264_vp(dec, desc.h264, refs, is_ref, vp);
524 nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target);
525 nvc0_decoder_fill_picparm_h264_vp_refs(dec, desc.h264, refs, target, vp);
526 return;
527 }
528 default: assert(0); return;
529 }
530 }
531
532 void
533 nvc0_decoder_vp(struct nvc0_decoder *dec, union pipe_desc desc,
534 struct nvc0_video_buffer *target, unsigned comm_seq,
535 unsigned caps, unsigned is_ref,
536 struct nvc0_video_buffer *refs[16])
537 {
538 struct nouveau_pushbuf *push = dec->pushbuf[1];
539 uint32_t bsp_addr, comm_addr, inter_addr, ucode_addr, pic_addr[17], last_addr, null_addr;
540 uint32_t slice_size, bucket_size, ring_size, i;
541 enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile);
542 struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NVC0_VIDEO_QDEPTH];
543 struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
544 u32 fence_extra = 0, codec_extra = 0;
545 struct nouveau_pushbuf_refn bo_refs[] = {
546 { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
547 { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
548 { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
549 #ifdef NVC0_DEBUG_FENCE
550 { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
551 #endif
552 { dec->fw_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
553 };
554 int num_refs = sizeof(bo_refs)/sizeof(*bo_refs) - !dec->fw_bo;
555
556 #if NVC0_DEBUG_FENCE
557 fence_extra = 4;
558 #endif
559
560 if (codec == PIPE_VIDEO_CODEC_MPEG4_AVC) {
561 nvc0_decoder_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
562 codec_extra += 2;
563 } else
564 nvc0_decoder_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);
565
566 if (dec->base.max_references > 2)
567 codec_extra += 1 + (dec->base.max_references - 2);
568
569 pic_addr[16] = nvc0_video_addr(dec, target) >> 8;
570 last_addr = null_addr = nvc0_video_addr(dec, NULL) >> 8;
571
572 for (i = 0; i < dec->base.max_references; ++i) {
573 if (!refs[i])
574 pic_addr[i] = last_addr;
575 else if (dec->refs[refs[i]->valid_ref].vidbuf == refs[i])
576 last_addr = pic_addr[i] = nvc0_video_addr(dec, refs[i]) >> 8;
577 else
578 pic_addr[i] = null_addr;
579 }
580 if (!is_ref)
581 nvc0_decoder_kick_ref(dec, target);
582
583 PUSH_SPACE(push, 8 + 3 * (codec != PIPE_VIDEO_CODEC_MPEG12) +
584 6 + codec_extra + fence_extra + 2);
585
586 nouveau_pushbuf_refn(push, bo_refs, num_refs);
587
588 bsp_addr = bsp_bo->offset >> 8;
589 #if NVC0_DEBUG_FENCE
590 comm_addr = (dec->fence_bo->offset + COMM_OFFSET)>>8;
591 #else
592 comm_addr = bsp_addr + (COMM_OFFSET>>8);
593 #endif
594 inter_addr = inter_bo->offset >> 8;
595 if (dec->fw_bo)
596 ucode_addr = dec->fw_bo->offset >> 8;
597 else
598 ucode_addr = 0;
599
600 BEGIN_NVC0(push, SUBC_VP(0x700), 7);
601 PUSH_DATA (push, caps); // 700
602 PUSH_DATA (push, comm_seq); // 704
603 PUSH_DATA (push, 0); // 708 fuc targets, ignored for nvc0
604 PUSH_DATA (push, dec->fw_sizes); // 70c
605 PUSH_DATA (push, bsp_addr+(VP_OFFSET>>8)); // 710 picparm_addr
606 PUSH_DATA (push, inter_addr); // 714 inter_parm
607 PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 718 inter_data_ofs
608
609 if (bucket_size) {
610 uint64_t tmpimg_addr = dec->ref_bo->offset + dec->ref_stride * (dec->base.max_references+2);
611
612 BEGIN_NVC0(push, SUBC_VP(0x71c), 2);
613 PUSH_DATA (push, tmpimg_addr >> 8); // 71c
614 PUSH_DATA (push, inter_addr + slice_size); // 720 bucket_ofs
615 }
616
617 BEGIN_NVC0(push, SUBC_VP(0x724), 5);
618 PUSH_DATA (push, comm_addr); // 724
619 PUSH_DATA (push, ucode_addr); // 728
620 PUSH_DATA (push, pic_addr[16]); // 734
621 PUSH_DATA (push, pic_addr[0]); // 72c
622 PUSH_DATA (push, pic_addr[1]); // 730
623
624 if (dec->base.max_references > 2) {
625 int i;
626
627 BEGIN_NVC0(push, SUBC_VP(0x400), dec->base.max_references - 2);
628 for (i = 2; i < dec->base.max_references; ++i) {
629 assert(0x400 + (i - 2) * 4 < 0x438);
630 PUSH_DATA (push, pic_addr[i]);
631 }
632 }
633
634 if (codec == PIPE_VIDEO_CODEC_MPEG4_AVC) {
635 BEGIN_NVC0(push, SUBC_VP(0x438), 1);
636 PUSH_DATA (push, desc.h264->slice_count);
637 }
638
639 //debug_printf("Decoding %08lx with %08lx and %08lx\n", pic_addr[16], pic_addr[0], pic_addr[1]);
640
641 #if NVC0_DEBUG_FENCE
642 BEGIN_NVC0(push, SUBC_VP(0x240), 3);
643 PUSH_DATAh(push, (dec->fence_bo->offset + 0x10));
644 PUSH_DATA (push, (dec->fence_bo->offset + 0x10));
645 PUSH_DATA (push, dec->fence_seq);
646
647 BEGIN_NVC0(push, SUBC_VP(0x300), 1);
648 PUSH_DATA (push, 1);
649 PUSH_KICK(push);
650
651 {
652 unsigned spin = 0;
653 do {
654 usleep(100);
655 if ((spin++ & 0xff) == 0xff) {
656 debug_printf("vp%u: %u\n", dec->fence_seq, dec->fence_map[4]);
657 dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
658 }
659 } while (dec->fence_seq > dec->fence_map[4]);
660 }
661 dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
662 #else
663 BEGIN_NVC0(push, SUBC_VP(0x300), 1);
664 PUSH_DATA (push, 0);
665 PUSH_KICK (push);
666 #endif
667 }