r600g: use CP DMA for buffer clears on evergreen+
[mesa.git] / src / gallium / drivers / nvc0 / nvc0_video.h
1 /*
2 * Copyright 2011-2013 Maarten Lankhorst
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "nvc0_context.h"
24 #include "nvc0_screen.h"
25
26 #include "vl/vl_decoder.h"
27 #include "vl/vl_video_buffer.h"
28 #include "vl/vl_types.h"
29
30 #include "util/u_video.h"
31
/* Fixed constants used by the video decoder code.
 * SLICE_SIZE is multiplied by the slice count in nvc0_decoder_inter_sizes();
 * VP_OFFSET and COMM_OFFSET are not referenced in this header
 * (presumably offsets into a decoder buffer -- confirm against their users).
 */
#define SLICE_SIZE  0x200
#define VP_OFFSET   0x200
#define COMM_OFFSET 0x500

/* Uncomment to build with NVC0_DEBUG_FENCE: this adds the fence_map/comm/
 * fence_bo members to struct nvc0_decoder and drops the queue depth to 1.
 */
//#define NVC0_DEBUG_FENCE 1

/* Number of bsp_bo buffers kept per decoder. */
#ifndef NVC0_DEBUG_FENCE
#  define NVC0_VIDEO_QDEPTH 2
#else
#  define NVC0_VIDEO_QDEPTH 1
#endif

/* Each macro expands to TWO comma-separated arguments: the engine's index
 * taken from a variable named `dec` that must be in scope at the point of
 * use, followed by the parenthesised argument m.
 */
#define SUBC_BSP(m) dec->bsp_idx, (m)
#define SUBC_VP(m)  dec->vp_idx, (m)
#define SUBC_PPP(m) dec->ppp_idx, (m)
47
/* A single picture-description pointer that can be read either through the
 * generic base type or through one of the codec-specific description types.
 * All members are pointers, so the union is exactly one pointer wide.
 */
union pipe_desc {
   struct pipe_picture_desc        *base;   /* codec-independent view */
   struct pipe_mpeg12_picture_desc *mpeg12;
   struct pipe_mpeg4_picture_desc  *mpeg4;
   struct pipe_vc1_picture_desc    *vc1;
   struct pipe_h264_picture_desc   *h264;
};
55
56 struct nvc0_video_buffer {
57 struct pipe_video_buffer base;
58 unsigned num_planes, valid_ref;
59 struct pipe_resource *resources[VL_NUM_COMPONENTS];
60 struct pipe_sampler_view *sampler_view_planes[VL_NUM_COMPONENTS];
61 struct pipe_sampler_view *sampler_view_components[VL_NUM_COMPONENTS];
62 struct pipe_surface *surfaces[VL_NUM_COMPONENTS * 2];
63 };
64
65 struct nvc0_decoder {
66 struct pipe_video_decoder base;
67 struct nouveau_client *client;
68 struct nouveau_object *channel[3], *bsp, *vp, *ppp;
69 struct nouveau_pushbuf *pushbuf[3];
70
71 #ifdef NVC0_DEBUG_FENCE
72 /* dump fence and comm, as needed.. */
73 unsigned *fence_map;
74 struct comm *comm;
75
76 struct nouveau_bo *fence_bo;
77 #endif
78
79 struct nouveau_bo *fw_bo, *bitplane_bo;
80
81 // array size max_references + 2, contains unpostprocessed images
82 // added at the end of ref_bo is a tmp array
83 // tmp is an array for h264, with each member being used for a ref frame or current
84 // target.. size = (((mb(w)*((mb(h)+1)&~1))+3)>>2)<<8 * (max_references+1)
85 // for other codecs, it simply seems that size = w*h is enough
86 // unsure what it's supposed to contain..
87 struct nouveau_bo *ref_bo;
88
89 struct nouveau_bo *inter_bo[2];
90
91 struct nouveau_bo *bsp_bo[NVC0_VIDEO_QDEPTH];
92
93 // bo's used by each cycle:
94
95 // bsp_bo: contains raw bitstream data and parameters for BSP and VP.
96 // inter_bo: contains data shared between BSP and VP
97 // ref_bo: reference image data, used by PPP and VP
98 // bitplane_bo: contain bitplane data (similar to ref_bo), used by BSP only
99 // fw_bo: used by VP only.
100
101 // Needed amount of copies in optimal case:
102 // 2 copies of inter_bo, VP would process the last inter_bo, while BSP is
103 // writing out a new set.
104 // NVC0_VIDEO_QDEPTH copies of bsp_bo. We don't want to block the pipeline ever,
105 // and give shaders a chance to run as well.
106
107 struct {
108 struct nvc0_video_buffer *vidbuf;
109 unsigned last_used;
110 unsigned field_pic_flag : 1;
111 unsigned decoded_top : 1;
112 unsigned decoded_bottom : 1;
113 } refs[17];
114 unsigned fence_seq, fw_sizes, last_frame_num, tmp_stride, ref_stride;
115
116 unsigned bsp_idx, vp_idx, ppp_idx;
117 };
118
/* Fixed-layout communication block; the trailing comment on each member is
 * its byte offset.  The first 0x100 bytes hold the bsp_* side, the second
 * 0x100 bytes the pvp_* side, for 0x200 bytes in total.  Member order and
 * sizes must not change.
 */
struct comm {
   uint32_t bsp_cur_index;        /* 0x000 */
   uint32_t byte_ofs;             /* 0x004 */
   uint32_t status[0x10];         /* 0x008 */
   uint32_t pos[0x10];            /* 0x048 */
   uint8_t  pad[0x100 - 0x88];    /* 0x088; 0x0a0 inside it: bool comm_encrypted */

   uint32_t pvp_cur_index;        /* 0x100 */
   uint32_t acked_byte_ofs;       /* 0x104 */
   uint32_t status_vp[0x10];      /* 0x108 */
   uint16_t mb_y[0x10];           /* 0x148 */
   uint32_t pvp_stage;            /* 0x168, value looks like 0xeeXX */
   uint16_t parse_endpos_index;   /* 0x16c */
   uint16_t irq_index;            /* 0x16e */
   uint8_t  irq_470[0x10];        /* 0x170 */
   uint32_t irq_pos[0x10];        /* 0x180 */
   uint32_t parse_endpos[0x10];   /* 0x1c0 */
};
137
/* Round h up to the next multiple of 64 (0x40); multiples of 64 are
 * returned unchanged.  The addition is done in uint32_t, so values above
 * 0xffffffc0 wrap around to 0.
 */
static INLINE uint32_t nvc0_video_align(uint32_t h)
{
   return (h + 0x3f) & ~0x3fu;
}
142
/* Number of 16-unit blocks needed to cover coord, i.e. coord / 16 rounded
 * up (used in this header to turn pixel dimensions into macroblock counts).
 */
static INLINE uint32_t mb(uint32_t coord)
{
   const uint32_t rounded_up = coord + 15;

   return rounded_up >> 4;
}
147
/* Number of 32-unit blocks needed to cover coord, i.e. coord / 32 rounded
 * up.
 */
static INLINE uint32_t mb_half(uint32_t coord)
{
   const uint32_t rounded_up = coord + 31;

   return rounded_up >> 5;
}
152
153 static INLINE uint64_t
154 nvc0_video_addr(struct nvc0_decoder *dec, struct nvc0_video_buffer *target)
155 {
156 uint64_t ret;
157 if (target)
158 ret = dec->ref_stride * target->valid_ref;
159 else
160 ret = dec->ref_stride * (dec->base.max_references+1);
161 return dec->ref_bo->offset + ret;
162 }
163
164 static INLINE void
165 nvc0_decoder_ycbcr_offsets(struct nvc0_decoder *dec, uint32_t *y2,
166 uint32_t *cbcr, uint32_t *cbcr2)
167 {
168 uint32_t w = mb(dec->base.width), size;
169 *y2 = mb_half(dec->base.height)*w;
170 *cbcr = *y2 * 2;
171 *cbcr2 = *cbcr + w * (nvc0_video_align(dec->base.height)>>6);
172
173 /* The check here should never fail because it means a bug
174 * in the code rather than a bug in hardware..
175 */
176 size = (2 * (*cbcr2 - *cbcr) + *cbcr) << 8;
177 if (size > dec->ref_stride) {
178 debug_printf("Overshot ref_stride (%u) with size %u and ofs (%u,%u,%u)\n",
179 dec->ref_stride, size, *y2<<8, *cbcr<<8, *cbcr2<<8);
180 *y2 = *cbcr = *cbcr2 = 0;
181 assert(size <= dec->ref_stride);
182 }
183 }
184
185 static INLINE void
186 nvc0_decoder_inter_sizes(struct nvc0_decoder *dec, uint32_t slice_count,
187 uint32_t *slice_size, uint32_t *bucket_size,
188 uint32_t *ring_size)
189 {
190 *slice_size = (SLICE_SIZE * slice_count)>>8;
191 if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_CODEC_MPEG12)
192 *bucket_size = 0;
193 else
194 *bucket_size = mb(dec->base.width) * 3;
195 *ring_size = (dec->inter_bo[0]->size >> 8) - *bucket_size - *slice_size;
196 }
197
/* BSP stage entry point; defined outside this header.
 *
 * data/num_bytes describe num_buffers input buffers.  vp_caps and is_ref
 * are pointer arguments (presumably outputs that are later handed to
 * nvc0_decoder_vp(), which takes them by value -- TODO confirm).
 * NOTE(review): the meaning of the return value is not visible here.
 */
unsigned
nvc0_decoder_bsp(struct nvc0_decoder *dec,
                 union pipe_desc desc,
                 struct nvc0_video_buffer *target,
                 unsigned comm_seq,
                 unsigned num_buffers,
                 const void *const *data,
                 const unsigned *num_bytes,
                 unsigned *vp_caps,
                 unsigned *is_ref,
                 struct nvc0_video_buffer *refs[16]);
205
/* Defined outside this header.  Takes the same dec/desc/target/comm_seq
 * arguments as the other stage functions, plus caps and is_ref pointers
 * (presumably filled in for a later nvc0_decoder_vp() call -- TODO confirm)
 * and an array of up to 16 reference buffers.
 */
void
nvc0_decoder_vp_caps(struct nvc0_decoder *dec,
                     union pipe_desc desc,
                     struct nvc0_video_buffer *target,
                     unsigned comm_seq,
                     unsigned *caps,
                     unsigned *is_ref,
                     struct nvc0_video_buffer *refs[16]);
212
/* VP stage entry point; defined outside this header.  caps and is_ref are
 * passed by value here, whereas nvc0_decoder_bsp() and
 * nvc0_decoder_vp_caps() take pointers to them.
 */
void
nvc0_decoder_vp(struct nvc0_decoder *dec,
                union pipe_desc desc,
                struct nvc0_video_buffer *target,
                unsigned comm_seq,
                unsigned caps,
                unsigned is_ref,
                struct nvc0_video_buffer *refs[16]);
218
/* PPP stage entry point; defined outside this header.  Unlike the BSP and
 * VP entry points it takes no reference-buffer array.
 */
void
nvc0_decoder_ppp(struct nvc0_decoder *dec,
                 union pipe_desc desc,
                 struct nvc0_video_buffer *target,
                 unsigned comm_seq);