nv50: use m2mf z pos instead of calculating offset manually
[mesa.git] / src / gallium / drivers / nv50 / nv50_transfer.c
1
2 #include "pipe/p_context.h"
3 #include "pipe/p_inlines.h"
4 #include "util/u_math.h"
5
6 #include "nv50_context.h"
7
8 struct nv50_transfer {
9 struct pipe_transfer base;
10 struct nouveau_bo *bo;
11 unsigned level_offset;
12 unsigned level_tiling;
13 int level_pitch;
14 int level_width;
15 int level_height;
16 int level_depth;
17 int level_x;
18 int level_y;
19 int level_z;
20 unsigned nblocksx;
21 unsigned nblocksy;
22 };
23
24 static void
25 nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
26 struct nouveau_bo *src_bo, unsigned src_offset,
27 int src_pitch, unsigned src_tile_mode,
28 int sx, int sy, int sz, int sw, int sh, int sd,
29 struct nouveau_bo *dst_bo, unsigned dst_offset,
30 int dst_pitch, unsigned dst_tile_mode,
31 int dx, int dy, int dz, int dw, int dh, int dd,
32 int cpp, int width, int height,
33 unsigned src_reloc, unsigned dst_reloc)
34 {
35 struct nv50_screen *screen = nv50_screen(pscreen);
36 struct nouveau_channel *chan = screen->m2mf->channel;
37 struct nouveau_grobj *m2mf = screen->m2mf;
38
39 src_reloc |= NOUVEAU_BO_RD;
40 dst_reloc |= NOUVEAU_BO_WR;
41
42 WAIT_RING (chan, 14);
43
44 if (!src_bo->tile_flags) {
45 BEGIN_RING(chan, m2mf,
46 NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1);
47 OUT_RING (chan, 1);
48 BEGIN_RING(chan, m2mf,
49 NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1);
50 OUT_RING (chan, src_pitch);
51 src_offset += (sy * src_pitch) + (sx * cpp);
52 } else {
53 BEGIN_RING(chan, m2mf,
54 NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 6);
55 OUT_RING (chan, 0);
56 OUT_RING (chan, src_tile_mode << 4);
57 OUT_RING (chan, sw * cpp);
58 OUT_RING (chan, sh);
59 OUT_RING (chan, sd);
60 OUT_RING (chan, sz); /* copying only 1 zslice per call */
61 }
62
63 if (!dst_bo->tile_flags) {
64 BEGIN_RING(chan, m2mf,
65 NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1);
66 OUT_RING (chan, 1);
67 BEGIN_RING(chan, m2mf,
68 NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1);
69 OUT_RING (chan, dst_pitch);
70 dst_offset += (dy * dst_pitch) + (dx * cpp);
71 } else {
72 BEGIN_RING(chan, m2mf,
73 NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 6);
74 OUT_RING (chan, 0);
75 OUT_RING (chan, dst_tile_mode << 4);
76 OUT_RING (chan, dw * cpp);
77 OUT_RING (chan, dh);
78 OUT_RING (chan, dd);
79 OUT_RING (chan, dz); /* copying only 1 zslice per call */
80 }
81
82 while (height) {
83 int line_count = height > 2047 ? 2047 : height;
84
85 WAIT_RING (chan, 15);
86 BEGIN_RING(chan, m2mf,
87 NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2);
88 OUT_RELOCh(chan, src_bo, src_offset, src_reloc);
89 OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc);
90 BEGIN_RING(chan, m2mf,
91 NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2);
92 OUT_RELOCl(chan, src_bo, src_offset, src_reloc);
93 OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc);
94 if (src_bo->tile_flags) {
95 BEGIN_RING(chan, m2mf,
96 NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1);
97 OUT_RING (chan, (sy << 16) | (sx * cpp));
98 } else {
99 src_offset += (line_count * src_pitch);
100 }
101 if (dst_bo->tile_flags) {
102 BEGIN_RING(chan, m2mf,
103 NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1);
104 OUT_RING (chan, (dy << 16) | (dx * cpp));
105 } else {
106 dst_offset += (line_count * dst_pitch);
107 }
108 BEGIN_RING(chan, m2mf,
109 NV50_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4);
110 OUT_RING (chan, width * cpp);
111 OUT_RING (chan, line_count);
112 OUT_RING (chan, 0x00000101);
113 OUT_RING (chan, 0);
114 FIRE_RING (chan);
115
116 height -= line_count;
117 sy += line_count;
118 dy += line_count;
119 }
120 }
121
122 static INLINE unsigned
123 get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned ny)
124 {
125 unsigned tile_h = get_tile_height(tile_mode);
126 unsigned tile_d = get_tile_depth(tile_mode);
127
128 /* pitch_2d == to next slice within this volume-tile */
129 /* pitch_3d == to next slice in next 2D array of blocks */
130 unsigned pitch_2d = tile_h * 64;
131 unsigned pitch_3d = tile_d * align(ny, tile_h) * pitch;
132
133 return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
134 }
135
136 static struct pipe_transfer *
137 nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
138 unsigned face, unsigned level, unsigned zslice,
139 enum pipe_transfer_usage usage,
140 unsigned x, unsigned y, unsigned w, unsigned h)
141 {
142 struct nouveau_device *dev = nouveau_screen(pscreen)->device;
143 struct nv50_miptree *mt = nv50_miptree(pt);
144 struct nv50_miptree_level *lvl = &mt->level[level];
145 struct nv50_transfer *tx;
146 unsigned nx, ny, image = 0;
147 int ret;
148
149 if (pt->target == PIPE_TEXTURE_CUBE)
150 image = face;
151
152 tx = CALLOC_STRUCT(nv50_transfer);
153 if (!tx)
154 return NULL;
155
156 pipe_texture_reference(&tx->base.texture, pt);
157 tx->nblocksx = pf_get_nblocksx(pt->format, u_minify(pt->width0, level));
158 tx->nblocksy = pf_get_nblocksy(pt->format, u_minify(pt->height0, level));
159 tx->base.width = w;
160 tx->base.height = h;
161 tx->base.stride = tx->nblocksx * pf_get_blocksize(pt->format);
162 tx->base.usage = usage;
163
164 tx->level_pitch = lvl->pitch;
165 tx->level_width = u_minify(mt->base.base.width0, level);
166 tx->level_height = u_minify(mt->base.base.height0, level);
167 tx->level_depth = u_minify(mt->base.base.depth0, level);
168 tx->level_offset = lvl->image_offset[image];
169 tx->level_tiling = lvl->tile_mode;
170 tx->level_z = zslice;
171 tx->level_x = pf_get_nblocksx(pt->format, x);
172 tx->level_y = pf_get_nblocksy(pt->format, y);
173 ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
174 tx->nblocksy * tx->base.stride, &tx->bo);
175 if (ret) {
176 FREE(tx);
177 return NULL;
178 }
179
180 if (usage & PIPE_TRANSFER_READ) {
181 nx = pf_get_nblocksx(pt->format, tx->base.width);
182 ny = pf_get_nblocksy(pt->format, tx->base.height);
183
184 nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset,
185 tx->level_pitch, tx->level_tiling,
186 x, y, zslice,
187 tx->nblocksx, tx->nblocksy,
188 tx->level_depth,
189 tx->bo, 0,
190 tx->base.stride, tx->bo->tile_mode,
191 0, 0, 0,
192 tx->nblocksx, tx->nblocksy, 1,
193 pf_get_blocksize(pt->format), nx, ny,
194 NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
195 NOUVEAU_BO_GART);
196 }
197
198 return &tx->base;
199 }
200
201 static void
202 nv50_transfer_del(struct pipe_transfer *ptx)
203 {
204 struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
205 struct nv50_miptree *mt = nv50_miptree(ptx->texture);
206 struct pipe_texture *pt = ptx->texture;
207
208 unsigned nx = pf_get_nblocksx(pt->format, tx->base.width);
209 unsigned ny = pf_get_nblocksy(pt->format, tx->base.height);
210
211 if (ptx->usage & PIPE_TRANSFER_WRITE) {
212 struct pipe_screen *pscreen = pt->screen;
213
214 nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
215 tx->base.stride, tx->bo->tile_mode,
216 0, 0, 0,
217 tx->nblocksx, tx->nblocksy, 1,
218 mt->base.bo, tx->level_offset,
219 tx->level_pitch, tx->level_tiling,
220 tx->level_x, tx->level_y, tx->level_z,
221 tx->nblocksx, tx->nblocksy,
222 tx->level_depth,
223 pf_get_blocksize(pt->format), nx, ny,
224 NOUVEAU_BO_GART, NOUVEAU_BO_VRAM |
225 NOUVEAU_BO_GART);
226 }
227
228 nouveau_bo_ref(NULL, &tx->bo);
229 pipe_texture_reference(&ptx->texture, NULL);
230 FREE(ptx);
231 }
232
233 static void *
234 nv50_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
235 {
236 struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
237 unsigned flags = 0;
238 int ret;
239
240 if (ptx->usage & PIPE_TRANSFER_WRITE)
241 flags |= NOUVEAU_BO_WR;
242 if (ptx->usage & PIPE_TRANSFER_READ)
243 flags |= NOUVEAU_BO_RD;
244
245 ret = nouveau_bo_map(tx->bo, flags);
246 if (ret)
247 return NULL;
248 return tx->bo->map;
249 }
250
251 static void
252 nv50_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
253 {
254 struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
255
256 nouveau_bo_unmap(tx->bo);
257 }
258
259 void
260 nv50_transfer_init_screen_functions(struct pipe_screen *pscreen)
261 {
262 pscreen->get_tex_transfer = nv50_transfer_new;
263 pscreen->tex_transfer_destroy = nv50_transfer_del;
264 pscreen->transfer_map = nv50_transfer_map;
265 pscreen->transfer_unmap = nv50_transfer_unmap;
266 }
267
268 void
269 nv50_upload_sifc(struct nv50_context *nv50,
270 struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc,
271 unsigned dst_format, int dst_w, int dst_h, int dst_pitch,
272 void *src, unsigned src_format, int src_pitch,
273 int x, int y, int w, int h, int cpp)
274 {
275 struct nouveau_channel *chan = nv50->screen->base.channel;
276 struct nouveau_grobj *eng2d = nv50->screen->eng2d;
277 struct nouveau_grobj *tesla = nv50->screen->tesla;
278 unsigned line_dwords = (w * cpp + 3) / 4;
279
280 reloc |= NOUVEAU_BO_WR;
281
282 WAIT_RING (chan, 32);
283
284 if (bo->tile_flags) {
285 BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5);
286 OUT_RING (chan, dst_format);
287 OUT_RING (chan, 0);
288 OUT_RING (chan, bo->tile_mode << 4);
289 OUT_RING (chan, 1);
290 OUT_RING (chan, 0);
291 } else {
292 BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2);
293 OUT_RING (chan, dst_format);
294 OUT_RING (chan, 1);
295 BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1);
296 OUT_RING (chan, dst_pitch);
297 }
298
299 BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 4);
300 OUT_RING (chan, dst_w);
301 OUT_RING (chan, dst_h);
302 OUT_RELOCh(chan, bo, dst_offset, reloc);
303 OUT_RELOCl(chan, bo, dst_offset, reloc);
304
305 /* NV50_2D_OPERATION_SRCCOPY assumed already set */
306
307 BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2);
308 OUT_RING (chan, 0);
309 OUT_RING (chan, src_format);
310 BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
311 OUT_RING (chan, w);
312 OUT_RING (chan, h);
313 OUT_RING (chan, 0);
314 OUT_RING (chan, 1);
315 OUT_RING (chan, 0);
316 OUT_RING (chan, 1);
317 OUT_RING (chan, 0);
318 OUT_RING (chan, x);
319 OUT_RING (chan, 0);
320 OUT_RING (chan, y);
321
322 while (h--) {
323 const uint32_t *p = src;
324 unsigned count = line_dwords;
325
326 while (count) {
327 unsigned nr = MIN2(count, 1792);
328
329 if (chan->pushbuf->remaining <= nr) {
330 FIRE_RING (chan);
331
332 BEGIN_RING(chan, eng2d,
333 NV50_2D_DST_ADDRESS_HIGH, 2);
334 OUT_RELOCh(chan, bo, dst_offset, reloc);
335 OUT_RELOCl(chan, bo, dst_offset, reloc);
336 }
337 assert(chan->pushbuf->remaining > nr);
338
339 BEGIN_RING(chan, eng2d,
340 NV50_2D_SIFC_DATA | (2 << 29), nr);
341 OUT_RINGp (chan, p, nr);
342
343 p += nr;
344 count -= nr;
345 }
346
347 src += src_pitch;
348 }
349
350 BEGIN_RING(chan, tesla, 0x1440, 1);
351 OUT_RING (chan, 0);
352 }