nv04-nv40: Fix swizzle transfers for NPOT sizes.
[mesa.git] / src / gallium / drivers / nv04 / nv04_surface_2d.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_format.h"
3 #include "util/u_math.h"
4 #include "util/u_memory.h"
5
6 #include "nouveau/nouveau_winsys.h"
7 #include "nouveau/nouveau_util.h"
8 #include "nouveau/nouveau_screen.h"
9 #include "nv04_surface_2d.h"
10
11 static INLINE int
12 nv04_surface_format(enum pipe_format format)
13 {
14 switch (format) {
15 case PIPE_FORMAT_A8_UNORM:
16 return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
17 case PIPE_FORMAT_R16_SNORM:
18 case PIPE_FORMAT_R5G6B5_UNORM:
19 case PIPE_FORMAT_Z16_UNORM:
20 return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5;
21 case PIPE_FORMAT_X8R8G8B8_UNORM:
22 case PIPE_FORMAT_A8R8G8B8_UNORM:
23 return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8;
24 case PIPE_FORMAT_Z24S8_UNORM:
25 case PIPE_FORMAT_Z24X8_UNORM:
26 return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32;
27 default:
28 return -1;
29 }
30 }
31
32 static INLINE int
33 nv04_rect_format(enum pipe_format format)
34 {
35 switch (format) {
36 case PIPE_FORMAT_A8_UNORM:
37 return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
38 case PIPE_FORMAT_R5G6B5_UNORM:
39 case PIPE_FORMAT_Z16_UNORM:
40 return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5;
41 case PIPE_FORMAT_A8R8G8B8_UNORM:
42 case PIPE_FORMAT_Z24S8_UNORM:
43 case PIPE_FORMAT_Z24X8_UNORM:
44 return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
45 default:
46 return -1;
47 }
48 }
49
50 static INLINE int
51 nv04_scaled_image_format(enum pipe_format format)
52 {
53 switch (format) {
54 case PIPE_FORMAT_A1R5G5B5_UNORM:
55 return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5;
56 case PIPE_FORMAT_A8R8G8B8_UNORM:
57 return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8;
58 case PIPE_FORMAT_X8R8G8B8_UNORM:
59 return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8;
60 case PIPE_FORMAT_R5G6B5_UNORM:
61 case PIPE_FORMAT_R16_SNORM:
62 return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5;
63 default:
64 return -1;
65 }
66 }
67
/*
 * Interleave the low 12 bits of x and y into a single index.
 *
 * Bit i of x lands in bit 2*i of the result and bit i of y lands in bit
 * 2*i + 1; bits of x and y above bit 11 are ignored. Callers multiply the
 * result by the block size to obtain a byte offset into a swizzled surface.
 */
static INLINE unsigned
nv04_swizzle_bits(unsigned x, unsigned y)
{
	unsigned interleaved = 0;
	unsigned bit;

	for (bit = 0; bit < 12; bit++) {
		interleaved |= ((x >> bit) & 1u) << (2 * bit);
		interleaved |= ((y >> bit) & 1u) << (2 * bit + 1);
	}

	return interleaved;
}
98
99 static int
100 nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
101 struct pipe_surface *dst, int dx, int dy,
102 struct pipe_surface *src, int sx, int sy,
103 int w, int h)
104 {
105 struct nouveau_channel *chan = ctx->swzsurf->channel;
106 struct nouveau_grobj *swzsurf = ctx->swzsurf;
107 struct nouveau_grobj *sifm = ctx->sifm;
108 struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src));
109 struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst));
110 const unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
111 /* Max width & height may not be the same on all HW, but must be POT */
112 const unsigned max_w = 1024;
113 const unsigned max_h = 1024;
114 unsigned sub_w = w > max_w ? max_w : w;
115 unsigned sub_h = h > max_h ? max_h : h;
116 unsigned x;
117 unsigned y;
118
119 /* Swizzled surfaces must be POT */
120 assert(util_is_pot(dst->width) && util_is_pot(dst->height));
121
122 /* If area is too large to copy in one shot we must copy it in POT chunks to meet alignment requirements */
123 assert(sub_w == w || util_is_pot(sub_w));
124 assert(sub_h == h || util_is_pot(sub_h));
125
126 BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
127 OUT_RELOCo(chan, dst_bo,
128 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
129
130 BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1);
131 OUT_RING (chan, nv04_surface_format(dst->format) |
132 log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT |
133 log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT);
134
135 BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
136 OUT_RELOCo(chan, src_bo,
137 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
138 BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1);
139 OUT_RING (chan, swzsurf->handle);
140
141 for (y = 0; y < h; y += sub_h) {
142 sub_h = MIN2(sub_h, h - y);
143
144 for (x = 0; x < w; x += sub_w) {
145 sub_w = MIN2(sub_w, w - x);
146
147 /* Must be 64-byte aligned */
148 assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size) & 63));
149
150 BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
151 OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size,
152 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
153
154 BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
155 OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
156 OUT_RING (chan, nv04_scaled_image_format(src->format));
157 OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
158 OUT_RING (chan, 0);
159 OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w);
160 OUT_RING (chan, 0);
161 OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w);
162 OUT_RING (chan, 1 << 20);
163 OUT_RING (chan, 1 << 20);
164
165 BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SIZE, 4);
166 OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | sub_w);
167 OUT_RING (chan, src_pitch |
168 NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
169 NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
170 OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * src->texture->block.size,
171 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
172 OUT_RING (chan, 0);
173 }
174 }
175
176 return 0;
177 }
178
179 static int
180 nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
181 struct pipe_surface *dst, int dx, int dy,
182 struct pipe_surface *src, int sx, int sy, int w, int h)
183 {
184 struct nouveau_channel *chan = ctx->m2mf->channel;
185 struct nouveau_grobj *m2mf = ctx->m2mf;
186 struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src));
187 struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst));
188 unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
189 unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
190 unsigned dst_offset = dst->offset + dy * dst_pitch +
191 dx * dst->texture->block.size;
192 unsigned src_offset = src->offset + sy * src_pitch +
193 sx * src->texture->block.size;
194
195 WAIT_RING (chan, 3 + ((h / 2047) + 1) * 9);
196 BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
197 OUT_RELOCo(chan, src_bo,
198 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
199 OUT_RELOCo(chan, dst_bo,
200 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
201
202 while (h) {
203 int count = (h > 2047) ? 2047 : h;
204
205 BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
206 OUT_RELOCl(chan, src_bo, src_offset,
207 NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
208 OUT_RELOCl(chan, dst_bo, dst_offset,
209 NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
210 OUT_RING (chan, src_pitch);
211 OUT_RING (chan, dst_pitch);
212 OUT_RING (chan, w * src->texture->block.size);
213 OUT_RING (chan, count);
214 OUT_RING (chan, 0x0101);
215 OUT_RING (chan, 0);
216
217 h -= count;
218 src_offset += src_pitch * count;
219 dst_offset += dst_pitch * count;
220 }
221
222 return 0;
223 }
224
225 static int
226 nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
227 int dx, int dy, struct pipe_surface *src, int sx, int sy,
228 int w, int h)
229 {
230 struct nouveau_channel *chan = ctx->surf2d->channel;
231 struct nouveau_grobj *surf2d = ctx->surf2d;
232 struct nouveau_grobj *blit = ctx->blit;
233 struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src));
234 struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst));
235 unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
236 unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
237 int format;
238
239 format = nv04_surface_format(dst->format);
240 if (format < 0)
241 return 1;
242
243 WAIT_RING (chan, 12);
244 BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
245 OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
246 OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
247 BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
248 OUT_RING (chan, format);
249 OUT_RING (chan, (dst_pitch << 16) | src_pitch);
250 OUT_RELOCl(chan, src_bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
251 OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
252
253 BEGIN_RING(chan, blit, 0x0300, 3);
254 OUT_RING (chan, (sy << 16) | sx);
255 OUT_RING (chan, (dy << 16) | dx);
256 OUT_RING (chan, ( h << 16) | w);
257
258 return 0;
259 }
260
261 static void
262 nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
263 int dx, int dy, struct pipe_surface *src, int sx, int sy,
264 int w, int h)
265 {
266 unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
267 unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
268 int src_linear = src->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR;
269 int dst_linear = dst->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR;
270
271 assert(src->format == dst->format);
272
273 /* Setup transfer to swizzle the texture to vram if needed */
274 if (src_linear && !dst_linear && w > 1 && h > 1) {
275 nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h);
276 return;
277 }
278
279 /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback
280 * to NV_MEMORY_TO_MEMORY_FORMAT in this case.
281 */
282 if ((src->offset & 63) || (dst->offset & 63) ||
283 (src_pitch & 63) || (dst_pitch & 63)) {
284 nv04_surface_copy_m2mf(ctx, dst, dx, dy, src, sx, sy, w, h);
285 return;
286 }
287
288 nv04_surface_copy_blit(ctx, dst, dx, dy, src, sx, sy, w, h);
289 }
290
291 static void
292 nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
293 int dx, int dy, int w, int h, unsigned value)
294 {
295 struct nouveau_channel *chan = ctx->surf2d->channel;
296 struct nouveau_grobj *surf2d = ctx->surf2d;
297 struct nouveau_grobj *rect = ctx->rect;
298 struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst));
299 unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
300 int cs2d_format, gdirect_format;
301
302 cs2d_format = nv04_surface_format(dst->format);
303 assert(cs2d_format >= 0);
304
305 gdirect_format = nv04_rect_format(dst->format);
306 assert(gdirect_format >= 0);
307
308 WAIT_RING (chan, 16);
309 BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
310 OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
311 OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
312 BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
313 OUT_RING (chan, cs2d_format);
314 OUT_RING (chan, (dst_pitch << 16) | dst_pitch);
315 OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
316 OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
317
318 BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1);
319 OUT_RING (chan, gdirect_format);
320 BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1);
321 OUT_RING (chan, value);
322 BEGIN_RING(chan, rect,
323 NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2);
324 OUT_RING (chan, (dx << 16) | dy);
325 OUT_RING (chan, ( w << 16) | h);
326 }
327
328 void
329 nv04_surface_2d_takedown(struct nv04_surface_2d **pctx)
330 {
331 struct nv04_surface_2d *ctx;
332
333 if (!pctx || !*pctx)
334 return;
335 ctx = *pctx;
336 *pctx = NULL;
337
338 nouveau_notifier_free(&ctx->ntfy);
339 nouveau_grobj_free(&ctx->m2mf);
340 nouveau_grobj_free(&ctx->surf2d);
341 nouveau_grobj_free(&ctx->swzsurf);
342 nouveau_grobj_free(&ctx->rect);
343 nouveau_grobj_free(&ctx->blit);
344 nouveau_grobj_free(&ctx->sifm);
345
346 FREE(ctx);
347 }
348
349 struct nv04_surface_2d *
350 nv04_surface_2d_init(struct nouveau_screen *screen)
351 {
352 struct nv04_surface_2d *ctx = CALLOC_STRUCT(nv04_surface_2d);
353 struct nouveau_channel *chan = screen->channel;
354 unsigned handle = 0x88000000, class;
355 int ret;
356
357 if (!ctx)
358 return NULL;
359
360 ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy);
361 if (ret) {
362 nv04_surface_2d_takedown(&ctx);
363 return NULL;
364 }
365
366 ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf);
367 if (ret) {
368 nv04_surface_2d_takedown(&ctx);
369 return NULL;
370 }
371
372 BEGIN_RING(chan, ctx->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
373 OUT_RING (chan, ctx->ntfy->handle);
374
375 if (chan->device->chipset < 0x10)
376 class = NV04_CONTEXT_SURFACES_2D;
377 else
378 class = NV10_CONTEXT_SURFACES_2D;
379
380 ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d);
381 if (ret) {
382 nv04_surface_2d_takedown(&ctx);
383 return NULL;
384 }
385
386 BEGIN_RING(chan, ctx->surf2d,
387 NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
388 OUT_RING (chan, chan->vram->handle);
389 OUT_RING (chan, chan->vram->handle);
390
391 if (chan->device->chipset < 0x10)
392 class = NV04_IMAGE_BLIT;
393 else
394 class = NV12_IMAGE_BLIT;
395
396 ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit);
397 if (ret) {
398 nv04_surface_2d_takedown(&ctx);
399 return NULL;
400 }
401
402 BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_DMA_NOTIFY, 1);
403 OUT_RING (chan, ctx->ntfy->handle);
404 BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_SURFACE, 1);
405 OUT_RING (chan, ctx->surf2d->handle);
406 BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_OPERATION, 1);
407 OUT_RING (chan, NV04_IMAGE_BLIT_OPERATION_SRCCOPY);
408
409 ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT,
410 &ctx->rect);
411 if (ret) {
412 nv04_surface_2d_takedown(&ctx);
413 return NULL;
414 }
415
416 BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1);
417 OUT_RING (chan, ctx->ntfy->handle);
418 BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1);
419 OUT_RING (chan, ctx->surf2d->handle);
420 BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1);
421 OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY);
422 BEGIN_RING(chan, ctx->rect,
423 NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1);
424 OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE);
425
426 switch (chan->device->chipset & 0xf0) {
427 case 0x00:
428 case 0x10:
429 class = NV04_SWIZZLED_SURFACE;
430 break;
431 case 0x20:
432 class = NV20_SWIZZLED_SURFACE;
433 break;
434 case 0x30:
435 class = NV30_SWIZZLED_SURFACE;
436 break;
437 case 0x40:
438 case 0x60:
439 class = NV40_SWIZZLED_SURFACE;
440 break;
441 default:
442 /* Famous last words: this really can't happen.. */
443 assert(0);
444 break;
445 }
446
447 ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf);
448 if (ret) {
449 nv04_surface_2d_takedown(&ctx);
450 return NULL;
451 }
452
453 switch (chan->device->chipset & 0xf0) {
454 case 0x10:
455 case 0x20:
456 class = NV10_SCALED_IMAGE_FROM_MEMORY;
457 break;
458 case 0x30:
459 class = NV30_SCALED_IMAGE_FROM_MEMORY;
460 break;
461 case 0x40:
462 case 0x60:
463 class = NV40_SCALED_IMAGE_FROM_MEMORY;
464 break;
465 default:
466 class = NV04_SCALED_IMAGE_FROM_MEMORY;
467 break;
468 }
469
470 ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm);
471 if (ret) {
472 nv04_surface_2d_takedown(&ctx);
473 return NULL;
474 }
475
476 ctx->copy = nv04_surface_copy;
477 ctx->fill = nv04_surface_fill;
478 return ctx;
479 }