/**************************************************************************
 *
 * Copyright 2009 Ben Skeggs
 * Copyright 2009 Younes Manton
 * Copyright 2010 Luca Barbieri
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 **************************************************************************/

/* this code has no Mesa or Gallium dependency and can be reused in the classic Mesa driver or DDX */

#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h> /* for memcpy/memmove/memset */
#include <assert.h>
#include <alloca.h> /* for alloca; on non-glibc platforms this may live elsewhere */
#include <nouveau/nouveau_class.h>
#include <nouveau/nouveau_device.h>
#include <nouveau/nouveau_pushbuf.h>
#include <nouveau/nouveau_channel.h>
#include <nouveau/nouveau_bo.h>
#include <nouveau/nouveau_notifier.h>
#include <nouveau/nouveau_grobj.h>
#include "nv04_2d.h"

/* avoid depending on Mesa/Gallium */
#ifdef __GNUC__
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x) !!(x)
#define unlikely(x) !!(x)
#endif

#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) )
#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) )

struct nv04_2d_context
{
	struct nouveau_notifier *ntfy;
	struct nouveau_grobj *surf2d;
	struct nouveau_grobj *swzsurf;
	struct nouveau_grobj *m2mf;
	struct nouveau_grobj *rect;
	struct nouveau_grobj *sifm;
	struct nouveau_grobj *blit;
};

static inline int
align(int value, int alignment)
{
	return (value + alignment - 1) & ~(alignment - 1);
}

static inline int
util_is_pot(unsigned x)
{
	return (x & (x - 1)) == 0;
}

/* Integer base-2 logarithm, rounded towards zero. */
static inline unsigned log2i(unsigned i)
{
	unsigned r = 0;

	if (i & 0xffff0000) {
		i >>= 16;
		r += 16;
	}
	if (i & 0x0000ff00) {
		i >>= 8;
		r += 8;
	}
	if (i & 0x000000f0) {
		i >>= 4;
		r += 4;
	}
	if (i & 0x0000000c) {
		i >>= 2;
		r += 2;
	}
	if (i & 0x00000002) {
		r += 1;
	}
	return r;
}
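
/* For example, log2i(16) == 4 and log2i(31) == 4, since the result is
 * rounded towards zero, while log2i(1) == 0. Similarly, util_is_pot(16)
 * is 1 and util_is_pot(24) is 0 (note that it also reports 1 for x == 0).
 */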

//#define NV04_REGION_DEBUG

// Yes, we really want to inline everything, since all the functions are used only once
#if defined(__GNUC__) && defined(DEBUG)
#define inline __attribute__((always_inline)) inline
#endif

static inline unsigned
nv04_swizzle_bits_square(unsigned x, unsigned y)
{
	unsigned u = (x & 0x001) << 0 |
		     (x & 0x002) << 1 |
		     (x & 0x004) << 2 |
		     (x & 0x008) << 3 |
		     (x & 0x010) << 4 |
		     (x & 0x020) << 5 |
		     (x & 0x040) << 6 |
		     (x & 0x080) << 7 |
		     (x & 0x100) << 8 |
		     (x & 0x200) << 9 |
		     (x & 0x400) << 10 |
		     (x & 0x800) << 11;

	unsigned v = (y & 0x001) << 1 |
		     (y & 0x002) << 2 |
		     (y & 0x004) << 3 |
		     (y & 0x008) << 4 |
		     (y & 0x010) << 5 |
		     (y & 0x020) << 6 |
		     (y & 0x040) << 7 |
		     (y & 0x080) << 8 |
		     (y & 0x100) << 9 |
		     (y & 0x200) << 10 |
		     (y & 0x400) << 11 |
		     (y & 0x800) << 12;
	return v | u;
}
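
/* This is Morton (Z) order: x bits land in the even bit positions and
 * y bits in the odd ones. For example, with x = 2 (0b10) and y = 3 (0b11):
 * u = 0b0100, v = 0b1010, so the swizzled offset is 0b1110 = 14,
 * i.e. the bits are interleaved as y1 x1 y0 x0.
 */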

/* rectangular swizzled textures are linear concatenations of swizzled square tiles */
static inline unsigned
nv04_swizzle_bits_2d(unsigned x, unsigned y, unsigned w, unsigned h)
{
	if(h <= 1)
		return x;
	else
	{
		unsigned s = MIN2(w, h);
		unsigned m = s - 1;
		return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m);
	}
}
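
/* Example: a 16x4 surface is stored as four 4x4 swizzled tiles laid out
 * linearly. With s = 4, ((x | y) & ~3) * 4 selects the 16-pixel tile,
 * while the low two bits of x and y are Morton-interleaved inside it.
 */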

// general 3D texture case
static inline unsigned
nv04_swizzle_bits(unsigned x, unsigned y, unsigned z, unsigned w, unsigned h, unsigned d)
{
	if(d <= 1)
		return nv04_swizzle_bits_2d(x, y, w, h);
	else
	{
		// TODO: autogenerate code for all possible texture sizes (13 * 13 * 13 with dims <= 4096) and do a single indirect call
		unsigned v = 0;
		w >>= 1;
		h >>= 1;
		d >>= 1;
		for(int i = 0;;)
		{
			int oldi = i;
			if(likely(w))
			{
				v |= (x & 1) << i;
				x >>= 1;
				w >>= 1;
				++i;
			}

			if(likely(h))
			{
				v |= (y & 1) << i;
				y >>= 1;
				h >>= 1;
				++i;
			}

			if(likely(d))
			{
				v |= (z & 1) << i;
				z >>= 1;
				d >>= 1;
				++i;
			}

			if(i == oldi)
				break;
		}
		return v;
	}
}
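
/* Example: for an 8x8x8 texture the loop emits one x, one y and one z bit
 * per round until each dimension is exhausted, giving the bit layout
 * z2 y2 x2 z1 y1 x1 z0 y0 x0 (from most to least significant). Dimensions
 * that run out earlier simply stop contributing bits, which generalizes
 * the 2D Morton order above.
 */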

unsigned
nv04_region_begin(struct nv04_region* rgn, unsigned w, unsigned h)
{
	if(rgn->pitch)
		return rgn->pitch * rgn->y + (rgn->x << rgn->bpps);
	else
		return nv04_swizzle_bits(rgn->x, rgn->y, rgn->z, rgn->w, rgn->h, rgn->d) << rgn->bpps;
}

unsigned
nv04_region_end(struct nv04_region* rgn, unsigned w, unsigned h)
{
	if(rgn->pitch)
		return rgn->pitch * (rgn->y + h - 1) + ((rgn->x + w) << rgn->bpps);
	else
		return (nv04_swizzle_bits(rgn->x + w - 1, rgn->y + h - 1, rgn->z, rgn->w, rgn->h, rgn->d) + 1) << rgn->bpps;
}
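
/* For example, for a linear region at (x, y) = (2, 1) with bpps = 2
 * (32 bpp) and pitch = 64, a 4x4 rect spans nv04_region_begin = 64 * 1 +
 * (2 << 2) = 72 up to nv04_region_end = 64 * 4 + (6 << 2) = 280 bytes
 * past rgn->offset.
 */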

/* Pitch convention used by these helpers:
 * *pitch == -1 -> use 3D swizzling for (x, y), *pitch == 0 -> use 2D swizzling, other *pitch -> use linear calculations
 *
 * Return convention:
 * *pitch == -1 ret = 0 -> 3D swizzled subrect
 * *pitch == 0  ret = 0 -> 2D swizzled subrect
 * *pitch > 0   ret = 0 -> linear subrect
 * *pitch > 0   ret = 1 -> linear subrect, but with swizzled 3D data inside
 */

static inline void
nv04_region_print(struct nv04_region* rgn)
{
	fprintf(stderr, "<%i[%i]> ", rgn->bo->handle, rgn->offset);
	if(rgn->pitch)
		fprintf(stderr, "lin %i", rgn->pitch);
	else
		fprintf(stderr, "swz %ix%ix%i", rgn->w, rgn->h, rgn->d);
	fprintf(stderr, " (%i, %i, %i)", rgn->x, rgn->y, rgn->z);
}

static inline void
nv04_region_assert(struct nv04_region* rgn, unsigned w, unsigned h)
{
	unsigned end = rgn->offset + nv04_region_end(rgn, w, h);

	assert(rgn->offset <= (int)rgn->bo->size);
	assert(end <= rgn->bo->size);
	(void) end;
	if(!rgn->pitch) {
		assert(util_is_pot(rgn->w));
		assert(util_is_pot(rgn->h));
	}
}

/* determine if region can be linearized or fake-linearized */
static inline int
nv04_region_is_contiguous(struct nv04_region* rgn, int w, int h)
{
	int surf_min;
	int rect_min;

	if(rgn->pitch)
		return rgn->pitch == w << rgn->bpps;

	// redundant, but this is the fast path for the common case
	if(w == rgn->w && h == rgn->h && rgn->d <= 1)
		return 1;

	// must be POT
	if((w & (w - 1)) || (h & (h - 1)))
		return 0;

	// must be aligned
	if((rgn->x & (w - 1)) || (rgn->y & (h - 1)))
		return 0;

	if(rgn->d > 1)
		return 0;

	surf_min = MIN2(rgn->w, rgn->h);
	rect_min = MIN2(w, h);

	if((rect_min == surf_min) || (w == h) || (w == 2 * h))
		return 1;

	return 0;
}
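
/* Example: a 4x4 rect at (8, 4) inside a swizzled 16x16 surface is
 * contiguous (POT size, aligned position, and w == h), while the same
 * rect at (9, 4) is rejected by the alignment check, since it would
 * straddle swizzle tiles.
 */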

// double the width until the row size reaches the alignment, or the height becomes odd or 1
static inline void
nv04_region_contiguous_shape(struct nv04_region* rgn, int* w, int* h, int align)
{
	while(!(*h & 1) && (*w << rgn->bpps) < (1 << align))
	{
		*w <<= 1;
		*h >>= 1;
	}

	while((*w << rgn->bpps) > 16384 && !(*w & 1))
	{
		*w >>= 1;
		*h <<= 1;
	}

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\tCONTIGUOUS %ix%i\n", *w, *h);
#endif
}
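
/* Example: with bpps = 2 and align = 6 (64 bytes), a contiguous 4x8
 * region (16-byte rows) is reshaped to 16x2, whose 64-byte rows satisfy
 * the alignment; the second loop caps the row size at the 16384-byte
 * hardware pitch limit by halving the width again.
 */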

static inline void
nv04_region_linearize_contiguous(struct nv04_region* rgn, unsigned w, unsigned h)
{
	int pos;
	if(rgn->pitch)
	{
		rgn->offset += rgn->y * rgn->pitch + (rgn->x << rgn->bpps);
		rgn->x = 0;
		rgn->y = 0;
	}
	else
	{
		rgn->offset += (rgn->w * rgn->h * rgn->z) << rgn->bpps;
		pos = nv04_swizzle_bits(rgn->x, rgn->y, rgn->z, rgn->w, rgn->h, rgn->d);
		rgn->x = pos & (w - 1);
		rgn->y = pos / w;
	}
	rgn->pitch = w << rgn->bpps;

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\tLINEARIZE ");
	nv04_region_print(rgn);
	fprintf(stderr, "\n");
#endif
}

/* preserve the offset! */
/*
	rgn->pitch = util_format_get_stride(rgn->format, w);
	int pos = nv04_swizzle_bits(rgn->x, rgn->y, rgn->z, rgn->w, rgn->h, rgn->d);
	rgn->x = pos & (w - 1);
	rgn->y = pos & ~(w - 1);
*/

/*
	rgn->offset +=
	rgn->pitch = util_format_get_stride(rgn->format, w);
	rgn->x = 0;
	rgn->y = 0;
*/

/* This code will get used for, and always succeed on:
 * - 4x2 1bpp swizzled texture mipmap levels
 * - linear regions created by linearization
 *
 * This code will get used for, and MAY work for:
 * - misaligned texture blanket
 * - linear surfaces created without wide_pitch (in this case, it will only work if we are lucky)
 *
 * The general case requires splitting the region in 2.
 */
static inline int
nv04_region_do_align_offset(struct nv04_region* rgn, unsigned w, unsigned h, int shift)
{
	if(rgn->pitch > 0)
	{
		int delta;

		assert(!(rgn->offset & ((1 << rgn->bpps) - 1))); // fatal!
		delta = rgn->offset & ((1 << shift) - 1);

		if(h <= 1)
		{
			rgn->x += delta >> rgn->bpps;
			rgn->offset -= delta;
			rgn->pitch = align((rgn->x + w) << rgn->bpps, 1 << shift);
		}
		else
		{
			int newxo = (rgn->x << rgn->bpps) + delta;
			int dy = newxo / rgn->pitch;
			newxo -= dy * rgn->pitch;
			if((newxo + (w << rgn->bpps)) > rgn->pitch)
			{
				// TODO: split the region into two rectangles (!) if *really* necessary, unless the hardware actually supports "wrapping" rectangles
				// this does not happen if the surface is pitch-aligned, which it should always be
				assert(0);
				return -1;
			}
			rgn->x = newxo >> rgn->bpps;
			rgn->y += dy;
		}
	}
	else
	{
		int size;
		int min;
		int v;

		// we don't care about the alignment of 3D surfaces since the 2D engine can't use them
		if(rgn->d > 1)
			return -1;

		min = MIN2(rgn->w, rgn->h);
		size = min * min << rgn->bpps;

		// this is unfixable, and should not be happening
		if(rgn->offset & (size - 1))
			return -1;

		v = (rgn->offset & ((1 << shift) - 1)) / size;
		rgn->offset -= v * size;

		if(rgn->h == min)
		{
			unsigned w;
			rgn->x += rgn->h * v;
			w = rgn->w + rgn->h * v;

			while(rgn->w < w)
				rgn->w += rgn->w;
		}
		else
		{
			unsigned h;
			rgn->y += rgn->w * v;
			h = rgn->h + rgn->w * v;

			while(rgn->h < h)
				rgn->h += rgn->h;
		}
	}

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\tALIGNED ");
	nv04_region_print(rgn);
	fprintf(stderr, "\n");
#endif
	return 0;
}

// aligns both the pitch and the offset to (1 << shift) bytes
// will leave the region unchanged if it fails
static inline int
nv04_region_align(struct nv04_region* rgn, unsigned w, unsigned h, int shift)
{
	if(rgn->pitch & ((1 << shift) - 1))
	{
		if(h == 1)
			goto do_align; /* this will fix pitch too in this case */
		else
			return -1;
	}

	if(rgn->offset & ((1 << shift) - 1))
	{
do_align:
		if(nv04_region_do_align_offset(rgn, w, h, shift))
			return -1;
	}
	return 0;
}
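
/* Example: shift = 6 requests the 64-byte alignment needed by
 * NV_CONTEXT_SURFACES_2D. A linear one-row byte region (bpps = 0, h = 1)
 * at offset 0x1010 with pitch 100 and w = 100 is rewritten to offset
 * 0x1000 with x shifted right by 0x10 pixels and pitch rounded up to
 * align(0x10 + 100, 64) = 128, so the same pixels are addressed from an
 * aligned base.
 */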

/* this contains 22 different copy loops after preprocessing. unfortunately, it's necessary */
void
nv04_region_copy_cpu(struct nv04_region* dst, struct nv04_region* src, int w, int h)
{
	uint8_t* mdst;
	uint8_t* msrc;
	int size;

	if(dst->bo != src->bo)
	{
		nouveau_bo_map(dst->bo, NOUVEAU_BO_WR);
		nouveau_bo_map(src->bo, NOUVEAU_BO_RD);
	}
	else
		nouveau_bo_map(dst->bo, NOUVEAU_BO_WR | NOUVEAU_BO_RD);

	mdst = (uint8_t*)dst->bo->map + dst->offset;
	msrc = (uint8_t*)src->bo->map + src->offset;

	size = w << dst->bpps;

	nv04_region_assert(dst, w, h);
	nv04_region_assert(src, w, h);

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\tRGN_COPY_CPU [%i, %i: %i] ", w, h, dst->bpps);
	for(int i = 0; i < 2; ++i)
	{
		nv04_region_print(i ? src : dst);
		fprintf(stderr, i ? "\n" : " <- ");
	}

//	for(int i = 0; i < 16; ++i)
//		fprintf(stderr, "%02x ", msrc[i]);
//	fprintf(stderr, "\n");
#endif

	// TODO: support overlapping copies!
	if(src->pitch && dst->pitch)
	{
		mdst += dst->y * dst->pitch + (dst->x << dst->bpps);
		msrc += src->y * src->pitch + (src->x << src->bpps);
		if(dst->bo != src->bo)
			goto simple;
		else if(mdst < msrc)
		{
			if(mdst + size <= msrc)
			{
simple:
				for(int iy = 0; iy < h; ++iy)
				{
					assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
					assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size);
					memcpy(mdst, msrc, size);
					msrc += src->pitch; mdst += dst->pitch;
				}
			}
			else
			{
				for(int iy = 0; iy < h; ++iy)
				{
					assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
					assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size);
					memmove(mdst, msrc, size);
					msrc += src->pitch; mdst += dst->pitch;
				}
			}
		}
		else
		{
			/* copy backwards so we don't destroy data we have to read yet */
			/* decide memcpy vs memmove before moving the pointers: rows at
			 * equal indices either never overlap, or always may */
			int rowcpy = msrc + size <= mdst;
			/* start from the last row and walk back, so the pointers
			 * actually move backwards over the overlapping range */
			msrc += (h - 1) * src->pitch;
			mdst += (h - 1) * dst->pitch;
			if(rowcpy)
			{
				for(int iy = h - 1; iy >= 0; --iy)
				{
					assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
					assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size);
					memcpy(mdst, msrc, size);
					msrc -= src->pitch; mdst -= dst->pitch;
				}
			}
			else
			{
				for(int iy = h - 1; iy >= 0; --iy)
				{
					assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
					assert(msrc + size <= (uint8_t*)src->bo->map + src->bo->size);
					memmove(mdst, msrc, size);
					msrc -= src->pitch; mdst -= dst->pitch;
				}
			}
		}
	}
	else
	{
		int* dswx = NULL;
		int* dswy = NULL;
		int* sswx = NULL;
		int* sswy = NULL;
		int dir;

		if(!dst->pitch)
		{
			dswx = alloca(w * sizeof(int));
			for(int ix = 0; ix < w; ++ix) // we are adding, so z cannot be contributed by both
				dswx[ix] = nv04_swizzle_bits(dst->x + ix, 0, 0, dst->w, dst->h, dst->d);
			dswy = alloca(h * sizeof(int));
			for(int iy = 0; iy < h; ++iy)
				dswy[iy] = nv04_swizzle_bits(0, dst->y + iy, dst->z, dst->w, dst->h, dst->d);
		}

		if(!src->pitch)
		{
			sswx = alloca(w * sizeof(int));
			for(int ix = 0; ix < w; ++ix)
				sswx[ix] = nv04_swizzle_bits(src->x + ix, 0, 0, src->w, src->h, src->d);
			sswy = alloca(h * sizeof(int));
			for(int iy = 0; iy < h; ++iy)
				sswy[iy] = nv04_swizzle_bits(0, src->y + iy, src->z, src->w, src->h, src->d);
		}

		dir = 1;
		/* do backwards copies for overlapping swizzled surfaces */
		if(dst->pitch == src->pitch && dst->offset == src->offset)
		{
			if(dst->y > src->y || (dst->y == src->y && dst->x > src->x))
				dir = -1;
		}

#define SWIZZLED_COPY_LOOPS
		if(dir == 1)
		{
			int dir = 1;
#define LOOP_Y for(int iy = 0; iy < h; ++iy)
#define LOOP_X for(int ix = 0; ix < w; ++ix)
#include "nv04_2d_loops.h"
#undef LOOP_X
#undef LOOP_Y
		}
		else
		{
			int dir = -1;
#define LOOP_Y for(int iy = h - 1; iy >= 0; --iy)
#define LOOP_X for(int ix = w - 1; ix >= 0; --ix)
#include "nv04_2d_loops.h"
#undef LOOP_X
#undef LOOP_Y
		}
#undef SWIZZLED_COPY_LOOPS
	}

	if(src->bo != dst->bo)
		nouveau_bo_unmap(src->bo);
	nouveau_bo_unmap(dst->bo);
}
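
/* A hypothetical caller (illustrative only): fill in two struct
 * nv04_region descriptors, then
 *
 *     nv04_region_copy_cpu(&dst, &src, 256, 64);
 *
 * copies a 256x64 rect with the CPU, handling any mix of linear and
 * swizzled layouts; as the TODO above notes, general overlapping copies
 * are only handled for the same-layout cases treated explicitly here.
 */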

/* TODO: if the destination is swizzled, we are doing random writes, which causes write combining to fail
 * the alternative is to read, modify and copy back, which may or may not be faster
 * loading 3D textures is a common case that hits this and could probably benefit from the temporary
 */
void
nv04_region_fill_cpu(struct nv04_region* dst, int w, int h, unsigned value)
{
	uint8_t* mdst;

	nouveau_bo_map(dst->bo, NOUVEAU_BO_WR);
	mdst = (uint8_t*)dst->bo->map + dst->offset;

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\tRGN_FILL_CPU ");
	nv04_region_print(dst);
	fprintf(stderr, "\n");
#endif

	nv04_region_assert(dst, w, h);

	if(dst->pitch)
	{
		unsigned size = w << dst->bpps;

#define FILL(T) do { \
		for(int iy = 0; iy < h; ++iy) \
		{ \
			assert((char*)((T*)mdst + w) <= (char*)dst->bo->map + dst->bo->size); \
			for(int ix = 0; ix < w; ++ix) \
				((T*)mdst)[ix] = (T)value; \
			mdst += dst->pitch; \
		} \
	} while(0)

		mdst += dst->y * dst->pitch + (dst->x << dst->bpps);

		if(dst->bpps == 0)
		{
ms:
			assert(mdst + size * h <= (uint8_t*)dst->bo->map + dst->bo->size);
			if(size == dst->pitch)
				memset(mdst, (uint8_t)value, size * h);
			else
			{
				for(int iy = 0; iy < h; ++iy)
				{
					assert(mdst + size <= (uint8_t*)dst->bo->map + dst->bo->size);
					memset(mdst, (uint8_t)value, size);
					mdst += dst->pitch;
				}
			}
		}
		else if(dst->bpps == 1)
		{
			if(!((uint8_t)value ^ (uint8_t)(value >> 8)))
				goto ms;

			FILL(uint16_t);
		}
		else if(dst->bpps == 2)
		{
			if(value == (uint8_t)value * 0x1010101)
				goto ms;
			FILL(uint32_t);
		}
		else
			assert(0);
#undef FILL
	}
	else
	{
		int* dswx;
		int* dswy;

		dswx = alloca(w * sizeof(int));
		for(int ix = 0; ix < w; ++ix)
			dswx[ix] = nv04_swizzle_bits(dst->x + ix, 0, dst->z, dst->w, dst->h, dst->d);
		dswy = alloca(h * sizeof(int));
		for(int iy = 0; iy < h; ++iy)
			dswy[iy] = nv04_swizzle_bits(0, dst->y + iy, dst->z, dst->w, dst->h, dst->d);

#define FILL(T) do { \
		T tvalue = (T)value; \
		for(int iy = 0; iy < h; ++iy) \
		{ \
			T* pdst = (T*)mdst + dswy[iy]; \
			for(int ix = 0; ix < w; ++ix) \
			{ \
				assert((uint8_t*)&pdst[dswx[ix] + 1] <= (uint8_t*)dst->bo->map + dst->bo->size); \
				pdst[dswx[ix]] = tvalue; \
			} \
		} \
	} while(0)

		if(dst->bpps == 0)
			FILL(uint8_t);
		else if(dst->bpps == 1)
			FILL(uint16_t);
		else if(dst->bpps == 2)
			FILL(uint32_t);
		else
			assert(0 && "unhandled bpp");
#undef FILL
	}

	nouveau_bo_unmap(dst->bo);
}

static void
nv04_region_copy_swizzle(struct nv04_2d_context *ctx,
		struct nv04_region* dst,
		struct nv04_region* src,
		int w, int h, int cs2d_format, int sifm_format)
{
	struct nouveau_channel *chan = ctx->swzsurf->channel;
	struct nouveau_grobj *swzsurf = ctx->swzsurf;
	struct nouveau_grobj *sifm = ctx->sifm;
	/* Max width & height may not be the same on all HW, but must be POT */
	unsigned max_shift = 10;
	unsigned cw = 1 << max_shift;
	unsigned ch = 1 << max_shift;
	unsigned sx = dst->x >> max_shift;
	unsigned sy = dst->y >> max_shift;
	unsigned ex = (dst->x + w - 1) >> max_shift;
	unsigned ey = (dst->y + h - 1) >> max_shift;
	unsigned chunks = (ex - sx + 1) * (ey - sy + 1);
	unsigned chunk_size;
	if(dst->w < cw)
		cw = dst->w;
	if(dst->h < ch)
		ch = dst->h;
	chunk_size = cw * ch << dst->bpps;

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\tRGN_COPY_SWIZZLE [%i, %i: %i] ", w, h, dst->bpps);
	for(int i = 0; i < 2; ++i)
	{
		nv04_region_print(i ? src : dst);
		fprintf(stderr, i ? "\n" : " <- ");
	}
#endif

	nv04_region_assert(dst, w, h);
	nv04_region_assert(src, w, h);

	MARK_RING (chan, 8 + chunks * 17, 2 + chunks * 2);

	BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
	OUT_RELOCo(chan, dst->bo,
			NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);

	BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1);
	OUT_RING (chan, cs2d_format |
			log2i(cw) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT |
			log2i(ch) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT);

	BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
	OUT_RELOCo(chan, src->bo,
			NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
	BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1);
	OUT_RING (chan, swzsurf->handle);

	assert(!(dst->offset & 63));

	for (int cy = sy; cy <= ey; ++cy) {
		int ry = MAX2(0, (int)(dst->y - ch * cy));
		int rh = MIN2((int)ch, (int)(dst->y - ch * cy + h)) - ry;
		for (int cx = sx; cx <= ex; ++cx) {
			int rx = MAX2(0, (int)(dst->x - cw * cx));
			int rw = MIN2((int)cw, (int)(dst->x - cw * cx + w)) - rx;
			unsigned dst_offset;
			unsigned src_offset;

			BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);

			dst_offset = dst->offset + (nv04_swizzle_bits_2d(cx * cw, cy * ch, dst->w, dst->h) << dst->bpps);
			assert(dst_offset <= dst->bo->size);
			assert(dst_offset + chunk_size <= dst->bo->size);
			OUT_RELOCl(chan, dst->bo, dst_offset,
					NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);

			BEGIN_RING(chan, sifm, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
			OUT_RING (chan, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
			OUT_RING (chan, sifm_format);
			OUT_RING (chan, NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
			OUT_RING (chan, rx | (ry << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT));
			OUT_RING (chan, rh << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | rw);
			OUT_RING (chan, rx | (ry << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT));
			OUT_RING (chan, rh << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | rw);
			OUT_RING (chan, 1 << 20);
			OUT_RING (chan, 1 << 20);

			BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_SIZE, 4);
			OUT_RING (chan, rh << NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | align(rw, 8));
			OUT_RING (chan, src->pitch |
					NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
					NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
			src_offset = src->offset + (cy * ch + ry + src->y - dst->y) * src->pitch + ((cx * cw + rx + src->x - dst->x) << src->bpps);
			assert(src_offset <= src->bo->size);
			assert(src_offset + (src->pitch * (rh - 1)) + (rw << src->bpps) <= src->bo->size);
			OUT_RELOCl(chan, src->bo, src_offset,
					NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
			OUT_RING (chan, 0);
		}
	}
}
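
/* Example of the chunking above: blitting a 96x96 rect into a swizzled
 * 1024x1024 surface touches a single chunk (provided the rect stays below
 * 1024 in both axes), while a 2048x64 destination (cw = 1024, ch = 64)
 * with x = 512 and w = 1024 spans two chunks, and each SIFM pass clips
 * (rx, ry, rw, rh) to its own chunk.
 */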

static inline void
nv04_copy_m2mf_begin(struct nv04_2d_context *ctx, struct nouveau_bo* dstbo, struct nouveau_bo* srcbo, unsigned commands)
{
	struct nouveau_channel *chan = ctx->m2mf->channel;
	struct nouveau_grobj *m2mf = ctx->m2mf;
	MARK_RING (chan, 3 + commands * 9, 2 + commands * 2);
	BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
	OUT_RELOCo(chan, srcbo,
			NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
	OUT_RELOCo(chan, dstbo,
			NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
}

static inline void
nv04_copy_m2mf_body(struct nv04_2d_context *ctx, struct nouveau_bo* dstbo, int* pdstoff, unsigned dstpitch, struct nouveau_bo* srcbo, int* psrcoff, unsigned srcpitch, unsigned size, unsigned lines)
{
	struct nouveau_channel *chan = ctx->m2mf->channel;
	struct nouveau_grobj *m2mf = ctx->m2mf;

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\t\t\tCOPY_M2MF_BODY [%i, %i] <%i[%u]> lin %u <- <%i[%u]> lin %u\n", size, lines, dstbo->handle, *pdstoff, dstpitch, srcbo->handle, *psrcoff, srcpitch);
#endif

	BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
	OUT_RELOCl(chan, srcbo, *psrcoff,
			NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
	OUT_RELOCl(chan, dstbo, *pdstoff,
			NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
	OUT_RING (chan, srcpitch);
	OUT_RING (chan, dstpitch);
	OUT_RING (chan, size);
	OUT_RING (chan, lines);
	OUT_RING (chan, 0x0101);
	OUT_RING (chan, 0);

	*psrcoff += srcpitch * lines;
	*pdstoff += dstpitch * lines;
}

static void
nv04_copy_m2mf(struct nv04_2d_context *ctx,
		struct nouveau_bo* dstbo, int dstoff, unsigned dstpitch,
		struct nouveau_bo* srcbo, int srcoff, unsigned srcpitch,
		unsigned size, unsigned h)
{
	unsigned max_pitch = 32767;
	unsigned max_lines = 2047;

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\t\tCOPY_M2MF [%i, %i] <%i[%i]> lin %u <- <%i[%i]> lin %u\n", size, h, dstbo->handle, dstoff, dstpitch, srcbo->handle, srcoff, srcpitch);
#endif

	if(srcpitch <= max_pitch && dstpitch <= max_pitch)
	{
		unsigned full_pages = h / max_lines;
		unsigned leftover_lines = h - full_pages * max_lines;

		nv04_copy_m2mf_begin(ctx, dstbo, srcbo, full_pages + !!leftover_lines);

		for(unsigned i = 0; i < full_pages; ++i)
			nv04_copy_m2mf_body(ctx, dstbo, &dstoff, dstpitch, srcbo, &srcoff, srcpitch, size, max_lines);

		if(leftover_lines)
			nv04_copy_m2mf_body(ctx, dstbo, &dstoff, dstpitch, srcbo, &srcoff, srcpitch, size, leftover_lines);
	}
	else
	{
		unsigned lines = size / max_pitch;
		unsigned leftover = size - lines * max_pitch;
		unsigned full_pages = lines / max_lines;
		unsigned leftover_lines = lines - full_pages * max_lines;
		unsigned srcgap = srcpitch - size;
		unsigned dstgap = dstpitch - size;

		nv04_copy_m2mf_begin(ctx, dstbo, srcbo, h * (full_pages + !!leftover_lines + !!leftover));

		for(unsigned i = 0; i < h; ++i)
		{
			for(unsigned j = 0; j < full_pages; ++j)
				nv04_copy_m2mf_body(ctx, dstbo, &dstoff, max_pitch, srcbo, &srcoff, max_pitch, max_pitch, max_lines);

			if(leftover_lines)
				nv04_copy_m2mf_body(ctx, dstbo, &dstoff, max_pitch, srcbo, &srcoff, max_pitch, max_pitch, leftover_lines);

			if(leftover)
				nv04_copy_m2mf_body(ctx, dstbo, &dstoff, leftover, srcbo, &srcoff, leftover, leftover, 1);

			srcoff += srcgap;
			dstoff += dstgap;
		}
	}
}
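
/* Example of the splitting above: M2MF moves at most 32767 bytes per line
 * and 2047 lines per command. Copying 4096 lines with a 4096-byte pitch
 * takes the first path in ceil(4096 / 2047) = 3 commands; a single
 * 100000-byte line instead takes the second path, split per row as
 * 3 x 32767 + 1699 bytes.
 */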

void
nv04_memcpy(struct nv04_2d_context *ctx, struct nouveau_bo* dstbo, int dstoff, struct nouveau_bo* srcbo, int srcoff, unsigned size)
{
#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\tMEMCPY [%i] <%i[%i]> <- <%i[%i]>\n", size, dstbo->handle, dstoff, srcbo->handle, srcoff);
#endif

	nv04_copy_m2mf(ctx, dstbo, dstoff, size, srcbo, srcoff, size, size, 1);
}

static void
nv04_region_copy_m2mf(struct nv04_2d_context *ctx, struct nv04_region *dst, struct nv04_region *src, int w, int h)
{
#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\tRGN_COPY_M2MF [%i, %i: %i] ", w, h, dst->bpps);
	for(int i = 0; i < 2; ++i)
	{
		nv04_region_print(i ? src : dst);
		fprintf(stderr, i ? "\n" : " <- ");
	}
#endif

	nv04_region_assert(dst, w, h);
	nv04_region_assert(src, w, h);
	assert(src->pitch);
	assert(dst->pitch);

	nv04_copy_m2mf(ctx,
			dst->bo, dst->offset + dst->y * dst->pitch + (dst->x << dst->bpps), dst->pitch,
			src->bo, src->offset + src->y * src->pitch + (src->x << src->bpps), src->pitch,
			w << src->bpps, h);
}

static inline void
nv04_region_copy_blit(struct nv04_2d_context *ctx, struct nv04_region* dst, struct nv04_region* src, int w, int h, int format)
{
	struct nouveau_channel *chan = ctx->surf2d->channel;
	struct nouveau_grobj *surf2d = ctx->surf2d;
	struct nouveau_grobj *blit = ctx->blit;

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\tRGN_COPY_BLIT [%i, %i: %i] ", w, h, dst->bpps);
	for(int i = 0; i < 2; ++i)
	{
		nv04_region_print(i ? src : dst);
		fprintf(stderr, i ? "\n" : " <- ");
	}
#endif

	assert(!(src->pitch & 63) && src->pitch);
	assert(!(dst->pitch & 63) && dst->pitch);
	nv04_region_assert(dst, w, h);
	nv04_region_assert(src, w, h);

	MARK_RING (chan, 12, 4);
	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
	OUT_RELOCo(chan, src->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
	OUT_RELOCo(chan, dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
	OUT_RING (chan, format);
	OUT_RING (chan, (dst->pitch << 16) | src->pitch);
	OUT_RELOCl(chan, src->bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
	OUT_RELOCl(chan, dst->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);

	BEGIN_RING(chan, blit, 0x0300, 3); /* 0x0300 = NV01_IMAGE_BLIT_POINT_IN */
	OUT_RING (chan, (src->y << 16) | src->x);
	OUT_RING (chan, (dst->y << 16) | dst->x);
	OUT_RING (chan, ( h << 16) | w);
}

/* THEOREM: a non-linearizable swizzled destination is always 64-byte-aligned, except for 4x2 mipmap levels of swizzled 1bpp surfaces
 * HYPOTHESIS:
 * 1. The first mipmap level is 64-byte-aligned
 * PROOF:
 * 1. Thus, all mipmap levels with a parent which is 64 bytes or more in size are too.
 * 2. At 1bpp, the smallest levels with a <= 32-byte parent are either Nx1 or 1xN or size <= 8, thus 4x2, 2x2 or 2x4
 * 3. Nx1, 1xN, 2x4 and 2x2 all have linearizable subrects. 4x2 does not.
 * 4. At 2/4bpp or more, the smallest levels with a 32-byte parent are 1xN, Nx1 or 2x2
 *
 * However, nv04_region_align handles that.
 */

// 0 -> done, 1 -> do with 3D engine or CPU, -1 -> do with CPU
// dst and src may be modified, and the possibly modified version should be passed to nv04_region_cpu if necessary
int
nv04_region_copy_2d(struct nv04_2d_context *ctx, struct nv04_region* dst, struct nv04_region* src,
		int w, int h, int cs2d_format, int sifm_format, int dst_to_gpu, int src_on_gpu)
{
	assert(src->bpps == dst->bpps);

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "RGN_COPY%s [%i, %i: %i] ", (cs2d_format >= 0) ? "_2D" : "_NO2D", w, h, dst->bpps);
	for(int i = 0; i < 2; ++i)
	{
		int gpu = i ? src_on_gpu : dst_to_gpu;
		nv04_region_print(i ? src : dst);
		fprintf(stderr, " %s", gpu ? "gpu" : "cpu");
		fprintf(stderr, i ? "\n" : " <- ");
	}
#endif

	// if they are contiguous and either both swizzled or both linear, reshape
	if(!dst->pitch == !src->pitch
			&& nv04_region_is_contiguous(dst, w, h)
			&& nv04_region_is_contiguous(src, w, h))
	{
		nv04_region_contiguous_shape(dst, &w, &h, 6);
		nv04_region_linearize_contiguous(dst, w, h);
		nv04_region_linearize_contiguous(src, w, h);
	}

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\tOPT ");
	for(int i = 0; i < 2; ++i)
	{
		nv04_region_print(i ? src : dst);
		fprintf(stderr, i ? "\n" : " <- ");
	}
#endif

	/* if the destination is not for GPU _and_ source is on CPU, use CPU */
	/* if the destination is not for GPU _or_ source is on CPU, use CPU only if we think it's faster than the GPU */
	/* TODO: benchmark to find out in which cases exactly we should prefer the CPU */
	if((!dst_to_gpu && !src_on_gpu)
		|| (!dst->pitch && dst->d > 1)
		/* 3D swizzled destinations are unwritable by the GPU, and 2D swizzled ones are readable only by the 3D engine */
	)
		return -1;
	/* there is no known way to read 2D/3D-swizzled surfaces with the 2D engine
	 * ask the caller to use the 3D engine
	 * If a format cannot be sampled from the 3D engine there is no point in making it swizzled, so we must not do so
	 */
	else if(!src->pitch)
	{
#ifdef NV04_REGION_DEBUG
		fprintf(stderr, "\tCOPY_ENG3D\n");
#endif
		return 1;
	}
	/* Setup transfer to swizzle the texture to vram if needed */
	else
	{
		if (!dst->pitch)
		{
			if(cs2d_format < 0 || sifm_format < 0 || !dst_to_gpu)
			{
#ifdef NV04_REGION_DEBUG
				fprintf(stderr, "\tCOPY_ENG3D\n");
#endif
				return 1;
			}
			else
			{
				/* keep the side effect out of assert(), so it survives NDEBUG */
				int res = nv04_region_align(dst, w, h, 6);
				assert(!res);
				(void)res;

				nv04_region_copy_swizzle(ctx, dst, src, w, h, cs2d_format, sifm_format);
				return 0;
			}
		}
		else
		{
			/* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback
			 * to NV_MEMORY_TO_MEMORY_FORMAT in this case.
			 * TODO: is this also true for the source? possibly not
			 */

			if ((cs2d_format < 0)
				|| !dst_to_gpu
				|| nv04_region_align(src, w, h, 6)
				|| nv04_region_align(dst, w, h, 6)
				)
				nv04_region_copy_m2mf(ctx, dst, src, w, h);
			else
				nv04_region_copy_blit(ctx, dst, src, w, h, cs2d_format);

			return 0;
		}
	}
}
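
/* A hypothetical caller (names are illustrative): try the 2D engine first
 * and dispatch on the return value, e.g.
 *
 *     int ret = nv04_region_copy_2d(ctx, &dst, &src, w, h,
 *                                   cs2d_fmt, sifm_fmt, 1, 1);
 *     if(ret > 0)
 *         copy_with_3d_engine(&dst, &src, w, h); // hypothetical fallback
 *     else if(ret < 0)
 *         nv04_region_copy_cpu(&dst, &src, w, h);
 *
 * passing the possibly modified dst/src to the fallback, as required by
 * the comment above.
 */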

static inline void
nv04_region_fill_gdirect(struct nv04_2d_context *ctx, struct nv04_region* dst, int w, int h, unsigned value)
{
	struct nouveau_channel *chan = ctx->surf2d->channel;
	struct nouveau_grobj *surf2d = ctx->surf2d;
	struct nouveau_grobj *rect = ctx->rect;
	int cs2d_format, gdirect_format;

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "\tFILL_GDIRECT\n");
#endif

	assert(!(dst->pitch & 63) && dst->pitch);
	nv04_region_assert(dst, w, h);

	if(dst->bpps == 0)
	{
		gdirect_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
		cs2d_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
	}
	else if(dst->bpps == 1)
	{
		gdirect_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5;
		cs2d_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y16;
	}
	else if(dst->bpps == 2)
	{
		gdirect_format = NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
		cs2d_format = NV04_CONTEXT_SURFACES_2D_FORMAT_Y32;
	}
	else
	{
		assert(0);
		gdirect_format = 0;
		cs2d_format = 0;
	}

	MARK_RING (chan, 15, 4);
	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
	OUT_RELOCo(chan, dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
	OUT_RELOCo(chan, dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
	OUT_RING (chan, cs2d_format);
	OUT_RING (chan, (dst->pitch << 16) | dst->pitch);
	OUT_RELOCl(chan, dst->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
	OUT_RELOCl(chan, dst->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);

	BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1);
	OUT_RING (chan, gdirect_format);
	BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1);
	OUT_RING (chan, value);
	BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2);
	OUT_RING (chan, (dst->x << 16) | dst->y);
	OUT_RING (chan, ( w << 16) | h);
}

int
nv04_region_fill_2d(struct nv04_2d_context *ctx, struct nv04_region *dst,
		int w, int h, unsigned value)
{
	if(!w || !h)
		return 0;

#ifdef NV04_REGION_DEBUG
	fprintf(stderr, "FILL [%i, %i: %i] ", w, h, dst->bpps);
	nv04_region_print(dst);
	fprintf(stderr, " <- 0x%x\n", value);
#endif

	if(nv04_region_is_contiguous(dst, w, h))
	{
		nv04_region_contiguous_shape(dst, &w, &h, 6);
		nv04_region_linearize_contiguous(dst, w, h);
	}

	// TODO: maybe do intermediate copies for some cases instead of using the 3D engine/CPU
	/* GdiRect doesn't work together with swzsurf, so the 3D engine, or an intermediate copy, is the only option here */
	if(!dst->pitch)
	{
#ifdef NV04_REGION_DEBUG
		fprintf(stderr, "\tFILL_ENG3D\n");
#endif
		return 1;
	}
	else if(!nv04_region_align(dst, w, h, 6))
	{
		nv04_region_fill_gdirect(ctx, dst, w, h, value);
		return 0;
	}
	else
		return -1;
}


void
nv04_2d_context_takedown(struct nv04_2d_context *ctx)
{
	nouveau_notifier_free(&ctx->ntfy);
	nouveau_grobj_free(&ctx->m2mf);
	nouveau_grobj_free(&ctx->surf2d);
	nouveau_grobj_free(&ctx->swzsurf);
	nouveau_grobj_free(&ctx->rect);
	nouveau_grobj_free(&ctx->blit);
	nouveau_grobj_free(&ctx->sifm);

	free(ctx);
}

struct nv04_2d_context *
nv04_2d_context_init(struct nouveau_channel* chan)
{
	struct nv04_2d_context *ctx = calloc(1, sizeof(struct nv04_2d_context));
	unsigned handle = 0x88000000, class;
	int ret;

	if (!ctx)
		return NULL;

	ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy);
	if (ret) {
		nv04_2d_context_takedown(ctx);
		return NULL;
	}

	ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf);
	if (ret) {
		nv04_2d_context_takedown(ctx);
		return NULL;
	}

	BEGIN_RING(chan, ctx->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
	OUT_RING (chan, ctx->ntfy->handle);

	if (chan->device->chipset < 0x10)
		class = NV04_CONTEXT_SURFACES_2D;
	else
		class = NV10_CONTEXT_SURFACES_2D;

	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d);
	if (ret) {
		nv04_2d_context_takedown(ctx);
		return NULL;
	}

	BEGIN_RING(chan, ctx->surf2d,
			NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
	OUT_RING (chan, chan->vram->handle);
	OUT_RING (chan, chan->vram->handle);

	if (chan->device->chipset < 0x10)
		class = NV04_IMAGE_BLIT;
	else
		class = NV12_IMAGE_BLIT;

	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit);
	if (ret) {
		nv04_2d_context_takedown(ctx);
		return NULL;
	}

	BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_DMA_NOTIFY, 1);
	OUT_RING (chan, ctx->ntfy->handle);
	BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_SURFACE, 1);
	OUT_RING (chan, ctx->surf2d->handle);
	BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_OPERATION, 1);
	OUT_RING (chan, NV01_IMAGE_BLIT_OPERATION_SRCCOPY);

	ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT,
			&ctx->rect);
	if (ret) {
		nv04_2d_context_takedown(ctx);
		return NULL;
	}

	BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1);
	OUT_RING (chan, ctx->ntfy->handle);
	BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1);
	OUT_RING (chan, ctx->surf2d->handle);
	BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1);
	OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY);
	BEGIN_RING(chan, ctx->rect,
			NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1);
	OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE);

	switch (chan->device->chipset & 0xf0) {
	case 0x00:
	case 0x10:
		class = NV04_SWIZZLED_SURFACE;
		break;
	case 0x20:
		class = NV20_SWIZZLED_SURFACE;
		break;
	case 0x30:
		class = NV30_SWIZZLED_SURFACE;
		break;
	case 0x40:
	case 0x60:
		class = NV40_SWIZZLED_SURFACE;
		break;
	default:
		/* Famous last words: this really can't happen.. */
		assert(0);
		break;
	}

	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf);
	if (ret) {
		nv04_2d_context_takedown(ctx);
		return NULL;
	}

	/* all the Gallium MARK_RING calculations assume no autobinding, so do that now */
	if(ctx->swzsurf->bound == NOUVEAU_GROBJ_UNBOUND)
		nouveau_grobj_autobind(ctx->swzsurf);

	switch (chan->device->chipset & 0xf0) {
	case 0x10:
	case 0x20:
		class = NV10_SCALED_IMAGE_FROM_MEMORY;
		break;
	case 0x30:
		class = NV30_SCALED_IMAGE_FROM_MEMORY;
		break;
	case 0x40:
	case 0x60:
		class = NV40_SCALED_IMAGE_FROM_MEMORY;
		break;
	default:
		class = NV04_SCALED_IMAGE_FROM_MEMORY;
		break;
	}

	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm);
	if (ret) {
		nv04_2d_context_takedown(ctx);
		return NULL;
	}

	/* all the Gallium MARK_RING calculations assume no autobinding, so do that now */
	if(ctx->sifm->bound == NOUVEAU_GROBJ_UNBOUND)
		nouveau_grobj_autobind(ctx->sifm);

	return ctx;
}
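
/* Typical lifecycle (an illustrative sketch; error handling omitted):
 *
 *     struct nv04_2d_context* ctx = nv04_2d_context_init(chan);
 *     ...
 *     int ret = nv04_region_fill_2d(ctx, &dst, w, h, 0);
 *     if(ret > 0)
 *         fill_with_3d_engine(&dst, w, h, 0); // hypothetical fallback
 *     else if(ret < 0)
 *         nv04_region_fill_cpu(&dst, w, h, 0);
 *     ...
 *     nv04_2d_context_takedown(ctx);
 */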