nouveau: Fix nv20-40 swizzled miptree RTs
author Luca Barbieri <luca@luca-barbieri.com>
Sun, 27 Dec 2009 03:04:46 +0000 (04:04 +0100)
committer Younes Manton <younes.m@gmail.com>
Mon, 28 Dec 2009 22:59:01 +0000 (17:59 -0500)
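I just coded a patch that makes rendering to small swizzled miptree
surfaces work by rendering into a temporary surface and copying the
result back with the 2D engine, and it seems to work fine. The problem
must be fixed since it breaks OpenGL (alternatively the state tracker
could be changed, but it seems better to handle it in the driver).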

The patch also fixes NV20 and NV30 in the same way. They compile but
are untested.

I would guess that using the 3D engine is faster for the larger
levels, but the 2D engine is faster for the smaller ones (and lacks
this issue).

src/gallium/drivers/nouveau/nouveau_winsys.h
src/gallium/drivers/nv04/nv04_surface_2d.c
src/gallium/drivers/nv04/nv04_surface_2d.h
src/gallium/drivers/nv20/nv20_miptree.c
src/gallium/drivers/nv20/nv20_transfer.c
src/gallium/drivers/nv30/nv30_miptree.c
src/gallium/drivers/nv30/nv30_transfer.c
src/gallium/drivers/nv40/nv40_miptree.c
src/gallium/drivers/nv40/nv40_transfer.c

index 42c77e5e77890fb15e8403cbc6d485ea29f95255..4c3e08a43f5d8131ce78761580922c8fef1022d4 100644 (file)
@@ -23,6 +23,9 @@
 #define NOUVEAU_BUFFER_USAGE_ZETA     (1 << 17)
 #define NOUVEAU_BUFFER_USAGE_TRANSFER (1 << 18)
 
+/* use along with GPU_WRITE for 2D-only writes */
+#define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19)
+
 extern struct pipe_screen *
 nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
 
index 12df7fd1997b4c0eb07389e7f8cf37dc89cf2bef..819f45e96ad4a5813ace9f61b34296bc8a6088bf 100644 (file)
@@ -491,3 +491,49 @@ nv04_surface_2d_init(struct nouveau_screen *screen)
        ctx->fill = nv04_surface_fill;
        return ctx;
 }
+
+/**
+ * Wrap ns in a render-capable temporary surface of the same size and format.
+ * ns becomes the 2D-only (NO_RENDER) backing of the returned surface; unless
+ * the caller passed DISCARD, its contents are first copied into the temporary.
+ * NOTE(review): texture_create/get_tex_surface results are not NULL-checked, and
+ * the creation reference on the temporary texture is not dropped here - confirm.
+ */
+struct nv04_surface*
+nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns)
+{
+       struct pipe_texture templ;
+       struct pipe_texture* temp_tex;
+       struct nv04_surface* temp_ns;
+       int temp_flags;
+
+       /* the temporary inherits the requested usage; ns itself is from now on only accessed by the 2D engine */
+       if(ns->base.usage & PIPE_BUFFER_USAGE_DISCARD) {
+               temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ;
+               ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_DISCARD;
+       } else {
+               temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE;
+               ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ;
+       }
+
+       memset(&templ, 0, sizeof(templ));
+       templ.format = ns->base.texture->format;
+       templ.target = PIPE_TEXTURE_2D;
+       templ.width0 = ns->base.width;
+       templ.height0 = ns->base.height;
+       templ.depth0 = 1;
+       templ.last_level = 0;
+       // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented
+       templ.nr_samples = ns->base.texture->nr_samples;
+       templ.tex_usage = ns->base.texture->tex_usage | PIPE_TEXTURE_USAGE_RENDER_TARGET;
+
+       temp_tex = pscreen->texture_create(pscreen, &templ);
+       temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags);
+       temp_ns->backing = ns;
+
+       /* GPU_READ is left clear only for DISCARD, so this preloads the temporary from ns (dst first, src second) */
+       if(ns->base.usage & PIPE_BUFFER_USAGE_GPU_READ)
+               eng2d->copy(eng2d, &temp_ns->base, 0, 0, &ns->base, 0, 0, ns->base.width, ns->base.height);
+       return temp_ns;
+}
+
index 02b3f56ba8b8c89a06f19e9488334803b5a08c92..ce696a11a39e44a9fda8425a082f2a0ecdab559d 100644 (file)
@@ -4,6 +4,7 @@
 struct nv04_surface {
        struct pipe_surface base;
        unsigned pitch;
+       struct nv04_surface* backing; /* surface wrapped by this render temporary (set by nv04_surface_wrap_for_render); presumably NULL otherwise - confirm allocation zeroes it */
 };
 
 struct nv04_surface_2d {
@@ -30,4 +31,7 @@ nv04_surface_2d_init(struct nouveau_screen *screen);
 void
 nv04_surface_2d_takedown(struct nv04_surface_2d **);
 
+struct nv04_surface*
+nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns); /* returns a temporary render surface whose ->backing is ns */
+
 #endif
index d1291a92e0ab1405c6e8bb18860ae27b2a905caf..8f7538e7f576af2528ee1383c6f800f5ccab8bdd 100644 (file)
@@ -6,6 +6,7 @@
 
 #include "nv20_context.h"
 #include "nv20_screen.h"
+#include "../nv04/nv04_surface_2d.h"
 
 static void
 nv20_miptree_layout(struct nv20_miptree *nv20mt)
@@ -127,6 +128,12 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
        if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
                buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
 
+       /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+        * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+        * This also happens for small mipmaps of large textures. */
+       if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+               mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
        nv20_miptree_layout(mt);
 
        mt->buffer = screen->buffer_create(screen, 256, buf_usage, mt->total_size);
@@ -183,12 +190,27 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
                ns->base.offset = nv20mt->level[level].image_offset[0];
        }
 
+       /* create a linear temporary that we can render into if necessary.
+        * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+        * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+       if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+               return &nv04_surface_wrap_for_render(screen, ((struct nv20_screen*)screen)->eng2d, ns)->base;
+
        return &ns->base;
 }
 
 static void
 nv20_miptree_surface_destroy(struct pipe_surface *ps)
 {
+       struct nv04_surface* ns = (struct nv04_surface*)ps; /* ps is the base member of the driver surface */
+       if(ns->backing) /* non-NULL when ps is a temporary made by nv04_surface_wrap_for_render */
+       {
+               struct nv20_screen* screen = (struct nv20_screen*)ps->texture->screen;
+               if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) /* write the temporary's contents back through the 2D engine */
+                       screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); /* presumably (dst, src) order - confirm */
+               nv20_miptree_surface_destroy(&ns->backing->base); /* then release the wrapped surface itself */
+       }
+       
        pipe_texture_reference(&ps->texture, NULL);
        FREE(ps);
 }
index 69b79c809f470ff4209950bd2f0dcf83852c06b6..7b5118863577de37bf425dbd21a8095a6514fd8f 100644 (file)
@@ -126,7 +126,7 @@ nv20_transfer_del(struct pipe_transfer *ptx)
 
                dst = pscreen->get_tex_surface(pscreen, ptx->texture,
                                               ptx->face, ptx->level, ptx->zslice,
-                                              PIPE_BUFFER_USAGE_GPU_WRITE);
+                                              PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
 
                /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
                nvscreen->eng2d->copy(nvscreen->eng2d,
index ce95d9700f60702ad7ff6a1aa377d49e64bf2c55..8fbba38e78f30d9cf59f6f4e8465f4c1fdf64103 100644 (file)
@@ -5,6 +5,7 @@
 #include "util/u_math.h"
 
 #include "nv30_context.h"
+#include "../nv04/nv04_surface_2d.h"
 
 static void
 nv30_miptree_layout(struct nv30_miptree *nv30mt)
@@ -108,6 +109,12 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
        if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
                buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
 
+       /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+        * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+        * This also happens for small mipmaps of large textures. */
+       if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+               mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
        nv30_miptree_layout(mt);
 
        mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage,
@@ -196,12 +203,27 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
                ns->base.offset = nv30mt->level[level].image_offset[0];
        }
 
+       /* create a linear temporary that we can render into if necessary.
+        * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+        * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+       if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+               return &nv04_surface_wrap_for_render(pscreen, ((struct nv30_screen*)pscreen)->eng2d, ns)->base;
+
        return &ns->base;
 }
 
 static void
 nv30_miptree_surface_del(struct pipe_surface *ps)
 {
+       struct nv04_surface* ns = (struct nv04_surface*)ps; /* ps is the base member of the driver surface */
+       if(ns->backing) /* non-NULL when ps is a temporary made by nv04_surface_wrap_for_render */
+       {
+               struct nv30_screen* screen = (struct nv30_screen*)ps->texture->screen;
+               if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) /* write the temporary's contents back through the 2D engine */
+                       screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); /* presumably (dst, src) order - confirm */
+               nv30_miptree_surface_del(&ns->backing->base); /* then release the wrapped surface itself */
+       }
+
        pipe_texture_reference(&ps->texture, NULL);
        FREE(ps);
 }
index 2255a02caedb059e5add52eb7f44413b64b073be..68047c47ec52f7f64d00d8f22f0a3c786d6147f0 100644 (file)
@@ -126,7 +126,7 @@ nv30_transfer_del(struct pipe_transfer *ptx)
 
                dst = pscreen->get_tex_surface(pscreen, ptx->texture,
                                               ptx->face, ptx->level, ptx->zslice,
-                                              PIPE_BUFFER_USAGE_GPU_WRITE);
+                                              PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
 
                /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
                nvscreen->eng2d->copy(nvscreen->eng2d,
index b974e68a077643c6857da233fea18dd0a7c4cdee..89bd155ff493ace556940c692e5f211d6d9d3c2d 100644 (file)
@@ -5,6 +5,7 @@
 #include "util/u_math.h"
 
 #include "nv40_context.h"
+#include "../nv04/nv04_surface_2d.h"
 
 
 
@@ -105,6 +106,12 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
        if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
                buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
 
+       /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+        * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+        * This also happens for small mipmaps of large textures. */
+       if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+               mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
        nv40_miptree_layout(mt);
 
        mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size);
@@ -191,12 +198,27 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
                ns->base.offset = mt->level[level].image_offset[0];
        }
 
+       /* create a linear temporary that we can render into if necessary.
+        * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+        * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+       if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+               return &nv04_surface_wrap_for_render(pscreen, ((struct nv40_screen*)pscreen)->eng2d, ns)->base;
+
        return &ns->base;
 }
 
 static void
 nv40_miptree_surface_del(struct pipe_surface *ps)
 {
+       struct nv04_surface* ns = (struct nv04_surface*)ps; /* ps is the base member of the driver surface */
+       if(ns->backing) /* non-NULL when ps is a temporary made by nv04_surface_wrap_for_render */
+       {
+               struct nv40_screen* screen = (struct nv40_screen*)ps->texture->screen;
+               if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) /* write the temporary's contents back through the 2D engine */
+                       screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); /* presumably (dst, src) order - confirm */
+               nv40_miptree_surface_del(&ns->backing->base); /* then release the wrapped surface itself */
+       }
+
        pipe_texture_reference(&ps->texture, NULL);
        FREE(ps);
 }
index b084a38b48273cbb7ebb42f2d0f36f8ffbffa9a8..adfd0356213534ea2a2cbb8ed304f2ee35521fab 100644 (file)
@@ -126,7 +126,7 @@ nv40_transfer_del(struct pipe_transfer *ptx)
 
                dst = pscreen->get_tex_surface(pscreen, ptx->texture,
                                               ptx->face, ptx->level, ptx->zslice,
-                                              PIPE_BUFFER_USAGE_GPU_WRITE);
+                                              PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
 
                /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
                nvscreen->eng2d->copy(nvscreen->eng2d,