nvfx: rework state_fb code to get rid of render temps
[mesa.git] / src / gallium / drivers / nvfx / nvfx_surface.c
index 806f1a22e65e3b42ee5349b3d1657261c8c5bf05..73a5260ac0f34dedddad628fb5331dea9086015e 100644 (file)
@@ -32,8 +32,8 @@
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_pack_color.h"
-#include "util/u_rect.h"
 #include "util/u_blitter.h"
+#include "util/u_surface.h"
 
 #include "nouveau/nouveau_winsys.h"
 #include "nouveau/nouveau_screen.h"
@@ -48,6 +48,9 @@ static INLINE void
 nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format)
 {
        unsigned bits = util_format_get_blocksizebits(format);
+       unsigned shift = 0;
+       rgn->one_bits = 0;
+
        switch(bits)
        {
        case 8:
@@ -55,53 +58,37 @@ nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format)
                break;
        case 16:
                rgn->bpps = 1;
+               if(format == PIPE_FORMAT_B5G5R5X1_UNORM)
+                       rgn->one_bits = 1;
                break;
        case 32:
                rgn->bpps = 2;
+               if(format == PIPE_FORMAT_R8G8B8X8_UNORM || format == PIPE_FORMAT_B8G8R8X8_UNORM)
+                       rgn->one_bits = 8;
+               break;
+       case 64:
+               rgn->bpps = 2;
+               shift = 1;
+               break;
+       case 128:
+               rgn->bpps = 2;
+               shift = 2;
                break;
-       default:
-               {
-                       int shift;
-                       assert(util_is_pot(bits));
-                       shift = util_logbase2(bits) - 3;
-                       assert(shift >= 2);
-                       rgn->bpps = 2;
-                       shift -= 2;
-
-                       rgn->x = util_format_get_nblocksx(format, rgn->x) << shift;
-                       rgn->y = util_format_get_nblocksy(format, rgn->y);
-               }
        }
-}
 
-static INLINE void
-nvfx_region_fixup_swizzled(struct nv04_region* rgn, unsigned zslice, unsigned width, unsigned height, unsigned depth)
-{
-       // TODO: move this code to surface creation?
-       if((depth <= 1) && (height <= 1 || width <= 2))
-               rgn->pitch = width << rgn->bpps;
-       else if(depth > 1 && height <= 2 && width <= 2)
-       {
-               rgn->pitch = width << rgn->bpps;
-               rgn->offset += (zslice * width * height) << rgn->bpps;
-       }
-       else
-       {
-               rgn->pitch = 0;
-               rgn->z = zslice;
-               rgn->w = width;
-               rgn->h = height;
-               rgn->d = depth;
+       if(shift) {
+               rgn->x = util_format_get_nblocksx(format, rgn->x) << shift;
+               rgn->y = util_format_get_nblocksy(format, rgn->y);
+               rgn->w <<= shift;
        }
 }
 
 static INLINE void
-nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, bool for_write)
+nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, boolean for_write)
 {
        rgn->x = x;
        rgn->y = y;
        rgn->z = 0;
-       nvfx_region_set_format(rgn, surf->base.base.format);
 
        if(surf->temp)
        {
@@ -113,20 +100,31 @@ nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf,
                        util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(&surf->base.base), &surf->base);
        } else {
                rgn->bo = ((struct nvfx_resource*)surf->base.base.texture)->bo;
-               rgn->offset = surf->base.base.offset;
-               rgn->pitch = surf->pitch;
-
-               if(!(surf->base.base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR))
-                       nvfx_region_fixup_swizzled(rgn, surf->base.base.zslice, surf->base.base.width, surf->base.base.height, u_minify(surf->base.base.texture->depth0, surf->base.base.level));
+               rgn->offset = surf->offset;
+
+               if(surf->base.base.texture->flags & NOUVEAU_RESOURCE_FLAG_LINEAR)
+                       rgn->pitch = surf->pitch;
+               else
+               {
+                       rgn->pitch = 0;
+                       rgn->z = surf->base.base.u.tex.first_layer;
+                       rgn->w = surf->base.base.width;
+                       rgn->h = surf->base.base.height;
+                       rgn->d = u_minify(surf->base.base.texture->depth0, surf->base.base.u.tex.level);
+               }
        }
+
+       nvfx_region_set_format(rgn, surf->base.base.format);
+       if(!rgn->pitch)
+               nv04_region_try_to_linearize(rgn);
 }
 
 static INLINE void
-nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z, bool for_write)
+nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, unsigned level, unsigned x, unsigned y, unsigned z, boolean for_write)
 {
        if(pt->target != PIPE_BUFFER)
        {
-               struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, sub.face, sub.level, z);
+               struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, level, z);
                if(ns && util_dirty_surface_is_dirty(&ns->base))
                {
                        nvfx_region_init_for_surface(rgn, ns, x, y, for_write);
@@ -135,68 +133,51 @@ nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource*
        }
 
        rgn->bo = ((struct nvfx_resource*)pt)->bo;
-       rgn->offset = nvfx_subresource_offset(pt, sub.face, sub.level, z);
-       rgn->pitch = nvfx_subresource_pitch(pt, sub.level);
+       rgn->offset = nvfx_subresource_offset(pt, z, level, z);
        rgn->x = x;
        rgn->y = y;
-       rgn->z = 0;
-
-       nvfx_region_set_format(rgn, pt->format);
-       if(!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR))
-               nvfx_region_fixup_swizzled(rgn, z, u_minify(pt->width0, sub.level), u_minify(pt->height0, sub.level), u_minify(pt->depth0, sub.level));
-}
-
-// TODO: actually test this for all formats, it's probably wrong for some...
 
-static INLINE int
-nvfx_surface_format(enum pipe_format format)
-{
-       switch(util_format_get_blocksize(format)) {
-       case 1:
-               return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
-       case 2:
-               //return NV04_CONTEXT_SURFACES_2D_FORMAT_Y16;
-               return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5;
-       case 4:
-               //if(format == PIPE_FORMAT_B8G8R8X8_UNORM || format == PIPE_FORMAT_B8G8R8A8_UNORM)
-                       return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8;
-               //else
-               //      return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32;
-       default:
-               return -1;
+       if(pt->flags & NOUVEAU_RESOURCE_FLAG_LINEAR)
+       {
+               rgn->pitch = nvfx_subresource_pitch(pt, level);
+               rgn->z = 0;
        }
-}
-
-static INLINE int
-nv04_scaled_image_format(enum pipe_format format)
-{
-       switch(util_format_get_blocksize(format)) {
-       case 1:
-               return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8;
-       case 2:
-               //if(format == PIPE_FORMAT_B5G5R5A1_UNORM)
-               //      return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5;
-               //else
-                       return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5;
-       case 4:
-               if(format == PIPE_FORMAT_B8G8R8X8_UNORM)
-                       return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8;
-               else
-                       return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8;
-       default:
-               return -1;
+       else
+       {
+               rgn->pitch = 0;
+               rgn->z = z;
+               rgn->w = u_minify(pt->width0, level);
+               rgn->h = u_minify(pt->height0, level);
+               rgn->d = u_minify(pt->depth0, level);
        }
+
+       nvfx_region_set_format(rgn, pt->format);
+       if(!rgn->pitch)
+               nv04_region_try_to_linearize(rgn);
 }
 
-// XXX: must save index buffer too!
+// don't save index buffer because blitter doesn't set it
 static struct blitter_context*
 nvfx_get_blitter(struct pipe_context* pipe, int copy)
 {
        struct nvfx_context* nvfx = nvfx_context(pipe);
+       struct blitter_context** pblitter;
+       struct blitter_context* blitter;
 
-       struct blitter_context* blitter = nvfx->blitter;
-       if(!blitter)
-               nvfx->blitter = blitter = util_blitter_create(pipe);
+       assert(nvfx->blitters_in_use < Elements(nvfx->blitter));
+
+       if(nvfx->query && !nvfx->blitters_in_use)
+       {
+               struct nouveau_channel* chan = nvfx->screen->base.channel;
+               struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
+               BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
+               OUT_RING(chan, 0);
+       }
+
+       pblitter = &nvfx->blitter[nvfx->blitters_in_use++];
+       if(!*pblitter)
+               *pblitter = util_blitter_create(pipe);
+       blitter = *pblitter;
 
        util_blitter_save_blend(blitter, nvfx->blend);
        util_blitter_save_depth_stencil_alpha(blitter, nvfx->zsa);
@@ -206,7 +187,6 @@ nvfx_get_blitter(struct pipe_context* pipe, int copy)
        util_blitter_save_vertex_shader(blitter, nvfx->vertprog);
        util_blitter_save_viewport(blitter, &nvfx->viewport);
        util_blitter_save_framebuffer(blitter, &nvfx->framebuffer);
-       util_blitter_save_clip(blitter, &nvfx->clip);
        util_blitter_save_vertex_elements(blitter, nvfx->vtxelt);
        util_blitter_save_vertex_buffers(blitter, nvfx->vtxbuf_nr, nvfx->vtxbuf);
 
@@ -219,6 +199,22 @@ nvfx_get_blitter(struct pipe_context* pipe, int copy)
        return blitter;
 }
 
+/* Release a blitter obtained from nvfx_get_blitter().
+ *
+ * Decrements nvfx->blitters_in_use; when the count reaches zero while a
+ * query is set on the context, writes 1 to NV30_3D_QUERY_ENABLE, mirroring
+ * the 0 that nvfx_get_blitter() writes when the first blitter is taken.
+ * The blitter argument itself is not used.
+ */
+static INLINE void
+nvfx_put_blitter(struct pipe_context* pipe, struct blitter_context* blitter)
+{
+       struct nvfx_context* nvfx = nvfx_context(pipe);
+
+       /* check before decrementing so an unbalanced put is caught even if
+        * the counter is unsigned */
+       assert(nvfx->blitters_in_use > 0);
+       --nvfx->blitters_in_use;
+
+       if(nvfx->query && !nvfx->blitters_in_use)
+       {
+               struct nouveau_channel* chan = nvfx->screen->base.channel;
+               struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
+               BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
+               OUT_RING(chan, 1);
+       }
+}
+
 static unsigned
 nvfx_region_clone(struct nv04_2d_context* ctx, struct nv04_region* rgn, unsigned w, unsigned h, boolean for_read)
 {
@@ -238,11 +234,10 @@ nvfx_region_clone(struct nv04_2d_context* ctx, struct nv04_region* rgn, unsigned
 
 static void
 nvfx_resource_copy_region(struct pipe_context *pipe,
-                 struct pipe_resource *dstr, struct pipe_subresource subdst,
-                 unsigned dstx, unsigned dsty, unsigned dstz,
-                 struct pipe_resource *srcr, struct pipe_subresource subsrc,
-                 unsigned srcx, unsigned srcy, unsigned srcz,
-                 unsigned w, unsigned h)
+                         struct pipe_resource *dstr, unsigned dst_level,
+                         unsigned dstx, unsigned dsty, unsigned dstz,
+                         struct pipe_resource *srcr, unsigned src_level,
+                         const struct pipe_box *src_box)
 {
        static int copy_threshold = -1;
        struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d;
@@ -251,18 +246,27 @@ nvfx_resource_copy_region(struct pipe_context *pipe,
        int src_on_gpu;
        boolean small;
        int ret;
+       unsigned w = src_box->width;
+       unsigned h = src_box->height;
 
        if(!w || !h)
                return;
 
+        /* Fallback for buffers. */
+        if (dstr->target == PIPE_BUFFER && srcr->target == PIPE_BUFFER) {
+                util_resource_copy_region(pipe, dstr, dst_level, dstx, dsty, dstz,
+                                          srcr, src_level, src_box);
+                return;
+        }
+
        if(copy_threshold < 0)
                copy_threshold = debug_get_num_option("NOUVEAU_COPY_THRESHOLD", 4);
 
        dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING;
        src_on_gpu = nvfx_resource_on_gpu(srcr);
 
-       nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz, TRUE);
-       nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz, FALSE);
+       nvfx_region_init_for_subresource(&dst, dstr, dst_level, dstx, dsty, dstz, TRUE);
+       nvfx_region_init_for_subresource(&src, srcr, src_level, src_box->x, src_box->y, src_box->z, FALSE);
        w = util_format_get_stride(dstr->format, w) >> dst.bpps;
        h = util_format_get_nblocksy(dstr->format, h);
 
@@ -270,16 +274,21 @@ nvfx_resource_copy_region(struct pipe_context *pipe,
        if((!dst_to_gpu || !src_on_gpu) && small)
                ret = -1; /* use the CPU */
        else
-               ret = nv04_region_copy_2d(ctx, &dst, &src, w, h,
-                       dstr->target == PIPE_BUFFER ? -1 : nvfx_surface_format(dstr->format),
-                       dstr->target == PIPE_BUFFER ? -1 : nv04_scaled_image_format(dstr->format),
-                       dst_to_gpu, src_on_gpu);
+               ret = nv04_region_copy_2d(ctx, &dst, &src, w, h, dst_to_gpu, src_on_gpu);
        if(!ret)
        {}
-       else if(ret > 0 && dstr->bind & PIPE_BIND_RENDER_TARGET && srcr->bind & PIPE_BIND_SAMPLER_VIEW)
+       else if(ret > 0
+                       && dstr->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)
+                       && srcr->bind & PIPE_BIND_SAMPLER_VIEW)
        {
+               /* this currently works because we hack the bind flags on resource creation to be
+                * the maximum set that the resource type actually supports
+                *
+                * TODO: perhaps support reinterpreting the formats
+                */
                struct blitter_context* blitter = nvfx_get_blitter(pipe, 1);
-               util_blitter_copy_region(blitter, dstr, subdst, dstx, dsty, dstz, srcr, subsrc, srcx, srcy, srcz, w, h, TRUE);
+               util_blitter_copy_texture(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE);
+               nvfx_put_blitter(pipe, blitter);
        }
        else
        {
@@ -370,23 +379,43 @@ static void
 nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int to_temp)
 {
        struct nvfx_surface* ns = (struct nvfx_surface*)surf;
-       struct pipe_subresource tempsr, surfsr;
+       struct pipe_box box;
        struct nvfx_context* nvfx = nvfx_context(pipe);
+       struct nvfx_miptree* temp;
+       unsigned use_vertex_buffers;
+       boolean use_index_buffer;
+       unsigned base_vertex;
+
+       /* temporarily detach the temp, so it isn't used in place of the actual resource */
+       temp = ns->temp;
+       ns->temp = 0;
 
        // TODO: we really should do this validation before setting these variable in draw calls
-       unsigned use_vertex_buffers = nvfx->use_vertex_buffers;
-       boolean use_index_buffer = nvfx->use_index_buffer;
-       unsigned base_vertex = nvfx->base_vertex;
+       use_vertex_buffers = nvfx->use_vertex_buffers;
+       use_index_buffer = nvfx->use_index_buffer;
+       base_vertex = nvfx->base_vertex;
+
+       box.x = box.y = 0;
+       assert(surf->u.tex.first_layer == surf->u.tex.last_layer);
+       box.width = surf->width;
+       box.height = surf->height;
+       box.depth = 1;
+
+       if(to_temp) {
+               box.z = surf->u.tex.first_layer;
+               nvfx_resource_copy_region(pipe, &temp->base.base, 0, 0, 0, 0, surf->texture, surf->u.tex.level, &box);
+       }
+       else {
+               box.z = 0;
+               nvfx_resource_copy_region(pipe, surf->texture, surf->u.tex.level, 0, 0, surf->u.tex.first_layer, &temp->base.base, 0, &box);
+       }
 
-       tempsr.face = 0;
-       tempsr.level = 0;
-       surfsr.face = surf->face;
-       surfsr.level = surf->level;
+       /* If this triggers, it probably means we attempted to use the blitter
+        * but failed due to non-renderability of the target.
+        * Obviously, this would lead to infinite recursion if supported. */
+       assert(!ns->temp);
 
-       if(to_temp)
-               nvfx_resource_copy_region(pipe, &ns->temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height);
-       else
-               nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &ns->temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height);
+       ns->temp = temp;
 
        nvfx->use_vertex_buffers = use_vertex_buffers;
        nvfx->use_index_buffer = use_index_buffer;
@@ -399,6 +428,8 @@ nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int
 void
 nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf)
 {
+       assert (0);
+
        struct nvfx_surface* ns = (struct nvfx_surface*)surf;
        struct pipe_resource template;
        memset(&template, 0, sizeof(struct pipe_resource));
@@ -408,7 +439,9 @@ nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf)
        template.height0 = surf->height;
        template.depth0 = 1;
        template.nr_samples = surf->texture->nr_samples;
-       template.flags = NVFX_RESOURCE_FLAG_LINEAR;
+       template.flags = NOUVEAU_RESOURCE_FLAG_LINEAR;
+
+       assert(!ns->temp && !util_dirty_surface_is_dirty(&ns->base));
 
        ns->temp = (struct nvfx_miptree*)nvfx_miptree_create(pipe->screen, &template);
        nvfx_surface_copy_temp(pipe, surf, 1);
@@ -421,11 +454,10 @@ nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf)
        struct nvfx_surface* ns = (struct nvfx_surface*)surf;
        boolean bound = FALSE;
 
-       /* must be done before the copy, otherwise the copy will use the temp as destination */
-       util_dirty_surface_set_clean(nvfx_surface_get_dirty_surfaces(surf), &ns->base);
-
        nvfx_surface_copy_temp(pipe, surf, 0);
 
+       util_dirty_surface_set_clean(nvfx_surface_get_dirty_surfaces(surf), &ns->base);
+
        if(nvfx->framebuffer.zsbuf == surf)
                bound = TRUE;
        else
@@ -447,19 +479,20 @@ nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf)
 static void
 nvfx_clear_render_target(struct pipe_context *pipe,
                         struct pipe_surface *dst,
-                        const float *rgba,
+                        const union pipe_color_union *color,
                         unsigned dstx, unsigned dsty,
                         unsigned width, unsigned height)
 {
        union util_color uc;
-       util_pack_color(rgba, dst->format, &uc);
+       util_pack_color(color->f, dst->format, &uc);
 
        if(util_format_get_blocksizebits(dst->format) > 32
                || nvfx_surface_fill(pipe, dst, dstx, dsty, width, height, uc.ui))
        {
                // TODO: probably should use hardware clear here instead if possible
                struct blitter_context* blitter = nvfx_get_blitter(pipe, 0);
-               util_blitter_clear_render_target(blitter, dst, rgba, dstx, dsty, width, height);
+               util_blitter_clear_render_target(blitter, dst, color, dstx, dsty, width, height);
+               nvfx_put_blitter(pipe, blitter);
        }
 }
 
@@ -478,6 +511,7 @@ nvfx_clear_depth_stencil(struct pipe_context *pipe,
                // TODO: probably should use hardware clear here instead if possible
                struct blitter_context* blitter = nvfx_get_blitter(pipe, 0);
                util_blitter_clear_depth_stencil(blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height);
+               nvfx_put_blitter(pipe, blitter);
        }
 }