nvfx: rework state_fb code to get rid of render temps
[mesa.git] / src / gallium / drivers / nvfx / nvfx_surface.c
index 7efdd954b4b4183db7fcc1df1f5a4944174658f7..73a5260ac0f34dedddad628fb5331dea9086015e 100644 (file)
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_pack_color.h"
-#include "util/u_rect.h"
 #include "util/u_blitter.h"
+#include "util/u_surface.h"
 
 #include "nouveau/nouveau_winsys.h"
-#include "nouveau/nouveau_util.h"
 #include "nouveau/nouveau_screen.h"
 #include "nvfx_context.h"
 #include "nvfx_screen.h"
@@ -49,6 +48,9 @@ static INLINE void
 nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format)
 {
        unsigned bits = util_format_get_blocksizebits(format);
+       unsigned shift = 0;
+       rgn->one_bits = 0;
+
        switch(bits)
        {
        case 8:
@@ -56,50 +58,37 @@ nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format)
                break;
        case 16:
                rgn->bpps = 1;
+               if(format == PIPE_FORMAT_B5G5R5X1_UNORM)
+                       rgn->one_bits = 1;
                break;
        case 32:
                rgn->bpps = 2;
+               if(format == PIPE_FORMAT_R8G8B8X8_UNORM || format == PIPE_FORMAT_B8G8R8X8_UNORM)
+                       rgn->one_bits = 8;
+               break;
+       case 64:
+               rgn->bpps = 2;
+               shift = 1;
                break;
-       default:
-               assert(util_is_pot(bits));
-               int shift = log2i(bits) - 3;
-               assert(shift >= 2);
+       case 128:
                rgn->bpps = 2;
-               shift -= 2;
+               shift = 2;
+               break;
+       }
 
+       if(shift) {
                rgn->x = util_format_get_nblocksx(format, rgn->x) << shift;
                rgn->y = util_format_get_nblocksy(format, rgn->y);
+               rgn->w <<= shift;
        }
 }
 
 static INLINE void
-nvfx_region_fixup_swizzled(struct nv04_region* rgn, unsigned zslice, unsigned width, unsigned height, unsigned depth)
-{
-       // TODO: move this code to surface creation?
-       if((depth <= 1) && (height <= 1 || width <= 2))
-               rgn->pitch = width << rgn->bpps;
-       else if(depth > 1 && height <= 2 && width <= 2)
-       {
-               rgn->pitch = width << rgn->bpps;
-               rgn->offset += (zslice * width * height) << rgn->bpps;
-       }
-       else
-       {
-               rgn->pitch = 0;
-               rgn->z = zslice;
-               rgn->w = width;
-               rgn->h = height;
-               rgn->d = depth;
-       }
-}
-
-static INLINE void
-nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, bool for_write)
+nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, boolean for_write)
 {
        rgn->x = x;
        rgn->y = y;
        rgn->z = 0;
-       nvfx_region_set_format(rgn, surf->base.base.format);
 
        if(surf->temp)
        {
@@ -111,20 +100,31 @@ nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf,
                        util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(&surf->base.base), &surf->base);
        } else {
                rgn->bo = ((struct nvfx_resource*)surf->base.base.texture)->bo;
-               rgn->offset = surf->base.base.offset;
-               rgn->pitch = surf->pitch;
-
-               if(!(surf->base.base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR))
-                       nvfx_region_fixup_swizzled(rgn, surf->base.base.zslice, surf->base.base.width, surf->base.base.height, u_minify(surf->base.base.texture->depth0, surf->base.base.level));
+               rgn->offset = surf->offset;
+
+               if(surf->base.base.texture->flags & NOUVEAU_RESOURCE_FLAG_LINEAR)
+                       rgn->pitch = surf->pitch;
+               else
+               {
+                       rgn->pitch = 0;
+                       rgn->z = surf->base.base.u.tex.first_layer;
+                       rgn->w = surf->base.base.width;
+                       rgn->h = surf->base.base.height;
+                       rgn->d = u_minify(surf->base.base.texture->depth0, surf->base.base.u.tex.level);
+               }
        }
+
+       nvfx_region_set_format(rgn, surf->base.base.format);
+       if(!rgn->pitch)
+               nv04_region_try_to_linearize(rgn);
 }
 
 static INLINE void
-nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z, bool for_write)
+nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, unsigned level, unsigned x, unsigned y, unsigned z, boolean for_write)
 {
        if(pt->target != PIPE_BUFFER)
        {
-               struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, sub.face, sub.level, z);
+               struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, level, z);
                if(ns && util_dirty_surface_is_dirty(&ns->base))
                {
                        nvfx_region_init_for_surface(rgn, ns, x, y, for_write);
@@ -133,68 +133,51 @@ nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource*
        }
 
        rgn->bo = ((struct nvfx_resource*)pt)->bo;
-       rgn->offset = nvfx_subresource_offset(pt, sub.face, sub.level, z);
-       rgn->pitch = nvfx_subresource_pitch(pt, sub.level);
+       rgn->offset = nvfx_subresource_offset(pt, z, level, z);
        rgn->x = x;
        rgn->y = y;
-       rgn->z = 0;
-
-       nvfx_region_set_format(rgn, pt->format);
-       if(!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR))
-               nvfx_region_fixup_swizzled(rgn, z, u_minify(pt->width0, sub.level), u_minify(pt->height0, sub.level), u_minify(pt->depth0, sub.level));
-}
-
-// TODO: actually test this for all formats, it's probably wrong for some...
 
-static INLINE int
-nvfx_surface_format(enum pipe_format format)
-{
-       switch(util_format_get_blocksize(format)) {
-       case 1:
-               return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
-       case 2:
-               //return NV04_CONTEXT_SURFACES_2D_FORMAT_Y16;
-               return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5;
-       case 4:
-               //if(format == PIPE_FORMAT_B8G8R8X8_UNORM || format == PIPE_FORMAT_B8G8R8A8_UNORM)
-                       return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8;
-               //else
-               //      return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32;
-       default:
-               return -1;
+       if(pt->flags & NOUVEAU_RESOURCE_FLAG_LINEAR)
+       {
+               rgn->pitch = nvfx_subresource_pitch(pt, level);
+               rgn->z = 0;
        }
-}
-
-static INLINE int
-nv04_scaled_image_format(enum pipe_format format)
-{
-       switch(util_format_get_blocksize(format)) {
-       case 1:
-               return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8;
-       case 2:
-               //if(format == PIPE_FORMAT_B5G5R5A1_UNORM)
-               //      return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5;
-               //else
-                       return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5;
-       case 4:
-               if(format == PIPE_FORMAT_B8G8R8X8_UNORM)
-                       return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8;
-               else
-                       return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8;
-       default:
-               return -1;
+       else
+       {
+               rgn->pitch = 0;
+               rgn->z = z;
+               rgn->w = u_minify(pt->width0, level);
+               rgn->h = u_minify(pt->height0, level);
+               rgn->d = u_minify(pt->depth0, level);
        }
+
+       nvfx_region_set_format(rgn, pt->format);
+       if(!rgn->pitch)
+               nv04_region_try_to_linearize(rgn);
 }
 
-// XXX: must save index buffer too!
+// don't save index buffer because blitter doesn't set it
 static struct blitter_context*
 nvfx_get_blitter(struct pipe_context* pipe, int copy)
 {
        struct nvfx_context* nvfx = nvfx_context(pipe);
+       struct blitter_context** pblitter;
+       struct blitter_context* blitter;
+
+       assert(nvfx->blitters_in_use < Elements(nvfx->blitter));
+
+       if(nvfx->query && !nvfx->blitters_in_use)
+       {
+               struct nouveau_channel* chan = nvfx->screen->base.channel;
+               struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
+               BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
+               OUT_RING(chan, 0);
+       }
 
-       struct blitter_context* blitter = nvfx->blitter;
-       if(!blitter)
-               nvfx->blitter = blitter = util_blitter_create(pipe);
+       pblitter = &nvfx->blitter[nvfx->blitters_in_use++];
+       if(!*pblitter)
+               *pblitter = util_blitter_create(pipe);
+       blitter = *pblitter;
 
        util_blitter_save_blend(blitter, nvfx->blend);
        util_blitter_save_depth_stencil_alpha(blitter, nvfx->zsa);
@@ -204,7 +187,6 @@ nvfx_get_blitter(struct pipe_context* pipe, int copy)
        util_blitter_save_vertex_shader(blitter, nvfx->vertprog);
        util_blitter_save_viewport(blitter, &nvfx->viewport);
        util_blitter_save_framebuffer(blitter, &nvfx->framebuffer);
-       util_blitter_save_clip(blitter, &nvfx->clip);
        util_blitter_save_vertex_elements(blitter, nvfx->vtxelt);
        util_blitter_save_vertex_buffers(blitter, nvfx->vtxbuf_nr, nvfx->vtxbuf);
 
@@ -217,6 +199,22 @@ nvfx_get_blitter(struct pipe_context* pipe, int copy)
        return blitter;
 }
 
+static inline void
+nvfx_put_blitter(struct pipe_context* pipe, struct blitter_context* blitter)
+{
+       struct nvfx_context* nvfx = nvfx_context(pipe);
+       --nvfx->blitters_in_use;
+       assert(nvfx->blitters_in_use >= 0);
+
+       if(nvfx->query && !nvfx->blitters_in_use)
+       {
+               struct nouveau_channel* chan = nvfx->screen->base.channel;
+               struct nouveau_grobj *eng3d = nvfx->screen->eng3d;
+               BEGIN_RING(chan, eng3d, NV30_3D_QUERY_ENABLE, 1);
+               OUT_RING(chan, 1);
+       }
+}
+
 static unsigned
 nvfx_region_clone(struct nv04_2d_context* ctx, struct nv04_region* rgn, unsigned w, unsigned h, boolean for_read)
 {
@@ -236,45 +234,61 @@ nvfx_region_clone(struct nv04_2d_context* ctx, struct nv04_region* rgn, unsigned
 
 static void
 nvfx_resource_copy_region(struct pipe_context *pipe,
-                 struct pipe_resource *dstr, struct pipe_subresource subdst,
-                 unsigned dstx, unsigned dsty, unsigned dstz,
-                 struct pipe_resource *srcr, struct pipe_subresource subsrc,
-                 unsigned srcx, unsigned srcy, unsigned srcz,
-                 unsigned w, unsigned h)
+                         struct pipe_resource *dstr, unsigned dst_level,
+                         unsigned dstx, unsigned dsty, unsigned dstz,
+                         struct pipe_resource *srcr, unsigned src_level,
+                         const struct pipe_box *src_box)
 {
+       static int copy_threshold = -1;
        struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d;
        struct nv04_region dst, src;
+       int dst_to_gpu;
+       int src_on_gpu;
+       boolean small;
+       int ret;
+       unsigned w = src_box->width;
+       unsigned h = src_box->height;
 
        if(!w || !h)
                return;
 
-       static int copy_threshold = -1;
+        /* Fallback for buffers. */
+        if (dstr->target == PIPE_BUFFER && srcr->target == PIPE_BUFFER) {
+                util_resource_copy_region(pipe, dstr, dst_level, dstx, dsty, dstz,
+                                          srcr, src_level, src_box);
+                return;
+        }
+
        if(copy_threshold < 0)
                copy_threshold = debug_get_num_option("NOUVEAU_COPY_THRESHOLD", 4);
 
-       int dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING;
-       int src_on_gpu = nvfx_resource_on_gpu(srcr);
+       dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING;
+       src_on_gpu = nvfx_resource_on_gpu(srcr);
 
-       nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz, TRUE);
-       nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz, FALSE);
+       nvfx_region_init_for_subresource(&dst, dstr, dst_level, dstx, dsty, dstz, TRUE);
+       nvfx_region_init_for_subresource(&src, srcr, src_level, src_box->x, src_box->y, src_box->z, FALSE);
        w = util_format_get_stride(dstr->format, w) >> dst.bpps;
        h = util_format_get_nblocksy(dstr->format, h);
 
-       int ret;
-       boolean small = (w * h <= copy_threshold);
+       small = (w * h <= copy_threshold);
        if((!dst_to_gpu || !src_on_gpu) && small)
                ret = -1; /* use the CPU */
        else
-               ret = nv04_region_copy_2d(ctx, &dst, &src, w, h,
-                       dstr->target == PIPE_BUFFER ? -1 : nvfx_surface_format(dstr->format),
-                       dstr->target == PIPE_BUFFER ? -1 : nv04_scaled_image_format(dstr->format),
-                       dst_to_gpu, src_on_gpu);
+               ret = nv04_region_copy_2d(ctx, &dst, &src, w, h, dst_to_gpu, src_on_gpu);
        if(!ret)
        {}
-       else if(ret > 0 && dstr->bind & PIPE_BIND_RENDER_TARGET && srcr->bind & PIPE_BIND_SAMPLER_VIEW)
+       else if(ret > 0
+                       && dstr->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)
+                       && srcr->bind & PIPE_BIND_SAMPLER_VIEW)
        {
+               /* this currently works because we hack the bind flags on resource creation to be
+                * the maximum set that the resource type actually supports
+                *
+                * TODO: perhaps support reinterpreting the formats
+                */
                struct blitter_context* blitter = nvfx_get_blitter(pipe, 1);
-               util_blitter_copy_region(blitter, dstr, subdst, dstx, dsty, dstz, srcr, subsrc, srcx, srcy, srcz, w, h, TRUE);
+               util_blitter_copy_texture(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE);
+               nvfx_put_blitter(pipe, blitter);
        }
        else
        {
@@ -310,6 +324,7 @@ nvfx_surface_fill(struct pipe_context* pipe, struct pipe_surface *dsts,
 {
        struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d;
        struct nv04_region dst;
+       int ret;
        /* Always try to use the GPU right now, if possible
         * If the user wanted the surface data on the CPU, he would have cleared with memset (hopefully) */
 
@@ -319,7 +334,7 @@ nvfx_surface_fill(struct pipe_context* pipe, struct pipe_surface *dsts,
        w = util_format_get_stride(dsts->format, w) >> dst.bpps;
        h = util_format_get_nblocksy(dsts->format, h);
 
-       int ret = nv04_region_fill_2d(ctx, &dst, w, h, value);
+       ret = nv04_region_fill_2d(ctx, &dst, w, h, value);
        if(ret > 0 && dsts->texture->bind & PIPE_BIND_RENDER_TARGET)
                return 1;
        else if(ret)
@@ -364,31 +379,57 @@ static void
 nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int to_temp)
 {
        struct nvfx_surface* ns = (struct nvfx_surface*)surf;
-       struct pipe_subresource tempsr, surfsr;
-       struct pipe_resource *idxbuf_buffer;
-       unsigned idxbuf_format;
+       struct pipe_box box;
+       struct nvfx_context* nvfx = nvfx_context(pipe);
+       struct nvfx_miptree* temp;
+       unsigned use_vertex_buffers;
+       boolean use_index_buffer;
+       unsigned base_vertex;
+
+       /* temporarily detach the temp, so it isn't used in place of the actual resource */
+       temp = ns->temp;
+       ns->temp = 0;
+
+       // TODO: we really should do this validation before setting these variables in draw calls
+       use_vertex_buffers = nvfx->use_vertex_buffers;
+       use_index_buffer = nvfx->use_index_buffer;
+       base_vertex = nvfx->base_vertex;
+
+       box.x = box.y = 0;
+       assert(surf->u.tex.first_layer == surf->u.tex.last_layer);
+       box.width = surf->width;
+       box.height = surf->height;
+       box.depth = 1;
+
+       if(to_temp) {
+               box.z = surf->u.tex.first_layer;
+               nvfx_resource_copy_region(pipe, &temp->base.base, 0, 0, 0, 0, surf->texture, surf->u.tex.level, &box);
+       }
+       else {
+               box.z = 0;
+               nvfx_resource_copy_region(pipe, surf->texture, surf->u.tex.level, 0, 0, surf->u.tex.first_layer, &temp->base.base, 0, &box);
+       }
 
-       tempsr.face = 0;
-       tempsr.level = 0;
-       surfsr.face = surf->face;
-       surfsr.level = surf->level;
+       /* If this triggers, it probably means we attempted to use the blitter
+        * but failed due to non-renderability of the target.
+        * Obviously, this would lead to infinite recursion if supported. */
+       assert(!ns->temp);
 
-       // TODO: do this properly, in blitter save
-       idxbuf_buffer = ((struct nvfx_context*)pipe)->idxbuf_buffer;
-       idxbuf_format = ((struct nvfx_context*)pipe)->idxbuf_format;
+       ns->temp = temp;
 
-       if(to_temp)
-               nvfx_resource_copy_region(pipe, &ns->temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height);
-       else
-               nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &ns->temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height);
+       nvfx->use_vertex_buffers = use_vertex_buffers;
+       nvfx->use_index_buffer = use_index_buffer;
+        nvfx->base_vertex = base_vertex;
 
-       ((struct nvfx_context*)pipe)->idxbuf_buffer = idxbuf_buffer;
-       ((struct nvfx_context*)pipe)->idxbuf_format = idxbuf_format;
+       nvfx->dirty |= NVFX_NEW_ARRAYS;
+       nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
 }
 
 void
 nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf)
 {
+       assert (0);
+
        struct nvfx_surface* ns = (struct nvfx_surface*)surf;
        struct pipe_resource template;
        memset(&template, 0, sizeof(struct pipe_resource));
@@ -398,7 +439,9 @@ nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf)
        template.height0 = surf->height;
        template.depth0 = 1;
        template.nr_samples = surf->texture->nr_samples;
-       template.flags = NVFX_RESOURCE_FLAG_LINEAR;
+       template.flags = NOUVEAU_RESOURCE_FLAG_LINEAR;
+
+       assert(!ns->temp && !util_dirty_surface_is_dirty(&ns->base));
 
        ns->temp = (struct nvfx_miptree*)nvfx_miptree_create(pipe->screen, &template);
        nvfx_surface_copy_temp(pipe, surf, 1);
@@ -411,11 +454,10 @@ nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf)
        struct nvfx_surface* ns = (struct nvfx_surface*)surf;
        boolean bound = FALSE;
 
-       /* must be done before the copy, otherwise the copy will use the temp as destination */
-       util_dirty_surface_set_clean(nvfx_surface_get_dirty_surfaces(surf), &ns->base);
-
        nvfx_surface_copy_temp(pipe, surf, 0);
 
+       util_dirty_surface_set_clean(nvfx_surface_get_dirty_surfaces(surf), &ns->base);
+
        if(nvfx->framebuffer.zsbuf == surf)
                bound = TRUE;
        else
@@ -437,19 +479,20 @@ nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf)
 static void
 nvfx_clear_render_target(struct pipe_context *pipe,
                         struct pipe_surface *dst,
-                        const float *rgba,
+                        const union pipe_color_union *color,
                         unsigned dstx, unsigned dsty,
                         unsigned width, unsigned height)
 {
        union util_color uc;
-       util_pack_color(rgba, dst->format, &uc);
+       util_pack_color(color->f, dst->format, &uc);
 
        if(util_format_get_blocksizebits(dst->format) > 32
                || nvfx_surface_fill(pipe, dst, dstx, dsty, width, height, uc.ui))
        {
                // TODO: probably should use hardware clear here instead if possible
                struct blitter_context* blitter = nvfx_get_blitter(pipe, 0);
-               util_blitter_clear_render_target(blitter, dst, rgba, dstx, dsty, width, height);
+               util_blitter_clear_render_target(blitter, dst, color, dstx, dsty, width, height);
+               nvfx_put_blitter(pipe, blitter);
        }
 }
 
@@ -468,6 +511,7 @@ nvfx_clear_depth_stencil(struct pipe_context *pipe,
                // TODO: probably should use hardware clear here instead if possible
                struct blitter_context* blitter = nvfx_get_blitter(pipe, 0);
                util_blitter_clear_depth_stencil(blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height);
+               nvfx_put_blitter(pipe, blitter);
        }
 }