From 28eb392a853bb43bdc6cfe7ba814f046c39ca7ae Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Tue, 3 Aug 2010 05:47:41 +0200 Subject: [PATCH] nvfx: new 2D: new render temporaries with resources This patch adds support for creating temporary surfaces to allow rendering to surfaces that cannot be rendered to. It uses the _second_ version of the render temporary infrastructure. This is necessary for swizzled 3D textures and small mipmaps of swizzled 2D textures. This version of the patch creates a resource to use as a temporary instead of a raw BO, making the code simpler. --- src/gallium/drivers/nvfx/nvfx_context.c | 4 + src/gallium/drivers/nvfx/nvfx_context.h | 6 +- src/gallium/drivers/nvfx/nvfx_fragtex.c | 4 + src/gallium/drivers/nvfx/nvfx_miptree.c | 51 +++-- src/gallium/drivers/nvfx/nvfx_resource.h | 36 ++- src/gallium/drivers/nvfx/nvfx_state_emit.c | 81 +++++-- src/gallium/drivers/nvfx/nvfx_state_fb.c | 251 +++++++++++++-------- src/gallium/drivers/nvfx/nvfx_surface.c | 121 +++++++++- 8 files changed, 408 insertions(+), 146 deletions(-) diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index 3d45f5f0baf..7ab81de7dd5 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -15,6 +15,7 @@ nvfx_flush(struct pipe_context *pipe, unsigned flags, struct nouveau_channel *chan = screen->base.channel; struct nouveau_grobj *eng3d = screen->eng3d; + /* XXX: we need to actually be intelligent here */ if (flags & PIPE_FLUSH_TEXTURE_CACHE) { BEGIN_RING(chan, eng3d, 0x1fd8, 1); OUT_RING (chan, 2); @@ -87,5 +88,8 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) /* set these to that we init them on first validation */ nvfx->state.scissor_enabled = ~0; nvfx->state.stipple_enabled = ~0; + + LIST_INITHEAD(&nvfx->render_cache); + return &nvfx->pipe; } diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 278be94d525..a6ea9139675 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -11,6 +11,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_inlines.h" +#include "util/u_double_list.h" #include "draw/draw_vertex.h" #include "util/u_blitter.h" @@ -67,6 +68,7 @@ struct nvfx_state { unsigned scissor_enabled; unsigned stipple_enabled; unsigned fp_samplers; + unsigned render_temps; }; struct nvfx_vtxelt_state { @@ -90,6 +92,7 @@ struct nvfx_context { struct draw_context *draw; struct blitter_context* blitter; + struct list_head render_cache; /* HW state derived from pipe states */ struct nvfx_state state; @@ -185,7 +188,8 @@ extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe, extern void nvfx_vtxfmt_validate(struct nvfx_context *nvfx); /* nvfx_fb.c */ -extern void nvfx_state_framebuffer_validate(struct nvfx_context *nvfx); +extern int nvfx_framebuffer_prepare(struct nvfx_context *nvfx); +extern void nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result); void nvfx_framebuffer_relocate(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c index 0b4a434fecc..66057454337 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragtex.c +++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c @@ -16,6 +16,10 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx) samplers &= ~(1 << unit); if(nvfx->fragment_sampler_views[unit] && nvfx->tex_sampler[unit]) { + util_dirty_surfaces_use_for_sampling(&nvfx->pipe, + &((struct nvfx_miptree*)nvfx->fragment_sampler_views[unit]->texture)->dirty_surfaces, + nvfx_surface_flush); + if(!nvfx->is_nv4x) nv30_fragtex_set(nvfx, unit); else diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c index 7deb9d7b9a3..530d705e136 100644 --- a/src/gallium/drivers/nvfx/nvfx_miptree.c +++ b/src/gallium/drivers/nvfx/nvfx_miptree.c @@ -11,6 +11,7 @@ #include "nvfx_screen.h" #include "nvfx_resource.h" #include "nvfx_transfer.h" +#include "nv04_2d.h" static void nvfx_miptree_choose_format(struct nvfx_miptree *mt) @@ -114,17 +115,24 @@ nvfx_miptree_get_handle(struct pipe_screen *pscreen, } +static void +nvfx_miptree_surface_final_destroy(struct pipe_surface* ps) +{ + struct nvfx_surface* ns = (struct nvfx_surface*)ps; + pipe_resource_reference(&ps->texture, 0); + pipe_resource_reference((struct pipe_resource**)&ns->temp, 0); + FREE(ps); +} + static void nvfx_miptree_destroy(struct pipe_screen *screen, struct pipe_resource *pt) { struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; + util_surfaces_destroy(&mt->surfaces, pt, nvfx_miptree_surface_final_destroy); nouveau_screen_bo_release(screen, mt->base.bo); FREE(mt); } - - - struct u_resource_vtbl nvfx_miptree_vtbl = { nvfx_miptree_get_handle, /* get_handle */ @@ -152,6 +160,8 @@ nvfx_miptree_create_skeleton(struct pipe_screen *pscreen, const struct pipe_reso mt->base.base = *pt; mt->base.vtbl = &nvfx_miptree_vtbl; + util_dirty_surfaces_init(&mt->dirty_surfaces); + pipe_reference_init(&mt->base.base.reference, 1); mt->base.base.screen = pscreen; @@ -218,29 +228,28 @@ nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) { + struct nvfx_miptree* mt = (struct nvfx_miptree*)pt; struct nvfx_surface *ns; - ns = CALLOC_STRUCT(nvfx_surface); - if (!ns) - return NULL; - pipe_resource_reference(&ns->base.texture, pt); - ns->base.format = pt->format; - ns->base.width = u_minify(pt->width0, level); - ns->base.height = u_minify(pt->height0, level); - ns->base.usage = flags; - pipe_reference_init(&ns->base.reference, 1); - ns->base.face = face; - ns->base.level = level; - ns->base.zslice = zslice; - ns->pitch = nvfx_subresource_pitch(pt, level); - ns->base.offset = nvfx_subresource_offset(pt, face, level, zslice); - - return &ns->base; + ns = (struct nvfx_surface*)util_surfaces_get(&mt->surfaces, sizeof(struct nvfx_surface), pscreen, pt, face, level, zslice, flags); + if(ns->base.base.offset == ~0) { + util_dirty_surface_init(&ns->base); + ns->pitch = nvfx_subresource_pitch(pt, level); + ns->base.base.offset = nvfx_subresource_offset(pt, face, level, zslice); + } + + return &ns->base.base; } void nvfx_miptree_surface_del(struct pipe_surface *ps) { - pipe_resource_reference(&ps->texture, NULL); - FREE(ps); + struct nvfx_surface* ns = (struct nvfx_surface*)ps; + + if(!ns->temp) + { + util_surfaces_detach(&((struct nvfx_miptree*)ps->texture)->surfaces, ps); + pipe_resource_reference(&ps->texture, 0); + FREE(ps); + } } diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h index 42d04ebb370..be1845dd9c1 100644 --- a/src/gallium/drivers/nvfx/nvfx_resource.h +++ b/src/gallium/drivers/nvfx/nvfx_resource.h @@ -1,13 +1,16 @@ - #ifndef NVFX_RESOURCE_H #define NVFX_RESOURCE_H #include "util/u_transfer.h" #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_double_list.h" +#include "util/u_surfaces.h" +#include "util/u_dirty_surfaces.h" #include struct pipe_resource; +struct nv04_region; /* This gets further specialized into either buffer or texture @@ -38,17 +41,34 @@ nvfx_resource_on_gpu(struct pipe_resource* pr) #define NVFX_MAX_TEXTURE_LEVELS 16 +/* We have the following invariants for render temporaries + * + * 1. Render temporaries are always linear + * 2. Render temporaries are always up to date + * 3. Currently, render temporaries are destroyed when the resource is used for sampling, but kept for any other use + * + * Also, we do NOT flush temporaries on any pipe->flush(). + * This is fine, as long as scanout targets and shared resources never need temps. + * + * TODO: we may want to also support swizzled temporaries to improve performance in some cases. + */ + struct nvfx_miptree { struct nvfx_resource base; unsigned linear_pitch; /* for linear textures, 0 for swizzled and compressed textures with level-dependent minimal pitch */ unsigned face_size; /* 128-byte aligned face/total size */ unsigned level_offset[NVFX_MAX_TEXTURE_LEVELS]; + + struct util_surfaces surfaces; + struct util_dirty_surfaces dirty_surfaces; }; struct nvfx_surface { - struct pipe_surface base; + struct util_dirty_surface base; unsigned pitch; + + struct nvfx_miptree* temp; }; static INLINE @@ -65,6 +85,12 @@ nvfx_surface_buffer(struct pipe_surface *surf) return mt->bo; } +static INLINE struct util_dirty_surfaces* +nvfx_surface_get_dirty_surfaces(struct pipe_surface* surf) +{ + struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture; + return &mt->dirty_surfaces; +} void nvfx_init_resource_functions(struct pipe_context *pipe); @@ -141,4 +167,10 @@ nvfx_subresource_pitch(struct pipe_resource* pt, unsigned level) } } +void +nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf); + +void +nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf); + #endif diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index f91ae19ecd3..dc70f3de870 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -1,15 +1,48 @@ #include "nvfx_context.h" #include "nvfx_state.h" +#include "nvfx_resource.h" #include "draw/draw_context.h" static boolean nvfx_state_validate_common(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; - unsigned dirty = nvfx->dirty; + unsigned dirty; + int all_swizzled = -1; + boolean flush_tex_cache = FALSE; if(nvfx != nvfx->screen->cur_ctx) - dirty = ~0; + { + nvfx->dirty = ~0; + nvfx->screen->cur_ctx = nvfx; + } + + /* These can trigger use the of 3D engine to copy temporaries. + * That will recurse here and thus dirty all 3D state, so we need to this before anything else, and in a loop.. + * This converges to having clean temps, then binding both fragtexes and framebuffers. + */ + while(nvfx->dirty & (NVFX_NEW_FB | NVFX_NEW_SAMPLER)) + { + if(nvfx->dirty & NVFX_NEW_SAMPLER) + { + nvfx->dirty &=~ NVFX_NEW_SAMPLER; + nvfx_fragtex_validate(nvfx); + + // TODO: only set this if really necessary + flush_tex_cache = TRUE; + } + + if(nvfx->dirty & NVFX_NEW_FB) + { + nvfx->dirty &=~ NVFX_NEW_FB; + all_swizzled = nvfx_framebuffer_prepare(nvfx); + + // TODO: make sure this doesn't happen, i.e. fbs have matching formats + assert(all_swizzled >= 0); + } + } + + dirty = nvfx->dirty; if(nvfx->render_mode == HW) { @@ -35,9 +68,6 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) nvfx_vtxfmt_validate(nvfx); } - if(dirty & NVFX_NEW_FB) - nvfx_state_framebuffer_validate(nvfx); - if(dirty & NVFX_NEW_RAST) sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len); @@ -48,10 +78,14 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) nvfx_state_stipple_validate(nvfx); if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST)) + { nvfx_fragprog_validate(nvfx); + if(dirty & NVFX_NEW_FRAGPROG) + flush_tex_cache = TRUE; // TODO: do we need this? + } - if(dirty & NVFX_NEW_SAMPLER) - nvfx_fragtex_validate(nvfx); + if(all_swizzled >= 0) + nvfx_framebuffer_validate(nvfx, all_swizzled); if(dirty & NVFX_NEW_BLEND) sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len); @@ -72,13 +106,17 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_FB)) nvfx_state_viewport_validate(nvfx); - /* TODO: could nv30 need this or something similar too? */ - if((dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SAMPLER)) && nvfx->is_nv4x) { - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); - OUT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); - OUT_RING(chan, 1); + if(flush_tex_cache) + { + // TODO: what about nv30? + if(nvfx->is_nv4x) + { + WAIT_RING(chan, 4); + OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); + OUT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); + OUT_RING(chan, 1); + } } nvfx->dirty = 0; return TRUE; @@ -99,6 +137,21 @@ nvfx_state_emit(struct nvfx_context *nvfx) ; MARK_RING(chan, max_relocs * 2, max_relocs * 2); nvfx_state_relocate(nvfx); + + unsigned render_temps = nvfx->state.render_temps; + if(render_temps) + { + for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i) + { + if(render_temps & (1 << i)) + util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]), + (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]); + } + + if(render_temps & 0x80) + util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf), + (struct util_dirty_surface*)nvfx->framebuffer.zsbuf); + } } void diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c index e111d11627f..80b0f21575f 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_fb.c +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -1,19 +1,56 @@ #include "nvfx_context.h" #include "nvfx_resource.h" #include "nouveau/nouveau_util.h" +#include "util/u_format.h" -void -nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) +static inline boolean +nvfx_surface_linear_renderable(struct pipe_surface* surf) +{ + return (surf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR) + && !(surf->offset & 63) + && !(((struct nvfx_surface*)surf)->pitch & 63); +} + +static inline boolean +nvfx_surface_swizzled_renderable(struct pipe_framebuffer_state* fb, struct pipe_surface* surf) +{ + /* TODO: return FALSE if we have a format not supporting swizzled rendering (e.g. r8); currently those are not supported at all */ + return !((struct nvfx_miptree*)surf->texture)->linear_pitch + && (surf->texture->target != PIPE_TEXTURE_3D || u_minify(surf->texture->depth0, surf->level) <= 1) + && !(surf->offset & 127) + && (surf->width == fb->width) + && (surf->height == fb->height) + && !((struct nvfx_surface*)surf)->temp; +} + +static boolean +nvfx_surface_get_render_target(struct pipe_surface* surf, int all_swizzled, struct nvfx_render_target* target) +{ + struct nvfx_surface* ns = (struct nvfx_surface*)surf; + if(!ns->temp) + { + target->bo = ((struct nvfx_miptree*)surf->texture)->base.bo; + target->offset = surf->offset; + target->pitch = align(ns->pitch, 64); + assert(target->pitch); + return FALSE; + } + else + { + target->offset = 0; + target->pitch = ns->temp->linear_pitch; + target->bo = ns->temp->base.bo; + assert(target->pitch); + return TRUE; + } +} + +int +nvfx_framebuffer_prepare(struct nvfx_context *nvfx) { struct pipe_framebuffer_state *fb = &nvfx->framebuffer; - struct nouveau_channel *chan = nvfx->screen->base.channel; - uint32_t rt_enable = 0, rt_format = 0; - int i, colour_format = 0, zeta_format = 0; - int depth_only = 0; - unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; - unsigned w = fb->width; - unsigned h = fb->height; - int colour_bits = 32, zeta_bits = 32; + int i, color_format = 0, zeta_format = 0; + int all_swizzled = 1; if(!nvfx->is_nv4x) assert(fb->nr_cbufs <= 2); @@ -21,113 +58,135 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) assert(fb->nr_cbufs <= 4); for (i = 0; i < fb->nr_cbufs; i++) { - if (colour_format) - assert(colour_format == fb->cbufs[i]->format); - else - colour_format = fb->cbufs[i]->format; - - rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); - nvfx->hw_rt[i].bo = ((struct nvfx_miptree*)fb->cbufs[i]->texture)->base.bo; - nvfx->hw_rt[i].offset = fb->cbufs[i]->offset; - nvfx->hw_rt[i].pitch = ((struct nvfx_surface *)fb->cbufs[i])->pitch; + if (color_format) { + if(color_format != fb->cbufs[i]->format) + return -1; + } else + color_format = fb->cbufs[i]->format; + + if(!nvfx_surface_swizzled_renderable(fb, fb->cbufs[i])) + all_swizzled = 0; } - for(; i < 4; ++i) - nvfx->hw_rt[i].bo = 0; + if (fb->zsbuf) { + /* TODO: return FALSE if we have a format not supporting a depth buffer (e.g. r8); currently those are not supported at all */ + if(!nvfx_surface_swizzled_renderable(fb, fb->zsbuf)) + all_swizzled = 0; + + if(all_swizzled && util_format_get_blocksize(color_format) != util_format_get_blocksize(zeta_format)) + all_swizzled = 0; + } + + for (i = 0; i < fb->nr_cbufs; i++) { + if(!((struct nvfx_surface*)fb->cbufs[i])->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->cbufs[i])) + nvfx_surface_create_temp(&nvfx->pipe, fb->cbufs[i]); + } + + if(fb->zsbuf) { + if(!((struct nvfx_surface*)fb->zsbuf)->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->zsbuf)) + nvfx_surface_create_temp(&nvfx->pipe, fb->zsbuf); + } + + return all_swizzled; +} + +void +nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) +{ + struct pipe_framebuffer_state *fb = &nvfx->framebuffer; + struct nouveau_channel *chan = nvfx->screen->base.channel; + uint32_t rt_enable, rt_format; + int i; + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + unsigned w = fb->width; + unsigned h = fb->height; + + rt_enable = (NV34TCL_RT_ENABLE_COLOR0 << fb->nr_cbufs) - 1; if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) rt_enable |= NV34TCL_RT_ENABLE_MRT; + nvfx->state.render_temps = 0; + + for (i = 0; i < fb->nr_cbufs; i++) + nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->cbufs[i], prepare_result, &nvfx->hw_rt[i]) << i; + + for(; i < 4; ++i) + nvfx->hw_rt[i].bo = 0; + if (fb->zsbuf) { - zeta_format = fb->zsbuf->format; - nvfx->hw_zeta.bo = ((struct nvfx_miptree*)fb->zsbuf->texture)->base.bo; - nvfx->hw_zeta.offset = fb->zsbuf->offset; - nvfx->hw_zeta.pitch = ((struct nvfx_surface *)fb->zsbuf)->pitch; - } - else - nvfx->hw_zeta.bo = 0; - - if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0 | NV34TCL_RT_ENABLE_COLOR1 | - NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) { - /* Render to at least a colour buffer */ - if (!(fb->cbufs[0]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - for (i = 1; i < fb->nr_cbufs; i++) - assert(!(fb->cbufs[i]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)); - - rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(fb->cbufs[0]->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(fb->cbufs[0]->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); - } - else - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; - } else if (fb->zsbuf) { - depth_only = 1; - - /* Render to depth buffer only */ - if (!(fb->zsbuf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - - rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(fb->zsbuf->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(fb->zsbuf->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); - } - else - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; - } else { - return; + nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->zsbuf, prepare_result, &nvfx->hw_zeta) << 7; + + assert(util_format_get_stride(fb->zsbuf->format, fb->width) <= nvfx->hw_zeta.pitch); + assert(nvfx->hw_zeta.offset + nvfx->hw_zeta.pitch * fb->height <= nvfx->hw_zeta.bo->size); } - switch (colour_format) { - case PIPE_FORMAT_B8G8R8X8_UNORM: - rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_B5G6R5_UNORM: + if (prepare_result) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + + rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | + (log2i(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + } else + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + + if(fb->nr_cbufs > 0) { + switch (fb->cbufs[0]->format) { + case PIPE_FORMAT_B8G8R8X8_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_B5G6R5_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + } else if(fb->zsbuf && util_format_get_blocksize(fb->zsbuf->format) == 2) rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; - colour_bits = 16; - break; - default: - assert(0); - } + else + rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; - switch (zeta_format) { - case PIPE_FORMAT_Z16_UNORM: + if(fb->zsbuf) { + switch (fb->zsbuf->format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); + } + } else if(fb->nr_cbufs && util_format_get_blocksize(fb->cbufs[0]->format) == 2) rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; - zeta_bits = 16; - break; - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - case 0: + else rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; - break; - default: - assert(0); - } - if ((!nvfx->is_nv4x) && colour_bits > zeta_bits) { - /* TODO: does this limitation really exist? - TODO: can it be worked around somehow? */ - assert(0); - } + if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) || fb->zsbuf) { + struct nvfx_render_target *rt0 = &nvfx->hw_rt[0]; + uint32_t pitch; + + if(!(rt_enable & NV34TCL_RT_ENABLE_COLOR0)) + rt0 = &nvfx->hw_zeta; - if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) - || ((!nvfx->is_nv4x) && depth_only)) { - struct nvfx_render_target *rt0 = (depth_only ? &nvfx->hw_zeta : &nvfx->hw_rt[0]); - uint32_t pitch = rt0->pitch; + pitch = rt0->pitch; if(!nvfx->is_nv4x) { - if (nvfx->hw_zeta.bo) { + if (nvfx->hw_zeta.bo) pitch |= (nvfx->hw_zeta.pitch << 16); - } else { + else pitch |= (pitch << 16); - } } + //printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch); + OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 1)); OUT_RELOC(chan, rt0->bo, 0, rt_flags | NOUVEAU_BO_OR, @@ -180,7 +239,7 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) } } - if (zeta_format) { + if (fb->zsbuf) { OUT_RING(chan, RING_3D(NV34TCL_DMA_ZETA, 1)); OUT_RELOC(chan, nvfx->hw_zeta.bo, 0, rt_flags | NOUVEAU_BO_OR, diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index a97f342c646..8208c67f2a2 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -94,23 +94,44 @@ nvfx_region_fixup_swizzled(struct nv04_region* rgn, unsigned zslice, unsigned wi } static INLINE void -nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y) +nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, bool for_write) { - rgn->bo = ((struct nvfx_resource*)surf->base.texture)->bo; - rgn->offset = surf->base.offset; - rgn->pitch = surf->pitch; rgn->x = x; rgn->y = y; rgn->z = 0; + nvfx_region_set_format(rgn, surf->base.base.format); - nvfx_region_set_format(rgn, surf->base.format); - if(!(surf->base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) - nvfx_region_fixup_swizzled(rgn, surf->base.zslice, surf->base.width, surf->base.height, u_minify(surf->base.texture->depth0, surf->base.level)); + if(surf->temp) + { + rgn->bo = surf->temp->base.bo; + rgn->offset = 0; + rgn->pitch = surf->temp->linear_pitch; + + if(for_write) + util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(&surf->base.base), &surf->base); + } else { + rgn->bo = ((struct nvfx_resource*)surf->base.base.texture)->bo; + rgn->offset = surf->base.base.offset; + rgn->pitch = surf->pitch; + + if(!(surf->base.base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) + nvfx_region_fixup_swizzled(rgn, surf->base.base.zslice, surf->base.base.width, surf->base.base.height, u_minify(surf->base.base.texture->depth0, surf->base.base.level)); + } } static INLINE void -nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z) +nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z, bool for_write) { + if(pt->target != PIPE_BUFFER) + { + struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, sub.face, sub.level, z); + if(ns && util_dirty_surface_is_dirty(&ns->base)) + { + nvfx_region_init_for_surface(rgn, ns, x, y, for_write); + return; + } + } + rgn->bo = ((struct nvfx_resource*)pt)->bo; rgn->offset = nvfx_subresource_offset(pt, sub.face, sub.level, z); rgn->pitch = nvfx_subresource_pitch(pt, sub.level); @@ -165,6 +186,7 @@ nv04_scaled_image_format(enum pipe_format format) } } +// XXX: must save index buffer too! static struct blitter_context* nvfx_get_blitter(struct pipe_context* pipe, int copy) { @@ -237,8 +259,8 @@ nvfx_resource_copy_region(struct pipe_context *pipe, int dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING; int src_on_gpu = nvfx_resource_on_gpu(srcr); - nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz); - nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz); + nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz, TRUE); + nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz, FALSE); w = util_format_get_stride(dstr->format, w) >> dst.bpps; h = util_format_get_nblocksy(dstr->format, h); @@ -293,10 +315,11 @@ nvfx_surface_fill(struct pipe_context* pipe, struct pipe_surface *dsts, struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d; struct nv04_region dst; /* Always try to use the GPU right now, if possible - * If the user wanted the surface data on the CPU, he would have cleared with memset */ + * If the user wanted the surface data on the CPU, he would have cleared with memset (hopefully) */ // we don't care about interior pixel order since we set all them to the same value - nvfx_region_init_for_surface(&dst, (struct nvfx_surface*)dsts, dx, dy); + nvfx_region_init_for_surface(&dst, (struct nvfx_surface*)dsts, dx, dy, TRUE); + w = util_format_get_stride(dsts->format, w) >> dst.bpps; h = util_format_get_nblocksy(dsts->format, h); @@ -341,6 +364,80 @@ nvfx_screen_surface_init(struct pipe_screen *pscreen) return 0; } +static void +nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int to_temp) +{ + struct nvfx_surface* ns = (struct nvfx_surface*)surf; + struct pipe_subresource tempsr, surfsr; + struct pipe_resource *idxbuf_buffer; + unsigned idxbuf_format; + + tempsr.face = 0; + tempsr.level = 0; + surfsr.face = surf->face; + surfsr.level = surf->level; + + // TODO: do this properly, in blitter save + idxbuf_buffer = ((struct nvfx_context*)pipe)->idxbuf_buffer; + idxbuf_format = ((struct nvfx_context*)pipe)->idxbuf_format; + + if(to_temp) + nvfx_resource_copy_region(pipe, &ns->temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height); + else + nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &ns->temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height); + + ((struct nvfx_context*)pipe)->idxbuf_buffer = idxbuf_buffer; + ((struct nvfx_context*)pipe)->idxbuf_format = idxbuf_format; +} + +void +nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf) +{ + struct nvfx_surface* ns = (struct nvfx_surface*)surf; + struct pipe_resource template; + memset(&template, 0, sizeof(struct pipe_resource)); + template.target = PIPE_TEXTURE_2D; + template.format = surf->format; + template.width0 = surf->width; + template.height0 = surf->height; + template.depth0 = 1; + template.nr_samples = surf->texture->nr_samples; + template.flags = NVFX_RESOURCE_FLAG_LINEAR; + + ns->temp = (struct nvfx_miptree*)nvfx_miptree_create(pipe->screen, &template); + nvfx_surface_copy_temp(pipe, surf, 1); +} + +void +nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf) +{ + struct nvfx_context* nvfx = (struct nvfx_context*)pipe; + struct nvfx_surface* ns = (struct nvfx_surface*)surf; + boolean bound = FALSE; + + /* must be done before the copy, otherwise the copy will use the temp as destination */ + util_dirty_surface_set_clean(nvfx_surface_get_dirty_surfaces(surf), &ns->base); + + nvfx_surface_copy_temp(pipe, surf, 0); + + if(nvfx->framebuffer.zsbuf == surf) + bound = TRUE; + else + { + for(unsigned i = 0; i < nvfx->framebuffer.nr_cbufs; ++i) + { + if(nvfx->framebuffer.cbufs[i] == surf) + { + bound = TRUE; + break; + } + } + } + + if(!bound) + pipe_resource_reference((struct pipe_resource**)&ns->temp, 0); +} + static void nvfx_clear_render_target(struct pipe_context *pipe, struct pipe_surface *dst, -- 2.30.2