From c907b947130c884de09e48e1ecbeecc9afc9f75b Mon Sep 17 00:00:00 2001
From: Luca Barbieri
Date: Mon, 23 Aug 2010 16:43:04 +0200
Subject: [PATCH] nvfx: emit bo relocations only when needed

Should improve performance, possibly significantly.
---
 src/gallium/drivers/nvfx/nvfx_context.c    |  6 +++--
 src/gallium/drivers/nvfx/nvfx_context.h    | 28 ++++++++++++++++++--
 src/gallium/drivers/nvfx/nvfx_fragprog.c   |  3 +++
 src/gallium/drivers/nvfx/nvfx_fragtex.c    |  2 ++
 src/gallium/drivers/nvfx/nvfx_screen.c     | 11 ++++++++
 src/gallium/drivers/nvfx/nvfx_state_emit.c | 30 ++++++++++------------
 src/gallium/drivers/nvfx/nvfx_state_fb.c   |  2 ++
 src/gallium/drivers/nvfx/nvfx_vbo.c        |  3 +++
 8 files changed, 65 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c
index 2f775f92cf5..5a2fa14c887 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_context.c
@@ -46,6 +46,9 @@ nvfx_destroy(struct pipe_context *pipe)
 	if (nvfx->draw)
 		draw_destroy(nvfx->draw);
 
+	if(nvfx->screen->cur_ctx == nvfx)
+		nvfx->screen->cur_ctx = NULL;
+
 	FREE(nvfx);
 }
 
@@ -72,8 +75,6 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
 	nvfx->pipe.clear = nvfx_clear;
 	nvfx->pipe.flush = nvfx_flush;
 
-	screen->base.channel->user_private = nvfx;
-
 	nvfx->is_nv4x = screen->is_nv4x;
 	/* TODO: it seems that nv30 might have fixed function clipping usable with vertex programs
 	 * However, my code for that doesn't work, so use vp clipping for all cards, which works.
@@ -103,6 +104,7 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
 	nvfx->hw_pointsprite_control = -1;
 	nvfx->hw_vp_output = -1;
 	nvfx->use_vertex_buffers = -1;
+	nvfx->relocs_needed = NVFX_RELOCATE_ALL;
 
 	LIST_INITHEAD(&nvfx->render_cache);
 
diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h
index 680f4c6ce0f..4c654bfa8ba 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.h
+++ b/src/gallium/drivers/nvfx/nvfx_context.h
@@ -47,6 +47,13 @@
 #define NVFX_NEW_INDEX (1 << 16)
 #define NVFX_NEW_SPRITE (1 << 17)
 
+#define NVFX_RELOCATE_FRAMEBUFFER (1 << 0)
+#define NVFX_RELOCATE_FRAGTEX (1 << 1)
+#define NVFX_RELOCATE_FRAGPROG (1 << 2)
+#define NVFX_RELOCATE_VTXBUF (1 << 3)
+#define NVFX_RELOCATE_IDXBUF (1 << 4)
+#define NVFX_RELOCATE_ALL 0x1f
+
 struct nvfx_rasterizer_state {
 	struct pipe_rasterizer_state pipe;
 	unsigned sb_len;
@@ -199,6 +206,8 @@ struct nvfx_context {
 	int hw_pointsprite_control;
 	int hw_vp_output;
 	struct nvfx_fragment_program* hw_fragprog;
+
+	unsigned relocs_needed;
 };
 
 static INLINE struct nvfx_context *
@@ -290,10 +299,25 @@ extern void nvfx_state_sr_validate(struct nvfx_context *nvfx);
 extern void nvfx_state_zsa_validate(struct nvfx_context *nvfx);
 
 /* nvfx_state_emit.c */
-extern void nvfx_state_relocate(struct nvfx_context *nvfx);
+extern void nvfx_state_relocate(struct nvfx_context *nvfx, unsigned relocs);
 extern boolean nvfx_state_validate(struct nvfx_context *nvfx);
 extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx);
-extern void nvfx_state_emit(struct nvfx_context *nvfx);
+
+static inline void
+nvfx_state_emit(struct nvfx_context *nvfx)
+{
+	unsigned relocs = NVFX_RELOCATE_FRAMEBUFFER | NVFX_RELOCATE_FRAGTEX | NVFX_RELOCATE_FRAGPROG;
+	if (nvfx->render_mode == HW)
+	{
+		relocs |= NVFX_RELOCATE_VTXBUF;
+		if(nvfx->use_index_buffer)
+			relocs |= NVFX_RELOCATE_IDXBUF;
+	}
+
+	relocs &= nvfx->relocs_needed;
+	if(relocs)
+		nvfx_state_relocate(nvfx, relocs);
+}
 
 /* nvfx_transfer.c */
 extern void nvfx_init_transfer_functions(struct pipe_context *pipe);
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
index 049b814d49f..7caddfab707 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -1471,6 +1471,8 @@ update:
 			nvfx->hw_pointsprite_control = pointsprite_control;
 		}
 	}
+
+	nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGPROG;
 }
 
 void
@@ -1487,6 +1489,7 @@ nvfx_fragprog_relocate(struct nvfx_context *nvfx)
 	OUT_RELOC(chan, bo, offset, fp_flags | NOUVEAU_BO_LOW |
 		NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
 		NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
+	nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGPROG;
 }
 
 void
diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c
index 00c47be76ab..6503c7afcbf 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragtex.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c
@@ -205,6 +205,7 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx)
 		}
 	}
 	nvfx->dirty_samplers = 0;
+	nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGTEX;
 }
 
 void
@@ -231,6 +232,7 @@ nvfx_fragtex_relocate(struct nvfx_context *nvfx)
 		OUT_RELOC(chan, bo, nvfx->hw_txf[unit], tex_flags | NOUVEAU_BO_OR | NOUVEAU_BO_DUMMY,
 			NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
 	}
+	nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGTEX;
 }
 
 void
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index 72cb5239b5a..99b4d8b58c0 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -377,6 +377,14 @@ nvfx_screen_get_vertex_buffer_flags(struct nvfx_screen* screen)
 	return vram_hack ? NOUVEAU_BO_VRAM : NOUVEAU_BO_GART;
 }
 
+static void nvfx_channel_flush_notify(struct nouveau_channel* chan)
+{
+	struct nvfx_screen* screen = chan->user_private;
+	struct nvfx_context* nvfx = screen->cur_ctx;
+	if(nvfx)
+		nvfx->relocs_needed = NVFX_RELOCATE_ALL;
+}
+
 struct pipe_screen *
 nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 {
@@ -398,6 +406,9 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		return NULL;
 	}
 	chan = screen->base.channel;
+	screen->cur_ctx = NULL;
+	chan->user_private = screen;
+	chan->flush_notify = nvfx_channel_flush_notify;
 
 	pscreen->winsys = ws;
 	pscreen->destroy = nvfx_screen_destroy;
diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c
index c43a75aaa21..cfcb0f7ef66 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_emit.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
@@ -20,6 +20,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
 		nvfx->hw_pointsprite_control = -1;
 		nvfx->hw_vp_output = -1;
 		nvfx->screen->cur_ctx = nvfx;
+		nvfx->relocs_needed = NVFX_RELOCATE_ALL;
 	}
 
 	/* These can trigger use the of 3D engine to copy temporaries.
@@ -244,12 +245,12 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
 	return TRUE;
 }
 
-void
-nvfx_state_emit(struct nvfx_context *nvfx)
+inline void
+nvfx_state_relocate(struct nvfx_context *nvfx, unsigned relocs)
 {
 	struct nouveau_channel* chan = nvfx->screen->base.channel;
 	/* we need to ensure there is enough space to output relocations in one go */
-	unsigned max_relocs = 0
+	const unsigned max_relocs = 0
 		+ 16 /* vertex buffers, incl. dma flag */
 		+ 2 /* index buffer plus format+dma flag */
 		+ 2 * 5 /* 4 cbufs + zsbuf, plus dma objects */
@@ -257,22 +258,19 @@ nvfx_state_emit(struct nvfx_context *nvfx)
 		+ 2 * 4 /* vertex textures plus format+dma flag */
 		+ 1 /* fragprog incl dma flag */
 		;
+
 	MARK_RING(chan, max_relocs * 2, max_relocs * 2);
-	nvfx_state_relocate(nvfx);
-}
 
-void
-nvfx_state_relocate(struct nvfx_context *nvfx)
-{
-	nvfx_framebuffer_relocate(nvfx);
-	nvfx_fragtex_relocate(nvfx);
-	nvfx_fragprog_relocate(nvfx);
-	if (nvfx->render_mode == HW)
-	{
+	if(relocs & NVFX_RELOCATE_FRAMEBUFFER)
+		nvfx_framebuffer_relocate(nvfx);
+	if(relocs & NVFX_RELOCATE_FRAGTEX)
+		nvfx_fragtex_relocate(nvfx);
+	if(relocs & NVFX_RELOCATE_FRAGPROG)
+		nvfx_fragprog_relocate(nvfx);
+	if(relocs & NVFX_RELOCATE_VTXBUF)
 		nvfx_vbo_relocate(nvfx);
-		if(nvfx->use_index_buffer)
-			nvfx_idxbuf_relocate(nvfx);
-	}
+	if(relocs & NVFX_RELOCATE_IDXBUF)
+		nvfx_idxbuf_relocate(nvfx);
 }
 
 boolean
diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c
index 3db9cec9054..3b869d43a15 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_fb.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c
@@ -278,6 +278,7 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
 		OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TX_ORIGIN, 1));
 		OUT_RING(chan, 0);
 	}
+	nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER;
 }
 
 void
@@ -307,4 +308,5 @@ nvfx_framebuffer_relocate(struct nvfx_context *nvfx)
 	DO(NV40, 3);
 
 	DO_(nvfx->hw_zeta, NV34, ZETA);
+	nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER;
 }
diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c
index 21d6e0e6f84..e6e9a8f2e40 100644
--- a/src/gallium/drivers/nvfx/nvfx_vbo.c
+++ b/src/gallium/drivers/nvfx/nvfx_vbo.c
@@ -334,6 +334,7 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
 	OUT_RING(chan, 0);
 
 	nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
+	nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF;
 	return TRUE;
 }
 
@@ -362,6 +363,7 @@ nvfx_vbo_relocate(struct nvfx_context *nvfx)
 				vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
 				0, NV34TCL_VTXBUF_ADDRESS_DMA1);
 	}
+	nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF;
 }
 
 static void
@@ -382,6 +384,7 @@ nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags)
 	OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0);
 	OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
 			0, NV34TCL_IDXBUF_FORMAT_DMA1);
+	nvfx->relocs_needed &=~ NVFX_RELOCATE_IDXBUF;
 }
 
 void
-- 
2.30.2