X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fgallium%2Fdrivers%2Fnvfx%2Fnvfx_state_emit.c;h=28b8c107576ba1a08cf62f8237b75fc38a431dba;hb=7a830ac49f8ac29d9f5a7abc0c6474e6495d0a7b;hp=f91ae19ecd383f0d7adad5996daef8b5c4334b71;hpb=64644ec3b21884d4a974fa29087fa98c4ed9e112;p=mesa.git diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index f91ae19ecd3..28b8c107576 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -1,15 +1,232 @@ #include "nvfx_context.h" #include "nvfx_state.h" +#include "nvfx_resource.h" #include "draw/draw_context.h" +void +nvfx_state_viewport_validate(struct nvfx_context *nvfx) +{ + struct nouveau_channel *chan = nvfx->screen->base.channel; + struct pipe_viewport_state *vpt = &nvfx->viewport; + + WAIT_RING(chan, 11); + if(nvfx->render_mode == HW) { + OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TRANSLATE_X, 8)); + OUT_RINGf(chan, vpt->translate[0]); + OUT_RINGf(chan, vpt->translate[1]); + OUT_RINGf(chan, vpt->translate[2]); + OUT_RINGf(chan, vpt->translate[3]); + OUT_RINGf(chan, vpt->scale[0]); + OUT_RINGf(chan, vpt->scale[1]); + OUT_RINGf(chan, vpt->scale[2]); + OUT_RINGf(chan, vpt->scale[3]); + OUT_RING(chan, RING_3D(0x1d78, 1)); + OUT_RING(chan, 1); + } else { + OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TRANSLATE_X, 8)); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 1.0f); + OUT_RINGf(chan, 1.0f); + OUT_RINGf(chan, 1.0f); + OUT_RINGf(chan, 1.0f); + OUT_RING(chan, RING_3D(0x1d78, 1)); + OUT_RING(chan, nvfx->is_nv4x ? 0x110 : 1); + } +} + +void +nvfx_state_scissor_validate(struct nvfx_context *nvfx) +{ + struct nouveau_channel *chan = nvfx->screen->base.channel; + struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; + struct pipe_scissor_state *s = &nvfx->scissor; + + if ((rast->scissor == 0 && nvfx->state.scissor_enabled == 0)) + return; + nvfx->state.scissor_enabled = rast->scissor; + + WAIT_RING(chan, 3); + OUT_RING(chan, RING_3D(NV34TCL_SCISSOR_HORIZ, 2)); + if (nvfx->state.scissor_enabled) { + OUT_RING(chan, ((s->maxx - s->minx) << 16) | s->minx); + OUT_RING(chan, ((s->maxy - s->miny) << 16) | s->miny); + } else { + OUT_RING(chan, 4096 << 16); + OUT_RING(chan, 4096 << 16); + } +} + +void +nvfx_state_sr_validate(struct nvfx_context *nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + struct pipe_stencil_ref *sr = &nvfx->stencil_ref; + + WAIT_RING(chan, 4); + OUT_RING(chan, RING_3D(NV34TCL_STENCIL_FRONT_FUNC_REF, 1)); + OUT_RING(chan, sr->ref_value[0]); + OUT_RING(chan, RING_3D(NV34TCL_STENCIL_BACK_FUNC_REF, 1)); + OUT_RING(chan, sr->ref_value[1]); +} + +void +nvfx_state_blend_colour_validate(struct nvfx_context *nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + struct pipe_blend_color *bcol = &nvfx->blend_colour; + + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV34TCL_BLEND_COLOR, 1)); + OUT_RING(chan, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); +} + +void +nvfx_state_stipple_validate(struct nvfx_context *nvfx) +{ + struct nouveau_channel *chan = nvfx->screen->base.channel; + + WAIT_RING(chan, 33); + OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32)); + OUT_RINGp(chan, nvfx->stipple, 32); +} + +static void +nvfx_coord_conventions_validate(struct nvfx_context* nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned value = nvfx->hw_fragprog->coord_conventions; + if(value & NV34TCL_COORD_CONVENTIONS_ORIGIN_INVERTED) + value |= nvfx->framebuffer.height << NV34TCL_COORD_CONVENTIONS_HEIGHT_SHIFT; + + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV34TCL_COORD_CONVENTIONS, 1)); + OUT_RING(chan, value); +} + +static void +nvfx_ucp_validate(struct nvfx_context* nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned enables[7] = + { + 0, + NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0, + NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1, + NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2, + NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3, + NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4, + NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5, + }; + + if(!nvfx->use_vp_clipping) + { + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1)); + OUT_RING(chan, 0); + + WAIT_RING(chan, 6 * 4 + 1); + OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANE_A(0), nvfx->clip.nr * 4)); + OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4); + } + + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1)); + OUT_RING(chan, enables[nvfx->clip.nr]); +} + +static void +nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned i; + struct nvfx_vertex_program* vp = nvfx->vertprog; + if(nvfx->clip.nr != vp->clip_nr) + { + unsigned idx; + WAIT_RING(chan, 14); + + /* remove last instruction bit */ + if(vp->clip_nr >= 0) + { + idx = vp->nr_insns - 7 + vp->clip_nr; + OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_FROM_ID, 1)); + OUT_RING(chan, vp->exec->start + idx); + OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_INST(0), 4)); + OUT_RINGp (chan, vp->insns[idx].data, 4); + } + + /* set last instruction bit */ + idx = vp->nr_insns - 7 + nvfx->clip.nr; + OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_FROM_ID, 1)); + OUT_RING(chan, vp->exec->start + idx); + OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_INST(0), 4)); + OUT_RINGp(chan, vp->insns[idx].data, 3); + OUT_RING(chan, vp->insns[idx].data[3] | 1); + vp->clip_nr = nvfx->clip.nr; + } + + // TODO: only do this for the ones changed + WAIT_RING(chan, 6 * 6); + for(i = 0; i < nvfx->clip.nr; ++i) + { + OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_CONST_ID, 5)); + OUT_RING(chan, vp->data->start + i); + OUT_RINGp (chan, nvfx->clip.ucp[i], 4); + } +} + static boolean nvfx_state_validate_common(struct nvfx_context *nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; - unsigned dirty = nvfx->dirty; + unsigned dirty; + unsigned still_dirty = 0; + int new_fb_mode = -1; /* 1 = all swizzled, 0 = make all linear */ + boolean flush_tex_cache = FALSE; + unsigned render_temps; if(nvfx != nvfx->screen->cur_ctx) - dirty = ~0; + { + nvfx->dirty = ~0; + nvfx->hw_vtxelt_nr = 16; + nvfx->hw_pointsprite_control = -1; + nvfx->hw_vp_output = -1; + nvfx->screen->cur_ctx = nvfx; + nvfx->relocs_needed = NVFX_RELOCATE_ALL; + } + + /* These can trigger use the of 3D engine to copy temporaries. + * That will recurse here and thus dirty all 3D state, so we need to this before anything else, and in a loop.. + * This converges to having clean temps, then binding both fragtexes and framebuffers. + */ + while(nvfx->dirty & (NVFX_NEW_FB | NVFX_NEW_SAMPLER)) + { + if(nvfx->dirty & NVFX_NEW_SAMPLER) + { + nvfx->dirty &=~ NVFX_NEW_SAMPLER; + nvfx_fragtex_validate(nvfx); + + // TODO: only set this if really necessary + flush_tex_cache = TRUE; + } + + if(nvfx->dirty & NVFX_NEW_FB) + { + nvfx->dirty &=~ NVFX_NEW_FB; + new_fb_mode = nvfx_framebuffer_prepare(nvfx); + + // TODO: make sure this doesn't happen, i.e. fbs have matching formats + assert(new_fb_mode >= 0); + } + } + + dirty = nvfx->dirty; if(nvfx->render_mode == HW) { @@ -19,11 +236,19 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) return FALSE; } - if(dirty & (NVFX_NEW_ARRAYS)) + if(dirty & NVFX_NEW_ARRAYS) { if(!nvfx_vbo_validate(nvfx)) return FALSE; } + + if(dirty & NVFX_NEW_INDEX) + { + if(nvfx->use_index_buffer) + nvfx_idxbuf_validate(nvfx); + else + still_dirty = NVFX_NEW_INDEX; + } } else { @@ -31,13 +256,10 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP)) nvfx_vertprog_validate(nvfx); - if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG)) + if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_INDEX | NVFX_NEW_FRAGPROG)) nvfx_vtxfmt_validate(nvfx); } - if(dirty & NVFX_NEW_FB) - nvfx_state_framebuffer_validate(nvfx); - if(dirty & NVFX_NEW_RAST) sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len); @@ -47,11 +269,35 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(dirty & NVFX_NEW_STIPPLE) nvfx_state_stipple_validate(nvfx); - if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST)) + if(nvfx->dirty & NVFX_NEW_UCP) + nvfx_ucp_validate(nvfx); + + if(nvfx->use_vp_clipping && (nvfx->dirty & (NVFX_NEW_UCP | NVFX_NEW_VERTPROG))) + nvfx_vertprog_ucp_validate(nvfx); + + if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST | NVFX_NEW_VERTPROG | NVFX_NEW_SPRITE)) + { nvfx_fragprog_validate(nvfx); + if(dirty & NVFX_NEW_FRAGPROG) + flush_tex_cache = TRUE; // TODO: do we need this? + } + + if(nvfx->is_nv4x) + { + unsigned vp_output = nvfx->vertprog->or | nvfx->hw_fragprog->or; + vp_output |= (1 << (nvfx->clip.nr + 6)) - (1 << 6); - if(dirty & NVFX_NEW_SAMPLER) - nvfx_fragtex_validate(nvfx); + if(vp_output != nvfx->hw_vp_output) + { + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV40TCL_VP_RESULT_EN, 1)); + OUT_RING(chan, vp_output); + nvfx->hw_vp_output = vp_output; + } + } + + if(new_fb_mode >= 0) + nvfx_framebuffer_validate(nvfx, new_fb_mode); if(dirty & NVFX_NEW_BLEND) sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len); @@ -65,31 +311,65 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(dirty & NVFX_NEW_SR) nvfx_state_sr_validate(nvfx); -/* Having this depend on FB looks wrong, but it seems - necessary to make this work on nv3x +/* All these dependencies are wrong, but otherwise + etracer, neverball, foobillard, glest totally misrender TODO: find the right fix */ - if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_FB)) + if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_RAST | NVFX_NEW_ZSA) || (new_fb_mode >= 0)) + { nvfx_state_viewport_validate(nvfx); + } - /* TODO: could nv30 need this or something similar too? */ - if((dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SAMPLER)) && nvfx->is_nv4x) { - WAIT_RING(chan, 4); - OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); - OUT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); - OUT_RING(chan, 1); + if(dirty & NVFX_NEW_ZSA || (new_fb_mode >= 0)) + { + WAIT_RING(chan, 3); + OUT_RING(chan, RING_3D(NV34TCL_DEPTH_WRITE_ENABLE, 2)); + OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.writemask); + OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled); + } + + if((new_fb_mode >= 0) || (dirty & NVFX_NEW_FRAGPROG)) + nvfx_coord_conventions_validate(nvfx); + + if(flush_tex_cache) + { + // TODO: what about nv30? + if(nvfx->is_nv4x) + { + WAIT_RING(chan, 4); + OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); + OUT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); + OUT_RING(chan, 1); + } + } + + nvfx->dirty = dirty & still_dirty; + + render_temps = nvfx->state.render_temps; + if(render_temps) + { + for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i) + { + if(render_temps & (1 << i)) + util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]), + (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]); + } + + if(render_temps & 0x80) + util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf), + (struct util_dirty_surface*)nvfx->framebuffer.zsbuf); } - nvfx->dirty = 0; + return TRUE; } -void -nvfx_state_emit(struct nvfx_context *nvfx) +inline void +nvfx_state_relocate(struct nvfx_context *nvfx, unsigned relocs) { struct nouveau_channel* chan = nvfx->screen->base.channel; /* we need to ensure there is enough space to output relocations in one go */ - unsigned max_relocs = 0 + const unsigned max_relocs = 0 + 16 /* vertex buffers, incl. dma flag */ + 2 /* index buffer plus format+dma flag */ + 2 * 5 /* 4 cbufs + zsbuf, plus dma objects */ @@ -97,18 +377,19 @@ nvfx_state_emit(struct nvfx_context *nvfx) + 2 * 4 /* vertex textures plus format+dma flag */ + 1 /* fragprog incl dma flag */ ; + MARK_RING(chan, max_relocs * 2, max_relocs * 2); - nvfx_state_relocate(nvfx); -} -void -nvfx_state_relocate(struct nvfx_context *nvfx) -{ - nvfx_framebuffer_relocate(nvfx); - nvfx_fragtex_relocate(nvfx); - nvfx_fragprog_relocate(nvfx); - if (nvfx->render_mode == HW) + if(relocs & NVFX_RELOCATE_FRAMEBUFFER) + nvfx_framebuffer_relocate(nvfx); + if(relocs & NVFX_RELOCATE_FRAGTEX) + nvfx_fragtex_relocate(nvfx); + if(relocs & NVFX_RELOCATE_FRAGPROG) + nvfx_fragprog_relocate(nvfx); + if(relocs & NVFX_RELOCATE_VTXBUF) nvfx_vbo_relocate(nvfx); + if(relocs & NVFX_RELOCATE_IDXBUF) + nvfx_idxbuf_relocate(nvfx); } boolean @@ -134,9 +415,6 @@ nvfx_state_validate(struct nvfx_context *nvfx) if(!nvfx_state_validate_common(nvfx)) return FALSE; - if (was_sw) - NOUVEAU_ERR("swtnl->hw\n"); - return TRUE; } @@ -173,6 +451,9 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx) draw_set_vertex_elements(draw, nvfx->vtxelt->num_elements, nvfx->vtxelt->pipe); } + if (nvfx->draw_dirty & NVFX_NEW_INDEX) + draw_set_index_buffer(draw, &nvfx->idxbuf); + nvfx_state_validate_common(nvfx); nvfx->draw_dirty = 0;