docs: Update status of GL 3.x related extensions
[mesa.git] / src / gallium / drivers / nvfx / nvfx_state_emit.c
index 9d28b590746877aaf6e6a1f455216f4a6bf06822..501fdd4430c986a63f11f639ce43766cca302487 100644 (file)
 #include "nvfx_context.h"
 #include "nvfx_state.h"
+#include "nvfx_resource.h"
 #include "draw/draw_context.h"
 
-#define RENDER_STATES(name, nvxx, vbo) \
-static struct nvfx_state_entry *name##_render_states[] = { \
-       &nvfx_state_framebuffer, \
-       &nvfx_state_rasterizer, \
-       &nvfx_state_scissor, \
-       &nvfx_state_stipple, \
-       &nvfx_state_fragprog, \
-       &nvxx##_state_fragtex, \
-       &nvfx_state_vertprog, \
-       &nvfx_state_blend, \
-       &nvfx_state_blend_colour, \
-       &nvfx_state_zsa, \
-       &nvfx_state_sr, \
-       &nvfx_state_viewport, \
-       &nvfx_state_##vbo, \
-       NULL \
+void
+nvfx_state_viewport_validate(struct nvfx_context *nvfx)
+{
+       struct nouveau_channel *chan = nvfx->screen->base.channel;
+       struct pipe_viewport_state *vpt = &nvfx->viewport;
+
+       WAIT_RING(chan, 11);
+       if(nvfx->render_mode == HW) {
+               OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8));
+               OUT_RINGf(chan, vpt->translate[0]);
+               OUT_RINGf(chan, vpt->translate[1]);
+               OUT_RINGf(chan, vpt->translate[2]);
+               OUT_RINGf(chan, vpt->translate[3]);
+               OUT_RINGf(chan, vpt->scale[0]);
+               OUT_RINGf(chan, vpt->scale[1]);
+               OUT_RINGf(chan, vpt->scale[2]);
+               OUT_RINGf(chan, vpt->scale[3]);
+               OUT_RING(chan, RING_3D(0x1d78, 1));
+               OUT_RING(chan, 1);
+       } else {
+               OUT_RING(chan, RING_3D(NV30_3D_VIEWPORT_TRANSLATE_X, 8));
+               OUT_RINGf(chan, 0.0f);
+               OUT_RINGf(chan, 0.0f);
+               OUT_RINGf(chan, 0.0f);
+               OUT_RINGf(chan, 0.0f);
+               OUT_RINGf(chan, 1.0f);
+               OUT_RINGf(chan, 1.0f);
+               OUT_RINGf(chan, 1.0f);
+               OUT_RINGf(chan, 1.0f);
+               OUT_RING(chan, RING_3D(0x1d78, 1));
+               OUT_RING(chan, nvfx->is_nv4x ? 0x110 : 1);
+       }
+}
+
+void
+nvfx_state_scissor_validate(struct nvfx_context *nvfx)
+{
+       struct nouveau_channel *chan = nvfx->screen->base.channel;
+       struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe;
+       struct pipe_scissor_state *s = &nvfx->scissor;
+
+       if ((rast->scissor == 0 && nvfx->state.scissor_enabled == 0))
+               return;
+       nvfx->state.scissor_enabled = rast->scissor;
+
+       WAIT_RING(chan, 3);
+       OUT_RING(chan, RING_3D(NV30_3D_SCISSOR_HORIZ, 2));
+       if (nvfx->state.scissor_enabled) {
+               OUT_RING(chan, ((s->maxx - s->minx) << 16) | s->minx);
+               OUT_RING(chan, ((s->maxy - s->miny) << 16) | s->miny);
+       } else {
+               OUT_RING(chan, 4096 << 16);
+               OUT_RING(chan, 4096 << 16);
+       }
+}
+
+void
+nvfx_state_sr_validate(struct nvfx_context *nvfx)
+{
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       struct pipe_stencil_ref *sr = &nvfx->stencil_ref;
+
+       WAIT_RING(chan, 4);
+       OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(0), 1));
+       OUT_RING(chan, sr->ref_value[0]);
+       OUT_RING(chan, RING_3D(NV30_3D_STENCIL_FUNC_REF(1), 1));
+       OUT_RING(chan, sr->ref_value[1]);
+}
+
+void
+nvfx_state_blend_colour_validate(struct nvfx_context *nvfx)
+{
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       struct pipe_blend_color *bcol = &nvfx->blend_colour;
+
+       WAIT_RING(chan, 2);
+       OUT_RING(chan, RING_3D(NV30_3D_BLEND_COLOR, 1));
+       OUT_RING(chan, ((float_to_ubyte(bcol->color[3]) << 24) |
+                      (float_to_ubyte(bcol->color[0]) << 16) |
+                      (float_to_ubyte(bcol->color[1]) <<  8) |
+                      (float_to_ubyte(bcol->color[2]) <<  0)));
+}
+
+void
+nvfx_state_stipple_validate(struct nvfx_context *nvfx)
+{
+       struct nouveau_channel *chan = nvfx->screen->base.channel;
+
+       WAIT_RING(chan, 33);
+       OUT_RING(chan, RING_3D(NV30_3D_POLYGON_STIPPLE_PATTERN(0), 32));
+       OUT_RINGp(chan, nvfx->stipple, 32);
 }
 
-RENDER_STATES(nv30, nv30, vbo);
-RENDER_STATES(nv30_swtnl, nv30, vtxfmt);
-RENDER_STATES(nv40, nv40, vbo);
-RENDER_STATES(nv40_swtnl, nv40, vtxfmt);
+static void
+nvfx_coord_conventions_validate(struct nvfx_context* nvfx)
+{
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       unsigned value = nvfx->hw_fragprog->coord_conventions;
+       if(value & NV30_3D_COORD_CONVENTIONS_ORIGIN_INVERTED)
+               value |= nvfx->framebuffer.height << NV30_3D_COORD_CONVENTIONS_HEIGHT__SHIFT;
+
+       WAIT_RING(chan, 2);
+       OUT_RING(chan, RING_3D(NV30_3D_COORD_CONVENTIONS, 1));
+       OUT_RING(chan, value);
+}
 
 static void
-nvfx_state_do_validate(struct nvfx_context *nvfx,
-                      struct nvfx_state_entry **states)
+nvfx_ucp_validate(struct nvfx_context* nvfx)
 {
-       while (*states) {
-               struct nvfx_state_entry *e = *states;
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       unsigned enables[7] =
+       {
+                       0,
+                       NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0,
+                       NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE1,
+                       NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE1 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE2,
+                       NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE1 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE2 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE3,
+                       NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE1 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE2 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE3 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE4,
+                       NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE0 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE1 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE2 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE3 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE4 | NV30_3D_VP_CLIP_PLANES_ENABLE_PLANE5,
+       };
 
-               if (nvfx->dirty & e->dirty.pipe) {
-                       if (e->validate(nvfx))
-                               nvfx->state.dirty |= (1ULL << e->dirty.hw);
-               }
+       if(!nvfx->use_vp_clipping)
+       {
+               WAIT_RING(chan, 2);
+               OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1));
+               OUT_RING(chan, 0);
 
-               states++;
+               WAIT_RING(chan, 6 * 4 + 1);
+               OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANE(0, 0), nvfx->clip.nr * 4));
+               OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4);
        }
-       nvfx->dirty = 0;
+
+       WAIT_RING(chan, 2);
+       OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1));
+       OUT_RING(chan, enables[nvfx->clip.nr]);
 }
 
-void
-nvfx_state_emit(struct nvfx_context *nvfx)
+static void
+nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx)
 {
-       struct nvfx_state *state = &nvfx->state;
-       struct nvfx_screen *screen = nvfx->screen;
-       struct nouveau_channel *chan = screen->base.channel;
-       struct nouveau_grobj *eng3d = screen->eng3d;
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
        unsigned i;
-       uint64_t states;
+       struct nvfx_vertex_program* vp = nvfx->hw_vertprog;
+       if(nvfx->clip.nr != vp->clip_nr)
+       {
+               unsigned idx;
+               WAIT_RING(chan, 14);
+
+               /* remove last instruction bit */
+               if(vp->clip_nr >= 0)
+               {
+                       idx = vp->nr_insns - 7 + vp->clip_nr;
+                       OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
+                       OUT_RING(chan,  vp->exec->start + idx);
+                       OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
+                       OUT_RINGp (chan, vp->insns[idx].data, 4);
+               }
+
+                /* set last instruction bit */
+               idx = vp->nr_insns - 7 + nvfx->clip.nr;
+               OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
+               OUT_RING(chan,  vp->exec->start + idx);
+               OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
+               OUT_RINGp(chan, vp->insns[idx].data, 3);
+               OUT_RING(chan, vp->insns[idx].data[3] | 1);
+               vp->clip_nr = nvfx->clip.nr;
+       }
 
-       /* XXX: race conditions
+       // TODO: only do this for the ones changed
+       WAIT_RING(chan, 6 * 6);
+       for(i = 0; i < nvfx->clip.nr; ++i)
+       {
+               OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5));
+               OUT_RING(chan, vp->data->start + i);
+               OUT_RINGp (chan, nvfx->clip.ucp[i], 4);
+       }
+}
+
+static boolean
+nvfx_state_validate_common(struct nvfx_context *nvfx)
+{
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       unsigned dirty;
+       unsigned still_dirty = 0;
+       int new_fb_mode = -1; /* 1 = all swizzled, 0 = make all linear */
+       boolean flush_tex_cache = FALSE;
+       unsigned render_temps;
+
+       if(nvfx != nvfx->screen->cur_ctx)
+       {
+               nvfx->dirty = ~0;
+               nvfx->hw_vtxelt_nr = 16;
+               nvfx->hw_pointsprite_control = -1;
+               nvfx->hw_vp_output = -1;
+               nvfx->screen->cur_ctx = nvfx;
+               nvfx->relocs_needed = NVFX_RELOCATE_ALL;
+       }
+
+       /* These can trigger use the of 3D engine to copy temporaries.
+        * That will recurse here and thus dirty all 3D state, so we need to this before anything else, and in a loop..
+        * This converges to having clean temps, then binding both fragtexes and framebuffers.
         */
-       if (nvfx != screen->cur_ctx) {
-               for (i = 0; i < NVFX_STATE_MAX; i++) {
-                       if (state->hw[i] && screen->state[i] != state->hw[i])
-                               state->dirty |= (1ULL << i);
+       while(nvfx->dirty & (NVFX_NEW_FB | NVFX_NEW_SAMPLER))
+       {
+               if(nvfx->dirty & NVFX_NEW_SAMPLER)
+               {
+                       nvfx->dirty &=~ NVFX_NEW_SAMPLER;
+                       nvfx_fragtex_validate(nvfx);
+
+                       // TODO: only set this if really necessary
+                       flush_tex_cache = TRUE;
                }
 
-               screen->cur_ctx = nvfx;
+               if(nvfx->dirty & NVFX_NEW_FB)
+               {
+                       nvfx->dirty &=~ NVFX_NEW_FB;
+                       new_fb_mode = nvfx_framebuffer_prepare(nvfx);
+
+                       // TODO: make sure this doesn't happen, i.e. fbs have matching formats
+                       assert(new_fb_mode >= 0);
+               }
        }
 
-       for (i = 0, states = state->dirty; states; i++) {
-               if (!(states & (1ULL << i)))
-                       continue;
-               so_ref (state->hw[i], &nvfx->screen->state[i]);
-               if (state->hw[i])
-                       so_emit(chan, nvfx->screen->state[i]);
-               states &= ~(1ULL << i);
+       dirty = nvfx->dirty;
+
+       if(nvfx->render_mode == HW)
+       {
+               if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_VERTCONST))
+               {
+                       if(!nvfx_vertprog_validate(nvfx))
+                               return FALSE;
+               }
+
+               if(dirty & NVFX_NEW_ARRAYS)
+               {
+                       if(!nvfx_vbo_validate(nvfx))
+                               return FALSE;
+               }
+
+               if(dirty & NVFX_NEW_INDEX)
+               {
+                       if(nvfx->use_index_buffer)
+                               nvfx_idxbuf_validate(nvfx);
+                       else
+                               still_dirty = NVFX_NEW_INDEX;
+               }
+       }
+       else
+       {
+               if(dirty & NVFX_NEW_VERTPROG) {
+                       assert(nvfx_vertprog_validate(nvfx));
+                       nvfx_vbo_swtnl_validate(nvfx);
+               }
        }
 
-       /* TODO: could nv30 need this or something similar too? */
-       if(nvfx->is_nv4x) {
-               if (state->dirty & ((1ULL << NVFX_STATE_FRAGPROG) |
-                                   (1ULL << NVFX_STATE_FRAGTEX0))) {
-                       BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1);
-                       OUT_RING  (chan, 2);
-                       BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1);
-                       OUT_RING  (chan, 1);
+       if(dirty & NVFX_NEW_RAST)
+               sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len);
+
+       if(dirty & NVFX_NEW_SCISSOR)
+               nvfx_state_scissor_validate(nvfx);
+
+       if(dirty & NVFX_NEW_STIPPLE)
+               nvfx_state_stipple_validate(nvfx);
+
+       if(nvfx->dirty & NVFX_NEW_UCP)
+              nvfx_ucp_validate(nvfx);
+
+       if(nvfx->use_vp_clipping && (nvfx->dirty & (NVFX_NEW_UCP | NVFX_NEW_VERTPROG)))
+               nvfx_vertprog_ucp_validate(nvfx);
+
+       if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST | NVFX_NEW_VERTPROG | NVFX_NEW_SPRITE))
+       {
+               nvfx_fragprog_validate(nvfx);
+               if(dirty & NVFX_NEW_FRAGPROG)
+                       flush_tex_cache = TRUE; // TODO: do we need this?
+       }
+
+       if(nvfx->is_nv4x)
+       {
+               unsigned vp_output = nvfx->hw_vertprog->or | nvfx->hw_fragprog->or;
+               vp_output |= (1 << (nvfx->clip.nr + 6)) - (1 << 6);
+
+               if(vp_output != nvfx->hw_vp_output)
+               {
+                       WAIT_RING(chan, 2);
+                       OUT_RING(chan, RING_3D(NV40_3D_VP_RESULT_EN, 1));
+                       OUT_RING(chan, vp_output);
+                       nvfx->hw_vp_output = vp_output;
                }
        }
-       state->dirty = 0;
+
+       if(new_fb_mode >= 0)
+               nvfx_framebuffer_validate(nvfx, new_fb_mode);
+
+       if(dirty & NVFX_NEW_BLEND)
+               sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len);
+
+       if(dirty & NVFX_NEW_BCOL)
+               nvfx_state_blend_colour_validate(nvfx);
+
+       if(dirty & NVFX_NEW_ZSA)
+               sb_emit(chan, nvfx->zsa->sb, nvfx->zsa->sb_len);
+
+       if(dirty & NVFX_NEW_SR)
+               nvfx_state_sr_validate(nvfx);
+
+/* All these dependencies are wrong, but otherwise
+   etracer, neverball, foobillard, glest totally misrender
+   TODO: find the right fix
+*/
+       if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_RAST | NVFX_NEW_ZSA) || (new_fb_mode >= 0))
+       {
+               nvfx_state_viewport_validate(nvfx);
+       }
+
+       if(dirty & NVFX_NEW_ZSA || (new_fb_mode >= 0))
+       {
+               WAIT_RING(chan, 3);
+               OUT_RING(chan, RING_3D(NV30_3D_DEPTH_WRITE_ENABLE, 2));
+               OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.writemask);
+               OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled);
+       }
+
+       if((new_fb_mode >= 0) || (dirty & NVFX_NEW_FRAGPROG))
+               nvfx_coord_conventions_validate(nvfx);
+
+       if(flush_tex_cache)
+       {
+               // TODO: what about nv30?
+               if(nvfx->is_nv4x)
+               {
+                       WAIT_RING(chan, 4);
+                       OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1));
+                       OUT_RING(chan, 2);
+                       OUT_RING(chan, RING_3D(NV40_3D_TEX_CACHE_CTL, 1));
+                       OUT_RING(chan, 1);
+               }
+       }
+
+       nvfx->dirty = dirty & still_dirty;
+
+       render_temps = nvfx->state.render_temps;
+       if(render_temps)
+       {
+               for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
+               {
+                       if(render_temps & (1 << i)) {
+                               assert(((struct nvfx_surface*)nvfx->framebuffer.cbufs[i])->temp);
+                               util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]),
+                                               (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]);
+                       }
+               }
+
+               if(render_temps & 0x80) {
+                       assert(((struct nvfx_surface*)nvfx->framebuffer.zsbuf)->temp);
+                       util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf),
+                                       (struct util_dirty_surface*)nvfx->framebuffer.zsbuf);
+               }
+       }
+
+       return TRUE;
 }
 
-void
-nvfx_state_flush_notify(struct nouveau_channel *chan)
+inline void
+nvfx_state_relocate(struct nvfx_context *nvfx, unsigned relocs)
 {
-       struct nvfx_context *nvfx = chan->user_private;
-       struct nvfx_state *state = &nvfx->state;
-       unsigned i, samplers;
-
-       so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FB]);
-       for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
-               if (!(samplers & (1 << i)))
-                       continue;
-               so_emit_reloc_markers(chan,
-                                     state->hw[NVFX_STATE_FRAGTEX0+i]);
-               samplers &= ~(1ULL << i);
-       }
-       so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FRAGPROG]);
-       if (state->hw[NVFX_STATE_VTXBUF] && nvfx->render_mode == HW)
-               so_emit_reloc_markers(chan, state->hw[NVFX_STATE_VTXBUF]);
+       struct nouveau_channel* chan = nvfx->screen->base.channel;
+       /* we need to ensure there is enough space to output relocations in one go */
+       const unsigned max_relocs = 0
+             + 16 /* vertex buffers, incl. dma flag */
+             + 2 /* index buffer plus format+dma flag */
+             + 2 * 5 /* 4 cbufs + zsbuf, plus dma objects */
+             + 2 * 16 /* fragment textures plus format+dma flag */
+             + 2 * 4 /* vertex textures plus format+dma flag */
+             + 1 /* fragprog incl dma flag */
+             ;
+
+       MARK_RING(chan, max_relocs * 2, max_relocs * 2);
+
+       if(relocs & NVFX_RELOCATE_FRAMEBUFFER)
+               nvfx_framebuffer_relocate(nvfx);
+       if(relocs & NVFX_RELOCATE_FRAGTEX)
+               nvfx_fragtex_relocate(nvfx);
+       if(relocs & NVFX_RELOCATE_FRAGPROG)
+               nvfx_fragprog_relocate(nvfx);
+       if(relocs & NVFX_RELOCATE_VTXBUF)
+               nvfx_vbo_relocate(nvfx);
+       if(relocs & NVFX_RELOCATE_IDXBUF)
+               nvfx_idxbuf_relocate(nvfx);
 }
 
 boolean
 nvfx_state_validate(struct nvfx_context *nvfx)
 {
-       boolean was_sw = nvfx->fallback_swtnl ? TRUE : FALSE;
-
        if (nvfx->render_mode != HW) {
                /* Don't even bother trying to go back to hw if none
                 * of the states that caused swtnl previously have changed.
@@ -119,24 +406,15 @@ nvfx_state_validate(struct nvfx_context *nvfx)
                        return FALSE;
 
                /* Attempt to go to hwtnl again */
-               nvfx->pipe.flush(&nvfx->pipe, 0, NULL);
                nvfx->dirty |= (NVFX_NEW_VIEWPORT |
                                NVFX_NEW_VERTPROG |
                                NVFX_NEW_ARRAYS);
                nvfx->render_mode = HW;
        }
 
-       if(!nvfx->is_nv4x)
-               nvfx_state_do_validate(nvfx, nv30_render_states);
-       else
-               nvfx_state_do_validate(nvfx, nv40_render_states);
-
-       if (nvfx->fallback_swtnl || nvfx->fallback_swrast)
+       if(!nvfx_state_validate_common(nvfx))
                return FALSE;
 
-       if (was_sw)
-               NOUVEAU_ERR("swtnl->hw\n");
-
        return TRUE;
 }
 
@@ -147,7 +425,11 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx)
 
        /* Setup for swtnl */
        if (nvfx->render_mode == HW) {
-               NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl);
+               static boolean warned = FALSE;
+               if(!warned) {
+                       NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl);
+                       warned = TRUE;
+               }
                nvfx->pipe.flush(&nvfx->pipe, 0, NULL);
                nvfx->dirty |= (NVFX_NEW_VIEWPORT |
                                NVFX_NEW_VERTPROG |
@@ -155,11 +437,15 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx)
                nvfx->render_mode = SWTNL;
        }
 
-       if (nvfx->draw_dirty & NVFX_NEW_VERTPROG)
-               draw_bind_vertex_shader(draw, nvfx->vertprog->draw);
+       if (nvfx->draw_dirty & NVFX_NEW_VERTPROG) {
+               if(!nvfx->vertprog->draw_vs)
+                       nvfx->vertprog->draw_vs = draw_create_vertex_shader(draw, &nvfx->vertprog->pipe);
+               draw_bind_vertex_shader(draw, nvfx->vertprog->draw_vs);
+       }
 
        if (nvfx->draw_dirty & NVFX_NEW_RAST)
-               draw_set_rasterizer_state(draw, &nvfx->rasterizer->pipe);
+           draw_set_rasterizer_state(draw, &nvfx->rasterizer->pipe,
+                                     nvfx->rasterizer);
 
        if (nvfx->draw_dirty & NVFX_NEW_UCP)
                draw_set_clip_state(draw, &nvfx->clip);
@@ -172,15 +458,10 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx)
                draw_set_vertex_elements(draw, nvfx->vtxelt->num_elements, nvfx->vtxelt->pipe);
        }
 
-       if(!nvfx->is_nv4x)
-               nvfx_state_do_validate(nvfx, nv30_swtnl_render_states);
-       else
-               nvfx_state_do_validate(nvfx, nv40_swtnl_render_states);
+       if (nvfx->draw_dirty & NVFX_NEW_INDEX)
+               draw_set_index_buffer(draw, &nvfx->idxbuf);
 
-       if (nvfx->fallback_swrast) {
-               NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nvfx->fallback_swrast);
-               return FALSE;
-       }
+       nvfx_state_validate_common(nvfx);
 
        nvfx->draw_dirty = 0;
        return TRUE;