llvmpipe: add LP_NEW_GS flag for updating vertex info
[mesa.git] / src / gallium / state_trackers / nine / device9.c
index 1a345b93d3c124f594208bc7c2c673e6f1857d2c..88df38c43f69f02b1110261f12b2d1eda853de15 100644 (file)
@@ -34,6 +34,7 @@
 #include "texture9.h"
 #include "cubetexture9.h"
 #include "volumetexture9.h"
+#include "nine_buffer_upload.h"
 #include "nine_helpers.h"
 #include "nine_pipe.h"
 #include "nine_ff.h"
 
 #if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
 
-#include <fpu_control.h>
-
 static void nine_setup_fpu()
 {
-    fpu_control_t c;
+    uint16_t c;
+
+    __asm__ __volatile__ ("fnstcw %0" : "=m" (*&c));
 
-    _FPU_GETCW(c);
     /* clear the control word */
-    c &= _FPU_RESERVED;
+    c &= 0xF0C0;
     /* d3d9 doc/wine tests: mask all exceptions, use single-precision
      * and round to nearest */
-    c |= _FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM | _FPU_MASK_OM |
-         _FPU_MASK_UM | _FPU_MASK_PM | _FPU_SINGLE | _FPU_RC_NEAREST;
-    _FPU_SETCW(c);
+    c |= 0x003F;
+
+    __asm__ __volatile__ ("fldcw %0" : : "m" (*&c));
 }
 
 #else
@@ -93,31 +93,28 @@ NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset )
 
     nine_state_set_defaults(This, &This->caps, is_reset);
 
+    refSurf = This->swapchains[0]->buffers[0];
+    assert(refSurf);
+
     This->state.viewport.X = 0;
     This->state.viewport.Y = 0;
-    This->state.viewport.Width = 0;
-    This->state.viewport.Height = 0;
+    This->state.viewport.Width = refSurf->desc.Width;
+    This->state.viewport.Height = refSurf->desc.Height;
+
+    nine_context_set_viewport(This, &This->state.viewport);
 
     This->state.scissor.minx = 0;
     This->state.scissor.miny = 0;
-    This->state.scissor.maxx = 0xffff;
-    This->state.scissor.maxy = 0xffff;
+    This->state.scissor.maxx = refSurf->desc.Width;
+    This->state.scissor.maxy = refSurf->desc.Height;
 
-    if (This->nswapchains && This->swapchains[0]->params.BackBufferCount)
-        refSurf = This->swapchains[0]->buffers[0];
-
-    if (refSurf) {
-        This->state.viewport.Width = refSurf->desc.Width;
-        This->state.viewport.Height = refSurf->desc.Height;
-        This->state.scissor.maxx = refSurf->desc.Width;
-        This->state.scissor.maxy = refSurf->desc.Height;
-    }
+    nine_context_set_scissor(This, &This->state.scissor);
 
     if (This->nswapchains && This->swapchains[0]->params.EnableAutoDepthStencil) {
-        This->state.rs[D3DRS_ZENABLE] = TRUE;
+        nine_context_set_render_state(This, D3DRS_ZENABLE, TRUE);
         This->state.rs_advertised[D3DRS_ZENABLE] = TRUE;
     }
-    if (This->state.rs[D3DRS_ZENABLE])
+    if (This->state.rs_advertised[D3DRS_ZENABLE])
         NineDevice9_SetDepthStencilSurface(
             This, (IDirect3DSurface9 *)This->swapchains[0]->zsbuf);
 }
@@ -153,6 +150,7 @@ NineDevice9_ctor( struct NineDevice9 *This,
     list_inithead(&This->managed_textures);
 
     This->screen = pScreen;
+    This->screen_sw = pCTX->ref;
     This->caps = *pCaps;
     This->d3d9 = pD3D9;
     This->params = *pCreationParameters;
@@ -166,19 +164,48 @@ NineDevice9_ctor( struct NineDevice9 *This,
     if (!(This->params.BehaviorFlags & D3DCREATE_FPU_PRESERVE))
         nine_setup_fpu();
 
-    if (This->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING)
-        DBG("Application asked full Software Vertex Processing. Ignoring.\n");
-    if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING)
-        DBG("Application asked mixed Software Vertex Processing. Ignoring.\n");
-
-    This->pipe = This->screen->context_create(This->screen, NULL, 0);
-    if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */
-
-    This->cso = cso_create_context(This->pipe);
-    if (!This->cso) { return E_OUTOFMEMORY; } /* also a guess */
+    if (This->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) {
+        DBG("Application asked full Software Vertex Processing.\n");
+        This->swvp = true;
+        This->may_swvp = true;
+    } else
+        This->swvp = false;
+    if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) {
+        DBG("Application asked mixed Software Vertex Processing.\n");
+        This->may_swvp = true;
+    }
+    This->context.swvp = This->swvp;
+    /* TODO: check if swvp is resetted by device Resets */
+
+    if (This->may_swvp &&
+        (This->screen->get_shader_param(This->screen, PIPE_SHADER_VERTEX,
+                                        PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE)
+                                     < (NINE_MAX_CONST_F_SWVP/2) * sizeof(float[4]) ||
+         This->screen->get_shader_param(This->screen, PIPE_SHADER_VERTEX,
+                                        PIPE_SHADER_CAP_MAX_CONST_BUFFERS) < 5)) {
+        /* Note: We just go on, some apps never use the abilities of
+         * swvp, and just set more constants than allowed at init.
+         * Only cards we support that are affected are the r500 */
+        WARN("Card unable to handle Software Vertex Processing. Game may fail\n");
+    }
+
+    /* When may_swvp, SetConstant* limits are different */
+    if (This->may_swvp)
+        This->caps.MaxVertexShaderConst = NINE_MAX_CONST_F_SWVP;
+
+    This->context.pipe = This->screen->context_create(This->screen, NULL, 0);
+    This->pipe_secondary = This->screen->context_create(This->screen, NULL, 0);
+    if (!This->context.pipe || !This->pipe_secondary) { return E_OUTOFMEMORY; } /* guess */
+    This->pipe_sw = This->screen_sw->context_create(This->screen_sw, NULL, 0);
+    if (!This->pipe_sw) { return E_OUTOFMEMORY; }
+
+    This->context.cso = cso_create_context(This->context.pipe, 0);
+    if (!This->context.cso) { return E_OUTOFMEMORY; } /* also a guess */
+    This->cso_sw = cso_create_context(This->pipe_sw, 0);
+    if (!This->cso_sw) { return E_OUTOFMEMORY; }
 
     /* Create first, it messes up our state. */
-    This->hud = hud_create(This->pipe, This->cso); /* NULL result is fine */
+    This->hud = hud_create(This->context.pipe, This->context.cso); /* NULL result is fine */
 
     /* Available memory counter. Updated only for allocations with this device
      * instance. This is the Win 7 behavior.
@@ -235,9 +262,36 @@ NineDevice9_ctor( struct NineDevice9 *This,
         if (FAILED(hr))
             return hr;
         NineUnknown_ConvertRefToBind(NineUnknown(This->state.rt[i]));
+        nine_bind(&This->context.rt[i], This->state.rt[i]);
+    }
+
+    /* Initialize CSMT */
+    if (pCTX->csmt_force == 1)
+        This->csmt_active = true;
+    else if (pCTX->csmt_force == 0)
+        This->csmt_active = false;
+    else
+        /* r600 and radeonsi are thread safe. */
+        This->csmt_active = strstr(pScreen->get_name(pScreen), "AMD") != NULL;
+
+    /* We rely on u_upload_mgr using persistent coherent buffers (which don't
+     * require flush to work in multi-pipe_context scenario) for vertex and
+     * index buffers */
+    if (!GET_PCAP(BUFFER_MAP_PERSISTENT_COHERENT))
+        This->csmt_active = false;
+
+    if (This->csmt_active) {
+        This->csmt_ctx = nine_csmt_create(This);
+        if (!This->csmt_ctx)
+            return E_OUTOFMEMORY;
     }
 
-    /* Initialize a dummy VBO to be used when a a vertex declaration does not
+    if (This->csmt_active)
+        DBG("\033[1;32mCSMT is active\033[0m\n");
+
+    This->buffer_upload = nine_upload_create(This->pipe_secondary, 4 * 1024 * 1024, 4);
+
+    /* Initialize a dummy VBO to be used when a vertex declaration does not
      * specify all the inputs needed by vertex shader, on win default behavior
      * is to pass 0,0,0,0 to the shader */
     {
@@ -246,6 +300,7 @@ NineDevice9_ctor( struct NineDevice9 *This,
         struct pipe_box box;
         unsigned char *data;
 
+        memset(&tmpl, 0, sizeof(tmpl));
         tmpl.target = PIPE_BUFFER;
         tmpl.format = PIPE_FORMAT_R8_UNORM;
         tmpl.width0 = 16; /* 4 floats */
@@ -255,7 +310,7 @@ NineDevice9_ctor( struct NineDevice9 *This,
         tmpl.last_level = 0;
         tmpl.nr_samples = 0;
         tmpl.usage = PIPE_USAGE_DEFAULT;
-        tmpl.bind = PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_TRANSFER_WRITE;
+        tmpl.bind = PIPE_BIND_VERTEX_BUFFER;
         tmpl.flags = 0;
         This->dummy_vbo = pScreen->resource_create(pScreen, &tmpl);
 
@@ -263,14 +318,14 @@ NineDevice9_ctor( struct NineDevice9 *This,
             return D3DERR_OUTOFVIDEOMEMORY;
 
         u_box_1d(0, 16, &box);
-        data = This->pipe->transfer_map(This->pipe, This->dummy_vbo, 0,
+        data = This->context.pipe->transfer_map(This->context.pipe, This->dummy_vbo, 0,
                                         PIPE_TRANSFER_WRITE |
                                         PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
                                         &box, &transfer);
         assert(data);
         assert(transfer);
         memset(data, 0, 16);
-        This->pipe->transfer_unmap(This->pipe, transfer);
+        This->context.pipe->transfer_unmap(This->context.pipe, transfer);
     }
 
     This->cursor.software = FALSE;
@@ -278,6 +333,7 @@ NineDevice9_ctor( struct NineDevice9 *This,
     This->cursor.hotspot.y = -1;
     {
         struct pipe_resource tmpl;
+        memset(&tmpl, 0, sizeof(tmpl));
         tmpl.target = PIPE_TEXTURE_2D;
         tmpl.format = PIPE_FORMAT_R8G8B8A8_UNORM;
         tmpl.width0 = 64;
@@ -293,11 +349,15 @@ NineDevice9_ctor( struct NineDevice9 *This,
         This->cursor.image = pScreen->resource_create(pScreen, &tmpl);
         if (!This->cursor.image)
             return D3DERR_OUTOFVIDEOMEMORY;
+
+        /* For uploading 32x32 (argb) cursor */
+        This->cursor.hw_upload_temp = MALLOC(32 * 4 * 32);
+        if (!This->cursor.hw_upload_temp)
+            return D3DERR_OUTOFVIDEOMEMORY;
     }
 
     /* Create constant buffers. */
     {
-        struct pipe_resource tmpl;
         unsigned max_const_vs, max_const_ps;
 
         /* vs 3.0: >= 256 float constants, but for cards with exactly 256 slots,
@@ -318,41 +378,43 @@ NineDevice9_ctor( struct NineDevice9 *This,
         This->vs_const_size = max_const_vs * sizeof(float[4]);
         This->ps_const_size = max_const_ps * sizeof(float[4]);
         /* Include space for I,B constants for user constbuf. */
-        This->state.vs_const_f = CALLOC(This->vs_const_size, 1);
+        if (This->may_swvp) {
+            This->state.vs_const_f = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
+            This->context.vs_const_f_swvp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
+            if (!This->context.vs_const_f_swvp)
+                return E_OUTOFMEMORY;
+            This->state.vs_lconstf_temp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
+            This->context.vs_lconstf_temp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
+            This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I_SWVP * sizeof(int[4]), 1);
+            This->context.vs_const_i = CALLOC(NINE_MAX_CONST_I_SWVP * sizeof(int[4]), 1);
+            This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B_SWVP * sizeof(BOOL), 1);
+            This->context.vs_const_b = CALLOC(NINE_MAX_CONST_B_SWVP * sizeof(BOOL), 1);
+        } else {
+            This->state.vs_const_f = CALLOC(NINE_MAX_CONST_F * sizeof(float[4]), 1);
+            This->context.vs_const_f_swvp = NULL;
+            This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
+            This->context.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
+            This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1);
+            This->context.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1);
+            This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1);
+            This->context.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1);
+        }
+        This->context.vs_const_f = CALLOC(This->vs_const_size, 1);
         This->state.ps_const_f = CALLOC(This->ps_const_size, 1);
-        This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
-        This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1);
-        if (!This->state.vs_const_f || !This->state.ps_const_f ||
-            !This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp)
+        This->context.ps_const_f = CALLOC(This->ps_const_size, 1);
+        This->context.ps_lconstf_temp = CALLOC(This->ps_const_size,1);
+        if (!This->state.vs_const_f || !This->context.vs_const_f ||
+            !This->state.ps_const_f || !This->context.ps_const_f ||
+            !This->state.vs_lconstf_temp || !This->context.vs_lconstf_temp ||
+            !This->context.ps_lconstf_temp ||
+            !This->state.vs_const_i || !This->context.vs_const_i ||
+            !This->state.vs_const_b || !This->context.vs_const_b)
             return E_OUTOFMEMORY;
 
         if (strstr(pScreen->get_name(pScreen), "AMD") ||
             strstr(pScreen->get_name(pScreen), "ATI")) {
             This->driver_bugs.buggy_barycentrics = TRUE;
         }
-
-        /* Disable NV path for now, needs some fixes */
-        This->prefer_user_constbuf = TRUE;
-
-        tmpl.target = PIPE_BUFFER;
-        tmpl.format = PIPE_FORMAT_R8_UNORM;
-        tmpl.height0 = 1;
-        tmpl.depth0 = 1;
-        tmpl.array_size = 1;
-        tmpl.last_level = 0;
-        tmpl.nr_samples = 0;
-        tmpl.usage = PIPE_USAGE_DYNAMIC;
-        tmpl.bind = PIPE_BIND_CONSTANT_BUFFER;
-        tmpl.flags = 0;
-
-        tmpl.width0 = This->vs_const_size;
-        This->constbuf_vs = pScreen->resource_create(pScreen, &tmpl);
-
-        tmpl.width0 = This->ps_const_size;
-        This->constbuf_ps = pScreen->resource_create(pScreen, &tmpl);
-
-        if (!This->constbuf_vs || !This->constbuf_ps)
-            return E_OUTOFMEMORY;
     }
 
     /* allocate dummy texture/sampler for when there are missing ones bound */
@@ -360,6 +422,7 @@ NineDevice9_ctor( struct NineDevice9 *This,
         struct pipe_resource tmplt;
         struct pipe_sampler_view templ;
         struct pipe_sampler_state samp;
+        memset(&tmplt, 0, sizeof(tmplt));
         memset(&samp, 0, sizeof(samp));
 
         tmplt.target = PIPE_TEXTURE_2D;
@@ -389,7 +452,7 @@ NineDevice9_ctor( struct NineDevice9 *This,
         templ.swizzle_a = PIPE_SWIZZLE_1;
         templ.target = This->dummy_texture->target;
 
-        This->dummy_sampler_view = This->pipe->create_sampler_view(This->pipe, This->dummy_texture, &templ);
+        This->dummy_sampler_view = This->context.pipe->create_sampler_view(This->context.pipe, This->dummy_texture, &templ);
         if (!This->dummy_sampler_view)
             return D3DERR_DRIVERINTERNALERROR;
 
@@ -403,31 +466,23 @@ NineDevice9_ctor( struct NineDevice9 *This,
         samp.compare_mode = PIPE_TEX_COMPARE_NONE;
         samp.compare_func = PIPE_FUNC_LEQUAL;
         samp.normalized_coords = 1;
-        samp.seamless_cube_map = 1;
+        samp.seamless_cube_map = 0;
         This->dummy_sampler_state = samp;
     }
 
     /* Allocate upload helper for drivers that suck (from st pov ;). */
 
-    This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS);
-    This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS);
+    This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS) && !This->csmt_active;
     This->driver_caps.user_cbufs = GET_PCAP(USER_CONSTANT_BUFFERS);
-
-    if (!This->driver_caps.user_vbufs)
-        This->vertex_uploader = u_upload_create(This->pipe, 65536,
-                                                PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM);
-    if (!This->driver_caps.user_ibufs)
-        This->index_uploader = u_upload_create(This->pipe, 128 * 1024,
-                                               PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_STREAM);
-    if (!This->driver_caps.user_cbufs) {
+    This->driver_caps.user_sw_vbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_VERTEX_BUFFERS);
+    This->driver_caps.user_sw_cbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_CONSTANT_BUFFERS);
+    This->vertex_uploader = This->csmt_active ? This->pipe_secondary->stream_uploader : This->context.pipe->stream_uploader;
+    if (!This->driver_caps.user_cbufs)
         This->constbuf_alignment = GET_PCAP(CONSTANT_BUFFER_OFFSET_ALIGNMENT);
-        This->constbuf_uploader = u_upload_create(This->pipe, This->vs_const_size,
-                                                  PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM);
-    }
-
     This->driver_caps.window_space_position_support = GET_PCAP(TGSI_VS_WINDOW_SPACE_POSITION);
     This->driver_caps.vs_integer = pScreen->get_shader_param(pScreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS);
     This->driver_caps.ps_integer = pScreen->get_shader_param(pScreen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS);
+    This->driver_caps.offset_units_unscaled = GET_PCAP(POLYGON_OFFSET_UNITS_UNSCALED);
 
     nine_ff_init(This); /* initialize fixed function code */
 
@@ -436,13 +491,15 @@ NineDevice9_ctor( struct NineDevice9 *This,
     {
         struct pipe_poly_stipple stipple;
         memset(&stipple, ~0, sizeof(stipple));
-        This->pipe->set_polygon_stipple(This->pipe, &stipple);
+        This->context.pipe->set_polygon_stipple(This->context.pipe, &stipple);
     }
 
     This->update = &This->state;
-    nine_update_state(This);
+
+    nine_state_init_sw(This);
 
     ID3DPresentGroup_Release(This->present);
+    nine_csmt_process(This);
 
     return D3D_OK;
 }
@@ -455,29 +512,43 @@ NineDevice9_dtor( struct NineDevice9 *This )
 
     DBG("This=%p\n", This);
 
-    if (This->pipe && This->cso)
-        nine_pipe_context_clear(This);
+    /* Flush all pending commands to get refcount right,
+     * and properly release bound objects. It is ok to still
+     * execute commands while we are in device dtor, because
+     * we haven't released anything yet. Note that no pending
+     * command can increase the device refcount. */
+    if (This->csmt_active && This->csmt_ctx) {
+        nine_csmt_process(This);
+        nine_csmt_destroy(This, This->csmt_ctx);
+        This->csmt_active = FALSE;
+        This->csmt_ctx = NULL;
+    }
+
     nine_ff_fini(This);
+    nine_state_destroy_sw(This);
     nine_state_clear(&This->state, TRUE);
-
-    if (This->vertex_uploader)
-        u_upload_destroy(This->vertex_uploader);
-    if (This->index_uploader)
-        u_upload_destroy(This->index_uploader);
-    if (This->constbuf_uploader)
-        u_upload_destroy(This->constbuf_uploader);
+    nine_context_clear(This);
 
     nine_bind(&This->record, NULL);
 
     pipe_sampler_view_reference(&This->dummy_sampler_view, NULL);
     pipe_resource_reference(&This->dummy_texture, NULL);
-    pipe_resource_reference(&This->constbuf_vs, NULL);
-    pipe_resource_reference(&This->constbuf_ps, NULL);
     pipe_resource_reference(&This->dummy_vbo, NULL);
     FREE(This->state.vs_const_f);
+    FREE(This->context.vs_const_f);
     FREE(This->state.ps_const_f);
+    FREE(This->context.ps_const_f);
     FREE(This->state.vs_lconstf_temp);
-    FREE(This->state.ps_lconstf_temp);
+    FREE(This->context.vs_lconstf_temp);
+    FREE(This->context.ps_lconstf_temp);
+    FREE(This->state.vs_const_i);
+    FREE(This->context.vs_const_i);
+    FREE(This->state.vs_const_b);
+    FREE(This->context.vs_const_b);
+    FREE(This->context.vs_const_f_swvp);
+
+    pipe_resource_reference(&This->cursor.image, NULL);
+    FREE(This->cursor.hw_upload_temp);
 
     if (This->swapchains) {
         for (i = 0; i < This->nswapchains; ++i)
@@ -486,13 +557,15 @@ NineDevice9_dtor( struct NineDevice9 *This )
         FREE(This->swapchains);
     }
 
-    /* state stuff */
-    if (This->pipe) {
-        if (This->cso) {
-            cso_destroy_context(This->cso);
-        }
-        if (This->pipe->destroy) { This->pipe->destroy(This->pipe); }
-    }
+    if (This->buffer_upload)
+        nine_upload_destroy(This->buffer_upload);
+
+    /* Destroy cso first */
+    if (This->context.cso) { cso_destroy_context(This->context.cso); }
+    if (This->cso_sw) { cso_destroy_context(This->cso_sw); }
+    if (This->context.pipe && This->context.pipe->destroy) { This->context.pipe->destroy(This->context.pipe); }
+    if (This->pipe_secondary && This->pipe_secondary->destroy) { This->pipe_secondary->destroy(This->pipe_secondary); }
+    if (This->pipe_sw && This->pipe_sw->destroy) { This->pipe_sw->destroy(This->pipe_sw); }
 
     if (This->present) { ID3DPresentGroup_Release(This->present); }
     if (This->d3d9) { IDirect3D9_Release(This->d3d9); }
@@ -509,13 +582,7 @@ NineDevice9_GetScreen( struct NineDevice9 *This )
 struct pipe_context *
 NineDevice9_GetPipe( struct NineDevice9 *This )
 {
-    return This->pipe;
-}
-
-struct cso_context *
-NineDevice9_GetCSO( struct NineDevice9 *This )
-{
-    return This->cso;
+    return nine_context_get_pipe(This);
 }
 
 const D3DCAPS9 *
@@ -548,6 +615,9 @@ NineDevice9_TestCooperativeLevel( struct NineDevice9 *This )
     if (NineSwapChain9_GetOccluded(This->swapchains[0])) {
         This->device_needs_reset = TRUE;
         return D3DERR_DEVICELOST;
+    } else if (NineSwapChain9_ResolutionMismatch(This->swapchains[0])) {
+        This->device_needs_reset = TRUE;
+        return D3DERR_DEVICENOTRESET;
     } else if (This->device_needs_reset) {
         return D3DERR_DEVICENOTRESET;
     }
@@ -630,7 +700,7 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
                                  IDirect3DSurface9 *pCursorBitmap )
 {
     struct NineSurface9 *surf = NineSurface9(pCursorBitmap);
-    struct pipe_context *pipe = This->pipe;
+    struct pipe_context *pipe = NineDevice9_GetPipe(This);
     struct pipe_box box;
     struct pipe_transfer *transfer;
     BOOL hw_cursor;
@@ -681,11 +751,20 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
                                  lock.pBits, lock.Pitch,
                                  This->cursor.w, This->cursor.h);
 
-        if (hw_cursor)
+        if (hw_cursor) {
+            void *data = lock.pBits;
+            /* SetCursor assumes 32x32 argb with pitch 128 */
+            if (lock.Pitch != 128) {
+                sfmt->unpack_rgba_8unorm(This->cursor.hw_upload_temp, 128,
+                                         lock.pBits, lock.Pitch,
+                                         32, 32);
+                data = This->cursor.hw_upload_temp;
+            }
             hw_cursor = ID3DPresent_SetCursor(This->swapchains[0]->present,
-                                              lock.pBits,
+                                              data,
                                               &This->cursor.hotspot,
                                               This->cursor.visible) == D3D_OK;
+        }
 
         NineSurface9_UnlockRect(surf);
     }
@@ -809,8 +888,9 @@ NineDevice9_Reset( struct NineDevice9 *This,
             break;
     }
 
-    nine_pipe_context_clear(This);
+    nine_csmt_process(This);
     nine_state_clear(&This->state, TRUE);
+    nine_context_clear(This);
 
     NineDevice9_SetDefaultState(This, TRUE);
     NineDevice9_SetRenderTarget(
@@ -1098,11 +1178,8 @@ create_zs_or_rt_surface(struct NineDevice9 *This,
                         HANDLE *pSharedHandle)
 {
     struct NineSurface9 *surface;
-    struct pipe_screen *screen = This->screen;
-    struct pipe_resource *resource = NULL;
     HRESULT hr;
     D3DSURFACE_DESC desc;
-    struct pipe_resource templ;
 
     DBG("This=%p type=%u Pool=%s Width=%u Height=%u Format=%s MS=%u Quality=%u "
         "Discard_or_Lockable=%i ppSurface=%p pSharedHandle=%p\n",
@@ -1116,30 +1193,6 @@ create_zs_or_rt_surface(struct NineDevice9 *This,
     user_assert(Width && Height, D3DERR_INVALIDCALL);
     user_assert(Pool != D3DPOOL_MANAGED, D3DERR_INVALIDCALL);
 
-    templ.target = PIPE_TEXTURE_2D;
-    templ.width0 = Width;
-    templ.height0 = Height;
-    templ.depth0 = 1;
-    templ.array_size = 1;
-    templ.last_level = 0;
-    templ.nr_samples = (unsigned)MultiSample;
-    templ.usage = PIPE_USAGE_DEFAULT;
-    templ.flags = 0;
-    templ.bind = PIPE_BIND_SAMPLER_VIEW; /* StretchRect */
-    switch (type) {
-    case 0: templ.bind |= PIPE_BIND_RENDER_TARGET; break;
-    case 1: templ.bind = d3d9_get_pipe_depth_format_bindings(Format); break;
-    default:
-        assert(type == 2);
-        break;
-    }
-    templ.format = d3d9_to_pipe_format_checked(screen, Format, templ.target,
-                                               templ.nr_samples, templ.bind,
-                                               FALSE, Pool == D3DPOOL_SCRATCH);
-
-    if (templ.format == PIPE_FORMAT_NONE && Format != D3DFMT_NULL)
-        return D3DERR_INVALIDCALL;
-
     desc.Format = Format;
     desc.Type = D3DRTYPE_SURFACE;
     desc.Usage = 0;
@@ -1151,31 +1204,17 @@ create_zs_or_rt_surface(struct NineDevice9 *This,
     switch (type) {
     case 0: desc.Usage = D3DUSAGE_RENDERTARGET; break;
     case 1: desc.Usage = D3DUSAGE_DEPTHSTENCIL; break;
-    default: break;
+    default: assert(type == 2); break;
     }
 
-    if (compressed_format(Format)) {
-        const unsigned w = util_format_get_blockwidth(templ.format);
-        const unsigned h = util_format_get_blockheight(templ.format);
-
-        user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL);
-    }
+    hr = NineSurface9_new(This, NULL, NULL, NULL, 0, 0, 0, &desc, &surface);
+    if (SUCCEEDED(hr)) {
+        *ppSurface = (IDirect3DSurface9 *)surface;
 
-    if (Pool == D3DPOOL_DEFAULT && Format != D3DFMT_NULL) {
-        /* resource_create doesn't return an error code, so check format here */
-        user_assert(templ.format != PIPE_FORMAT_NONE, D3DERR_INVALIDCALL);
-        resource = screen->resource_create(screen, &templ);
-        user_assert(resource, D3DERR_OUTOFVIDEOMEMORY);
-        if (Discard_or_Lockable && (desc.Usage & D3DUSAGE_RENDERTARGET))
-            resource->flags |= NINE_RESOURCE_FLAG_LOCKABLE;
-    } else {
-        resource = NULL;
+        if (surface->base.resource && Discard_or_Lockable && (type != 1))
+            surface->base.resource->flags |= NINE_RESOURCE_FLAG_LOCKABLE;
     }
-    hr = NineSurface9_new(This, NULL, resource, NULL, 0, 0, 0, &desc, &surface);
-    pipe_resource_reference(&resource, NULL);
 
-    if (SUCCEEDED(hr))
-        *ppSurface = (IDirect3DSurface9 *)surface;
     return hr;
 }
 
@@ -1318,51 +1357,61 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
     struct NineBaseTexture9 *dstb = NineBaseTexture9(pDestinationTexture);
     struct NineBaseTexture9 *srcb = NineBaseTexture9(pSourceTexture);
     unsigned l, m;
-    unsigned last_level = dstb->base.info.last_level;
+    unsigned last_src_level, last_dst_level;
     RECT rect;
 
     DBG("This=%p pSourceTexture=%p pDestinationTexture=%p\n", This,
         pSourceTexture, pDestinationTexture);
 
+    user_assert(pSourceTexture && pDestinationTexture, D3DERR_INVALIDCALL);
     user_assert(pSourceTexture != pDestinationTexture, D3DERR_INVALIDCALL);
 
     user_assert(dstb->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
     user_assert(srcb->base.pool == D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL);
-
-    if (dstb->base.usage & D3DUSAGE_AUTOGENMIPMAP) {
-        /* Only the first level is updated, the others regenerated. */
-        last_level = 0;
-        /* if the source has D3DUSAGE_AUTOGENMIPMAP, we have to ignore
-         * the sublevels, thus level 0 has to match */
-        user_assert(!(srcb->base.usage & D3DUSAGE_AUTOGENMIPMAP) ||
-                    (srcb->base.info.width0 == dstb->base.info.width0 &&
-                     srcb->base.info.height0 == dstb->base.info.height0 &&
-                     srcb->base.info.depth0 == dstb->base.info.depth0),
-                    D3DERR_INVALIDCALL);
-    } else {
-        user_assert(!(srcb->base.usage & D3DUSAGE_AUTOGENMIPMAP), D3DERR_INVALIDCALL);
-    }
-
     user_assert(dstb->base.type == srcb->base.type, D3DERR_INVALIDCALL);
+    user_assert(!(srcb->base.usage & D3DUSAGE_AUTOGENMIPMAP) ||
+                dstb->base.usage & D3DUSAGE_AUTOGENMIPMAP, D3DERR_INVALIDCALL);
+
+    /* Spec: Failure if
+     * . Different formats
+     * . Fewer src levels than dst levels (if the opposite, only matching levels
+     *   are supposed to be copied)
+     * . Levels do not match
+     * DDI: Actually the above should pass because of legacy applications
+     * Do what you want about these, but you shouldn't crash.
+     * However driver can expect that the top dimension is greater for src than dst.
+     * Wine tests: Every combination that passes the initial checks should pass.
+     * . Different formats => conversion driver and format dependent.
+     * . 1 level, but size not matching => copy is done (and even crash if src bigger
+     * than dst. For the case where dst bigger, wine doesn't test if a stretch is applied
+     * or if a subrect is copied).
+     * . 8x8 4 sublevels -> 7x7 2 sublevels => driver dependent, On NV seems to be 4x4 subrect
+     * copied to 7x7.
+     *
+     * From these, the proposal is:
+     * . Different formats -> use util_format_translate to translate if possible for surfaces.
+     * Accept ARGB/XRGB for Volumes. Do nothing for the other combinations
+     * . First level copied -> the first level such that src is smaller or equal to dst first level
+     * . number of levels copied -> as long as it fits and textures have levels
+     * That should satisfy the constraints (and instead of crashing for some cases we return D3D_OK)
+     */
 
-    /* Find src level that matches dst level 0: */
-    user_assert(srcb->base.info.width0 >= dstb->base.info.width0 &&
-                srcb->base.info.height0 >= dstb->base.info.height0 &&
-                srcb->base.info.depth0 >= dstb->base.info.depth0,
-                D3DERR_INVALIDCALL);
-    for (m = 0; m <= srcb->base.info.last_level; ++m) {
+    last_src_level = (srcb->base.usage & D3DUSAGE_AUTOGENMIPMAP) ? 0 : srcb->base.info.last_level;
+    last_dst_level = (dstb->base.usage & D3DUSAGE_AUTOGENMIPMAP) ? 0 : dstb->base.info.last_level;
+
+    for (m = 0; m <= last_src_level; ++m) {
         unsigned w = u_minify(srcb->base.info.width0, m);
         unsigned h = u_minify(srcb->base.info.height0, m);
         unsigned d = u_minify(srcb->base.info.depth0, m);
 
-        if (w == dstb->base.info.width0 &&
-            h == dstb->base.info.height0 &&
-            d == dstb->base.info.depth0)
+        if (w <= dstb->base.info.width0 &&
+            h <= dstb->base.info.height0 &&
+            d <= dstb->base.info.depth0)
             break;
     }
-    user_assert(m <= srcb->base.info.last_level, D3DERR_INVALIDCALL);
+    user_assert(m <= last_src_level, D3D_OK);
 
-    last_level = MIN2(last_level, srcb->base.info.last_level - m);
+    last_dst_level = MIN2(srcb->base.info.last_level - m, last_dst_level);
 
     if (dstb->base.type == D3DRTYPE_TEXTURE) {
         struct NineTexture9 *dst = NineTexture9(dstb);
@@ -1375,7 +1424,7 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
         for (l = 0; l < m; ++l)
             rect_minify_inclusive(&rect);
 
-        for (l = 0; l <= last_level; ++l, ++m) {
+        for (l = 0; l <= last_dst_level; ++l, ++m) {
             fit_rect_format_inclusive(dst->base.base.info.format,
                                       &rect,
                                       dst->surfaces[l]->desc.Width,
@@ -1402,7 +1451,7 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
             for (l = 0; l < m; ++l)
                 rect_minify_inclusive(&rect);
 
-            for (l = 0; l <= last_level; ++l, ++m) {
+            for (l = 0; l <= last_dst_level; ++l, ++m) {
                 fit_rect_format_inclusive(dst->base.base.info.format,
                                           &rect,
                                           dst->surfaces[l * 6 + z]->desc.Width,
@@ -1423,7 +1472,7 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
 
         if (src->dirty_box.width == 0)
             return D3D_OK;
-        for (l = 0; l <= last_level; ++l, ++m)
+        for (l = 0; l <= last_dst_level; ++l, ++m)
             NineVolume9_CopyMemToDefault(dst->volumes[l],
                                          src->volumes[m], 0, 0, 0, NULL);
         u_box_3d(0, 0, 0, 0, 0, 0, &src->dirty_box);
@@ -1450,6 +1499,8 @@ NineDevice9_GetRenderTargetData( struct NineDevice9 *This,
     DBG("This=%p pRenderTarget=%p pDestSurface=%p\n",
         This, pRenderTarget, pDestSurface);
 
+    user_assert(pRenderTarget && pDestSurface, D3DERR_INVALIDCALL);
+
     user_assert(dst->desc.Pool == D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL);
     user_assert(src->desc.Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
 
@@ -1459,6 +1510,8 @@ NineDevice9_GetRenderTargetData( struct NineDevice9 *This,
     user_assert(src->desc.Width == dst->desc.Width, D3DERR_INVALIDCALL);
     user_assert(src->desc.Height == dst->desc.Height, D3DERR_INVALIDCALL);
 
+    user_assert(src->desc.Format != D3DFMT_NULL, D3DERR_INVALIDCALL);
+
     NineSurface9_CopyDefaultToMem(dst, src);
 
     return D3D_OK;
@@ -1488,7 +1541,6 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
                          D3DTEXTUREFILTERTYPE Filter )
 {
     struct pipe_screen *screen = This->screen;
-    struct pipe_context *pipe = This->pipe;
     struct NineSurface9 *dst = NineSurface9(pDestSurface);
     struct NineSurface9 *src = NineSurface9(pSourceSurface);
     struct pipe_resource *dst_res = NineSurface9_GetResource(dst);
@@ -1648,7 +1700,8 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
         clamped = !!xy;
     }
 
-    ms = (dst->desc.MultiSampleType | 1) != (src->desc.MultiSampleType | 1);
+    ms = (dst->desc.MultiSampleType != src->desc.MultiSampleType) ||
+         (dst->desc.MultiSampleQuality != src->desc.MultiSampleQuality);
 
     if (clamped || scaled || (blit.dst.format != blit.src.format) || ms) {
         DBG("using pipe->blit()\n");
@@ -1660,7 +1713,8 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
                                                 PIPE_BIND_RENDER_TARGET),
                     D3DERR_INVALIDCALL);
 
-        pipe->blit(pipe, &blit);
+        nine_context_blit(This, (struct NineUnknown *)dst,
+                          (struct NineUnknown *)src, &blit);
     } else {
         assert(blit.dst.box.x >= 0 && blit.dst.box.y >= 0 &&
                blit.src.box.x >= 0 && blit.src.box.y >= 0 &&
@@ -1670,11 +1724,12 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
                blit.src.box.y + blit.src.box.height <= src->desc.Height);
         /* Or drivers might crash ... */
         DBG("Using resource_copy_region.\n");
-        pipe->resource_copy_region(pipe,
-            blit.dst.resource, blit.dst.level,
-            blit.dst.box.x, blit.dst.box.y, blit.dst.box.z,
-            blit.src.resource, blit.src.level,
-            &blit.src.box);
+        nine_context_resource_copy_region(This, (struct NineUnknown *)dst,
+                                          (struct NineUnknown *)src,
+                                          blit.dst.resource, blit.dst.level,
+                                          &blit.dst.box,
+                                          blit.src.resource, blit.src.level,
+                                          &blit.src.box);
     }
 
     /* Communicate the container it needs to update sublevels - if apply */
@@ -1689,12 +1744,8 @@ NineDevice9_ColorFill( struct NineDevice9 *This,
                        const RECT *pRect,
                        D3DCOLOR color )
 {
-    struct pipe_context *pipe = This->pipe;
     struct NineSurface9 *surf = NineSurface9(pSurface);
-    struct pipe_surface *psurf;
     unsigned x, y, w, h;
-    union pipe_color_union rgba;
-    boolean fallback;
 
     DBG("This=%p pSurface=%p pRect=%p color=%08x\n", This,
         pSurface, pRect, color);
@@ -1714,30 +1765,29 @@ NineDevice9_ColorFill( struct NineDevice9 *This,
         y = pRect->top;
         w = pRect->right - pRect->left;
         h = pRect->bottom - pRect->top;
+        /* Wine tests: */
+        if (compressed_format(surf->desc.Format)) {
+           const unsigned bw = util_format_get_blockwidth(surf->base.info.format);
+           const unsigned bh = util_format_get_blockheight(surf->base.info.format);
+
+           user_assert(!(x % bw) && !(y % bh) && !(w % bw) && !(h % bh),
+                       D3DERR_INVALIDCALL);
+        }
     } else{
         x = 0;
         y = 0;
         w = surf->desc.Width;
         h = surf->desc.Height;
     }
-    d3dcolor_to_pipe_color_union(&rgba, color);
 
-    fallback = !(surf->base.info.bind & PIPE_BIND_RENDER_TARGET);
-
-    if (!fallback) {
-        psurf = NineSurface9_GetSurface(surf, 0);
-        if (!psurf)
-            fallback = TRUE;
-    }
-
-    if (!fallback) {
-        pipe->clear_render_target(pipe, psurf, &rgba, x, y, w, h);
+    if (surf->base.info.bind & PIPE_BIND_RENDER_TARGET) {
+        nine_context_clear_render_target(This, surf, color, x, y, w, h);
     } else {
         D3DLOCKED_RECT lock;
         union util_color uc;
         HRESULT hr;
         /* XXX: lock pRect and fix util_fill_rect */
-        hr = NineSurface9_LockRect(surf, &lock, NULL, 0);
+        hr = NineSurface9_LockRect(surf, &lock, NULL, pRect ? 0 : D3DLOCK_DISCARD);
         if (FAILED(hr))
             return hr;
         util_pack_color_ub(color >> 16, color >> 8, color >> 0, color >> 24,
@@ -1811,14 +1861,12 @@ NineDevice9_SetRenderTarget( struct NineDevice9 *This,
         This->state.scissor.miny = 0;
         This->state.scissor.maxx = rt->desc.Width;
         This->state.scissor.maxy = rt->desc.Height;
-
-        This->state.changed.group |= NINE_STATE_VIEWPORT | NINE_STATE_SCISSOR;
     }
 
-    if (This->state.rt[i] != NineSurface9(pRenderTarget)) {
-       nine_bind(&This->state.rt[i], pRenderTarget);
-       This->state.changed.group |= NINE_STATE_FB;
-    }
+    if (This->state.rt[i] != NineSurface9(pRenderTarget))
+        nine_bind(&This->state.rt[i], pRenderTarget);
+
+    nine_context_set_render_target(This, i, rt);
     return D3D_OK;
 }
 
@@ -1844,11 +1892,12 @@ HRESULT NINE_WINAPI
 NineDevice9_SetDepthStencilSurface( struct NineDevice9 *This,
                                     IDirect3DSurface9 *pNewZStencil )
 {
+    struct NineSurface9 *ds = NineSurface9(pNewZStencil);
     DBG("This=%p pNewZStencil=%p\n", This, pNewZStencil);
 
-    if (This->state.ds != NineSurface9(pNewZStencil)) {
-        nine_bind(&This->state.ds, pNewZStencil);
-        This->state.changed.group |= NINE_STATE_FB;
+    if (This->state.ds != ds) {
+        nine_bind(&This->state.ds, ds);
+        nine_context_set_depth_stencil(This, ds);
     }
     return D3D_OK;
 }
@@ -1895,16 +1944,7 @@ NineDevice9_Clear( struct NineDevice9 *This,
                    float Z,
                    DWORD Stencil )
 {
-    const int sRGB = This->state.rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
-    struct pipe_surface *cbuf, *zsbuf;
-    struct pipe_context *pipe = This->pipe;
     struct NineSurface9 *zsbuf_surf = This->state.ds;
-    struct NineSurface9 *rt;
-    unsigned bufs = 0;
-    unsigned r, i;
-    union pipe_color_union rgba;
-    unsigned rt_mask = 0;
-    D3DRECT rect;
 
     DBG("This=%p Count=%u pRects=%p Flags=%x Color=%08x Z=%f Stencil=%x\n",
         This, Count, pRects, Flags, Color, Z, Stencil);
@@ -1925,126 +1965,7 @@ NineDevice9_Clear( struct NineDevice9 *This,
         Count = 0;
 #endif
 
-    nine_update_state_framebuffer_clear(This);
-
-    if (Flags & D3DCLEAR_TARGET) bufs |= PIPE_CLEAR_COLOR;
-    /* Ignore Z buffer if not bound */
-    if (This->state.fb.zsbuf != NULL) {
-        if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH;
-        if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL;
-    }
-    if (!bufs)
-        return D3D_OK;
-    d3dcolor_to_pipe_color_union(&rgba, Color);
-
-    rect.x1 = This->state.viewport.X;
-    rect.y1 = This->state.viewport.Y;
-    rect.x2 = This->state.viewport.Width + rect.x1;
-    rect.y2 = This->state.viewport.Height + rect.y1;
-
-    /* Both rectangles apply, which is weird, but that's D3D9. */
-    if (This->state.rs[D3DRS_SCISSORTESTENABLE]) {
-        rect.x1 = MAX2(rect.x1, This->state.scissor.minx);
-        rect.y1 = MAX2(rect.y1, This->state.scissor.miny);
-        rect.x2 = MIN2(rect.x2, This->state.scissor.maxx);
-        rect.y2 = MIN2(rect.y2, This->state.scissor.maxy);
-    }
-
-    if (Count) {
-        /* Maybe apps like to specify a large rect ? */
-        if (pRects[0].x1 <= rect.x1 && pRects[0].x2 >= rect.x2 &&
-            pRects[0].y1 <= rect.y1 && pRects[0].y2 >= rect.y2) {
-            DBG("First rect covers viewport.\n");
-            Count = 0;
-            pRects = NULL;
-        }
-    }
-
-    if (rect.x1 >= This->state.fb.width || rect.y1 >= This->state.fb.height)
-        return D3D_OK;
-
-    for (i = 0; i < This->caps.NumSimultaneousRTs; ++i) {
-        if (This->state.rt[i] && This->state.rt[i]->desc.Format != D3DFMT_NULL)
-            rt_mask |= 1 << i;
-    }
-
-    /* fast path, clears everything at once */
-    if (!Count &&
-        (!(bufs & PIPE_CLEAR_COLOR) || (rt_mask == This->state.rt_mask)) &&
-        rect.x1 == 0 && rect.y1 == 0 &&
-        /* Case we clear only render target. Check clear region vs rt. */
-        ((!(bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
-         rect.x2 >= This->state.fb.width &&
-         rect.y2 >= This->state.fb.height) ||
-        /* Case we clear depth buffer (and eventually rt too).
-         * depth buffer size is always >= rt size. Compare to clear region */
-        ((bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
-         rect.x2 >= zsbuf_surf->desc.Width &&
-         rect.y2 >= zsbuf_surf->desc.Height))) {
-        DBG("Clear fast path\n");
-        pipe->clear(pipe, bufs, &rgba, Z, Stencil);
-        return D3D_OK;
-    }
-
-    if (!Count) {
-        Count = 1;
-        pRects = &rect;
-    }
-
-    for (i = 0; i < This->caps.NumSimultaneousRTs; ++i) {
-        rt = This->state.rt[i];
-        if (!rt || rt->desc.Format == D3DFMT_NULL ||
-            !(Flags & D3DCLEAR_TARGET))
-            continue; /* save space, compiler should hoist this */
-        cbuf = NineSurface9_GetSurface(rt, sRGB);
-        for (r = 0; r < Count; ++r) {
-            /* Don't trust users to pass these in the right order. */
-            unsigned x1 = MIN2(pRects[r].x1, pRects[r].x2);
-            unsigned y1 = MIN2(pRects[r].y1, pRects[r].y2);
-            unsigned x2 = MAX2(pRects[r].x1, pRects[r].x2);
-            unsigned y2 = MAX2(pRects[r].y1, pRects[r].y2);
-#ifndef NINE_LAX
-            /* Drop negative rectangles (like wine expects). */
-            if (pRects[r].x1 > pRects[r].x2) continue;
-            if (pRects[r].y1 > pRects[r].y2) continue;
-#endif
-
-            x1 = MAX2(x1, rect.x1);
-            y1 = MAX2(y1, rect.y1);
-            x2 = MIN3(x2, rect.x2, rt->desc.Width);
-            y2 = MIN3(y2, rect.y2, rt->desc.Height);
-
-            DBG("Clearing (%u..%u)x(%u..%u)\n", x1, x2, y1, y2);
-            pipe->clear_render_target(pipe, cbuf, &rgba,
-                                      x1, y1, x2 - x1, y2 - y1);
-        }
-    }
-    if (!(Flags & NINED3DCLEAR_DEPTHSTENCIL))
-        return D3D_OK;
-
-    bufs &= PIPE_CLEAR_DEPTHSTENCIL;
-
-    for (r = 0; r < Count; ++r) {
-        unsigned x1 = MIN2(pRects[r].x1, pRects[r].x2);
-        unsigned y1 = MIN2(pRects[r].y1, pRects[r].y2);
-        unsigned x2 = MAX2(pRects[r].x1, pRects[r].x2);
-        unsigned y2 = MAX2(pRects[r].y1, pRects[r].y2);
-#ifndef NINE_LAX
-        /* Drop negative rectangles. */
-        if (pRects[r].x1 > pRects[r].x2) continue;
-        if (pRects[r].y1 > pRects[r].y2) continue;
-#endif
-
-        x1 = MIN2(x1, rect.x1);
-        y1 = MIN2(y1, rect.y1);
-        x2 = MIN3(x2, rect.x2, zsbuf_surf->desc.Width);
-        y2 = MIN3(y2, rect.y2, zsbuf_surf->desc.Height);
-
-        zsbuf = NineSurface9_GetSurface(zsbuf_surf, 0);
-        assert(zsbuf);
-        pipe->clear_depth_stencil(pipe, zsbuf, bufs, Z, Stencil,
-                                  x1, y1, x2 - x1, y2 - y1);
-    }
+    nine_context_clear_fb(This, Count, pRects, Flags, Color, Z, Stencil);
     return D3D_OK;
 }
 
@@ -2054,15 +1975,18 @@ NineDevice9_SetTransform( struct NineDevice9 *This,
                           const D3DMATRIX *pMatrix )
 {
     struct nine_state *state = This->update;
-    D3DMATRIX *M = nine_state_access_transform(state, State, TRUE);
+    D3DMATRIX *M = nine_state_access_transform(&state->ff, State, TRUE);
 
     DBG("This=%p State=%d pMatrix=%p\n", This, State, pMatrix);
 
     user_assert(M, D3DERR_INVALIDCALL);
 
     *M = *pMatrix;
-    state->ff.changed.transform[State / 32] |= 1 << (State % 32);
-    state->changed.group |= NINE_STATE_FF;
+    if (unlikely(This->is_recording)) {
+        state->ff.changed.transform[State / 32] |= 1 << (State % 32);
+        state->changed.group |= NINE_STATE_FF;
+    } else
+        nine_context_set_transform(This, State, pMatrix);
 
     return D3D_OK;
 }
@@ -2072,7 +1996,7 @@ NineDevice9_GetTransform( struct NineDevice9 *This,
                           D3DTRANSFORMSTATETYPE State,
                           D3DMATRIX *pMatrix )
 {
-    D3DMATRIX *M = nine_state_access_transform(&This->state, State, FALSE);
+    D3DMATRIX *M = nine_state_access_transform(&This->state.ff, State, FALSE);
     user_assert(M, D3DERR_INVALIDCALL);
     *pMatrix = *M;
     return D3D_OK;
@@ -2085,7 +2009,7 @@ NineDevice9_MultiplyTransform( struct NineDevice9 *This,
 {
     struct nine_state *state = This->update;
     D3DMATRIX T;
-    D3DMATRIX *M = nine_state_access_transform(state, State, TRUE);
+    D3DMATRIX *M = nine_state_access_transform(&state->ff, State, TRUE);
 
     DBG("This=%p State=%d pMatrix=%p\n", This, State, pMatrix);
 
@@ -2106,7 +2030,7 @@ NineDevice9_SetViewport( struct NineDevice9 *This,
         pViewport->MinZ, pViewport->MaxZ);
 
     state->viewport = *pViewport;
-    state->changed.group |= NINE_STATE_VIEWPORT;
+    nine_context_set_viewport(This, pViewport);
 
     return D3D_OK;
 }
@@ -2132,7 +2056,10 @@ NineDevice9_SetMaterial( struct NineDevice9 *This,
     user_assert(pMaterial, E_POINTER);
 
     state->ff.material = *pMaterial;
-    state->changed.group |= NINE_STATE_FF_MATERIAL;
+    if (unlikely(This->is_recording))
+        state->changed.group |= NINE_STATE_FF_MATERIAL;
+    else
+        nine_context_set_material(This, pMaterial);
 
     return D3D_OK;
 }
@@ -2152,6 +2079,7 @@ NineDevice9_SetLight( struct NineDevice9 *This,
                       const D3DLIGHT9 *pLight )
 {
     struct nine_state *state = This->update;
+    HRESULT hr;
 
     DBG("This=%p Index=%u pLight=%p\n", This, Index, pLight);
     if (pLight)
@@ -2162,27 +2090,10 @@ NineDevice9_SetLight( struct NineDevice9 *This,
 
     user_assert(Index < NINE_MAX_LIGHTS, D3DERR_INVALIDCALL); /* sanity */
 
-    if (Index >= state->ff.num_lights) {
-        unsigned n = state->ff.num_lights;
-        unsigned N = Index + 1;
-
-        state->ff.light = REALLOC(state->ff.light, n * sizeof(D3DLIGHT9),
-                                                   N * sizeof(D3DLIGHT9));
-        if (!state->ff.light)
-            return E_OUTOFMEMORY;
-        state->ff.num_lights = N;
-
-        for (; n < Index; ++n) {
-            memset(&state->ff.light[n], 0, sizeof(D3DLIGHT9));
-            state->ff.light[n].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
-        }
-    }
-    state->ff.light[Index] = *pLight;
+    hr = nine_state_set_light(&state->ff, Index, pLight);
+    if (hr != D3D_OK)
+        return hr;
 
-    if (pLight->Type == D3DLIGHT_SPOT && pLight->Theta >= pLight->Phi) {
-        DBG("Warning: clamping D3DLIGHT9.Theta\n");
-        state->ff.light[Index].Theta = state->ff.light[Index].Phi;
-    }
     if (pLight->Type != D3DLIGHT_DIRECTIONAL &&
         pLight->Attenuation0 == 0.0f &&
         pLight->Attenuation1 == 0.0f &&
@@ -2190,7 +2101,10 @@ NineDevice9_SetLight( struct NineDevice9 *This,
         DBG("Warning: all D3DLIGHT9.Attenuation[i] are 0\n");
     }
 
-    state->changed.group |= NINE_STATE_FF_LIGHTING;
+    if (unlikely(This->is_recording))
+        state->changed.group |= NINE_STATE_FF_LIGHTING;
+    else
+        nine_context_set_light(This, Index, pLight);
 
     return D3D_OK;
 }
@@ -2218,7 +2132,6 @@ NineDevice9_LightEnable( struct NineDevice9 *This,
                          BOOL Enable )
 {
     struct nine_state *state = This->update;
-    unsigned i;
 
     DBG("This=%p Index=%u Enable=%i\n", This, Index, Enable);
 
@@ -2234,30 +2147,10 @@ NineDevice9_LightEnable( struct NineDevice9 *This,
         light.Direction.z = 1.0f;
         NineDevice9_SetLight(This, Index, &light);
     }
-    user_assert(Index < state->ff.num_lights, D3DERR_INVALIDCALL);
 
-    for (i = 0; i < state->ff.num_lights_active; ++i) {
-        if (state->ff.active_light[i] == Index)
-            break;
-    }
-
-    if (Enable) {
-        if (i < state->ff.num_lights_active)
-            return D3D_OK;
-        /* XXX wine thinks this should still succeed:
-         */
-        user_assert(i < NINE_MAX_LIGHTS_ACTIVE, D3DERR_INVALIDCALL);
-
-        state->ff.active_light[i] = Index;
-        state->ff.num_lights_active++;
-    } else {
-        if (i == state->ff.num_lights_active)
-            return D3D_OK;
-        --state->ff.num_lights_active;
-        for (; i < state->ff.num_lights_active; ++i)
-            state->ff.active_light[i] = state->ff.active_light[i + 1];
-    }
-    state->changed.group |= NINE_STATE_FF_LIGHTING;
+    nine_state_light_enable(&state->ff, &state->changed.group, Index, Enable);
+    if (likely(!This->is_recording))
+        nine_context_light_enable(This, Index, Enable);
 
     return D3D_OK;
 }
@@ -2299,7 +2192,10 @@ NineDevice9_SetClipPlane( struct NineDevice9 *This,
     user_assert(Index < PIPE_MAX_CLIP_PLANES, D3DERR_INVALIDCALL);
 
     memcpy(&state->clip.ucp[Index][0], pPlane, sizeof(state->clip.ucp[0]));
-    state->changed.ucp |= 1 << Index;
+    if (unlikely(This->is_recording))
+        state->changed.ucp |= 1 << Index;
+    else
+        nine_context_set_clip_plane(This, Index, (struct nine_clipplane *)pPlane);
 
     return D3D_OK;
 }
@@ -2317,64 +2213,6 @@ NineDevice9_GetClipPlane( struct NineDevice9 *This,
     return D3D_OK;
 }
 
-#define RESZ_CODE 0x7fa05000
-
-static HRESULT
-NineDevice9_ResolveZ( struct NineDevice9 *This )
-{
-    struct nine_state *state = &This->state;
-    const struct util_format_description *desc;
-    struct NineSurface9 *source = state->ds;
-    struct NineBaseTexture9 *destination = state->texture[0];
-    struct pipe_resource *src, *dst;
-    struct pipe_blit_info blit;
-
-    DBG("RESZ resolve\n");
-
-    user_assert(source && destination &&
-                destination->base.type == D3DRTYPE_TEXTURE, D3DERR_INVALIDCALL);
-
-    src = source->base.resource;
-    dst = destination->base.resource;
-
-    user_assert(src && dst, D3DERR_INVALIDCALL);
-
-    /* check dst is depth format. we know already for src */
-    desc = util_format_description(dst->format);
-    user_assert(desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS, D3DERR_INVALIDCALL);
-
-    memset(&blit, 0, sizeof(blit));
-    blit.src.resource = src;
-    blit.src.level = 0;
-    blit.src.format = src->format;
-    blit.src.box.z = 0;
-    blit.src.box.depth = 1;
-    blit.src.box.x = 0;
-    blit.src.box.y = 0;
-    blit.src.box.width = src->width0;
-    blit.src.box.height = src->height0;
-
-    blit.dst.resource = dst;
-    blit.dst.level = 0;
-    blit.dst.format = dst->format;
-    blit.dst.box.z = 0;
-    blit.dst.box.depth = 1;
-    blit.dst.box.x = 0;
-    blit.dst.box.y = 0;
-    blit.dst.box.width = dst->width0;
-    blit.dst.box.height = dst->height0;
-
-    blit.mask = PIPE_MASK_ZS;
-    blit.filter = PIPE_TEX_FILTER_NEAREST;
-    blit.scissor_enable = FALSE;
-
-    This->pipe->blit(This->pipe, &blit);
-    return D3D_OK;
-}
-
-#define ALPHA_TO_COVERAGE_ENABLE   MAKEFOURCC('A', '2', 'M', '1')
-#define ALPHA_TO_COVERAGE_DISABLE  MAKEFOURCC('A', '2', 'M', '0')
-
 HRESULT NINE_WINAPI
 NineDevice9_SetRenderState( struct NineDevice9 *This,
                             D3DRENDERSTATETYPE State,
@@ -2387,36 +2225,19 @@ NineDevice9_SetRenderState( struct NineDevice9 *This,
 
     user_assert(State < D3DRS_COUNT, D3DERR_INVALIDCALL);
 
-    if (state->rs_advertised[State] == Value && likely(!This->is_recording))
+    if (unlikely(This->is_recording)) {
+        state->rs_advertised[State] = Value;
+        /* only need to record changed render states for stateblocks */
+        state->changed.rs[State / 32] |= 1 << (State % 32);
+        state->changed.group |= nine_render_state_group[State];
         return D3D_OK;
-
-    state->rs_advertised[State] = Value;
-
-    /* Amd hacks (equivalent to GL extensions) */
-    if (unlikely(State == D3DRS_POINTSIZE)) {
-        if (Value == RESZ_CODE)
-            return NineDevice9_ResolveZ(This);
-
-        if (Value == ALPHA_TO_COVERAGE_ENABLE ||
-            Value == ALPHA_TO_COVERAGE_DISABLE) {
-            state->rs[NINED3DRS_ALPHACOVERAGE] = (Value == ALPHA_TO_COVERAGE_ENABLE);
-            state->changed.group |= NINE_STATE_BLEND;
-            return D3D_OK;
-        }
     }
 
-    /* NV hack */
-    if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) {
-        if (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && state->rs[NINED3DRS_ALPHACOVERAGE])) {
-            state->rs[NINED3DRS_ALPHACOVERAGE] = (Value == D3DFMT_ATOC);
-            state->changed.group |= NINE_STATE_BLEND;
-            return D3D_OK;
-        }
-    }
+    if (state->rs_advertised[State] == Value)
+        return D3D_OK;
 
-    state->rs[State] = nine_fix_render_state_value(State, Value);
-    state->changed.rs[State / 32] |= 1 << (State % 32);
-    state->changed.group |= nine_render_state_group[State];
+    state->rs_advertised[State] = Value;
+    nine_context_set_render_state(This, State, Value);
 
     return D3D_OK;
 }
@@ -2475,10 +2296,12 @@ NineDevice9_CreateStateBlock( struct NineDevice9 *This,
        /* TODO: texture/sampler state */
        memcpy(dst->changed.rs,
               nine_render_states_vertex, sizeof(dst->changed.rs));
-       nine_ranges_insert(&dst->changed.vs_const_f, 0, This->max_vs_const_f,
+       nine_ranges_insert(&dst->changed.vs_const_f, 0, This->may_swvp ? NINE_MAX_CONST_F_SWVP : This->max_vs_const_f,
+                          &This->range_pool);
+       nine_ranges_insert(&dst->changed.vs_const_i, 0, This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I,
+                          &This->range_pool);
+       nine_ranges_insert(&dst->changed.vs_const_b, 0, This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B,
                           &This->range_pool);
-       dst->changed.vs_const_i = 0xffff;
-       dst->changed.vs_const_b = 0xffff;
        for (s = 0; s < NINE_MAX_SAMPLERS; ++s)
            dst->changed.sampler[s] |= 1 << D3DSAMP_DMAPOFFSET;
        if (This->state.ff.num_lights) {
@@ -2496,8 +2319,10 @@ NineDevice9_CreateStateBlock( struct NineDevice9 *This,
     }
     if (Type == D3DSBT_ALL || Type == D3DSBT_PIXELSTATE) {
        dst->changed.group |=
-          NINE_STATE_PS | NINE_STATE_PS_CONST;
-       /* TODO: texture/sampler state */
+          NINE_STATE_PS | NINE_STATE_PS_CONST | NINE_STATE_BLEND |
+          NINE_STATE_FF_OTHER | NINE_STATE_FF_PSSTAGES | NINE_STATE_PS_CONST |
+          NINE_STATE_FB | NINE_STATE_DSA | NINE_STATE_MULTISAMPLE |
+          NINE_STATE_RASTERIZER | NINE_STATE_STENCIL_REF;
        memcpy(dst->changed.rs,
               nine_render_states_pixel, sizeof(dst->changed.rs));
        nine_ranges_insert(&dst->changed.ps_const_f, 0, This->max_ps_const_f,
@@ -2506,6 +2331,10 @@ NineDevice9_CreateStateBlock( struct NineDevice9 *This,
        dst->changed.ps_const_b = 0xffff;
        for (s = 0; s < NINE_MAX_SAMPLERS; ++s)
            dst->changed.sampler[s] |= 0x1ffe;
+       for (s = 0; s < NINE_MAX_TEXTURE_STAGES; ++s) {
+           dst->ff.changed.tex_stage[s][0] |= 0xffffffff;
+           dst->ff.changed.tex_stage[s][1] |= 0xffffffff;
+       }
     }
     if (Type == D3DSBT_ALL) {
        dst->changed.group |=
@@ -2613,6 +2442,7 @@ NineDevice9_SetTexture( struct NineDevice9 *This,
 {
     struct nine_state *state = This->update;
     struct NineBaseTexture9 *tex = NineBaseTexture9(pTexture);
+    struct NineBaseTexture9 *old;
 
     DBG("This=%p Stage=%u pTexture=%p\n", This, Stage, pTexture);
 
@@ -2626,27 +2456,20 @@ NineDevice9_SetTexture( struct NineDevice9 *This,
     if (Stage >= D3DDMAPSAMPLER)
         Stage = Stage - D3DDMAPSAMPLER + NINE_MAX_SAMPLERS_PS;
 
-    if (!This->is_recording) {
-        struct NineBaseTexture9 *old = state->texture[Stage];
-        if (old == tex)
-            return D3D_OK;
-
-        state->samplers_shadow &= ~(1 << Stage);
-        if (tex) {
-            state->samplers_shadow |= tex->shadow << Stage;
+    if (This->is_recording) {
+        state->changed.texture |= 1 << Stage;
+        state->changed.group |= NINE_STATE_TEXTURE;
+        nine_bind(&state->texture[Stage], pTexture);
+        return D3D_OK;
+    }
 
-            if ((tex->managed.dirty | tex->dirty_mip) && LIST_IS_EMPTY(&tex->list))
-                list_add(&tex->list, &This->update_textures);
+    old = state->texture[Stage];
+    if (old == tex)
+        return D3D_OK;
 
-            tex->bind_count++;
-        }
-        if (old)
-            old->bind_count--;
-    }
-    nine_bind(&state->texture[Stage], pTexture);
+    NineBindTextureToDevice(This, &state->texture[Stage], tex);
 
-    state->changed.texture |= 1 << Stage;
-    state->changed.group |= NINE_STATE_TEXTURE;
+    nine_context_set_texture(This, Stage, tex);
 
     return D3D_OK;
 }
@@ -2674,7 +2497,6 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This,
                                   DWORD Value )
 {
     struct nine_state *state = This->update;
-    int bumpmap_index = -1;
 
     DBG("Stage=%u Type=%u Value=%08x\n", Stage, Type, Value);
     nine_dump_D3DTSS_value(DBG_FF, Type, Value);
@@ -2683,39 +2505,14 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This,
     user_assert(Type < ARRAY_SIZE(state->ff.tex_stage[0]), D3DERR_INVALIDCALL);
 
     state->ff.tex_stage[Stage][Type] = Value;
-    switch (Type) {
-    case D3DTSS_BUMPENVMAT00:
-        bumpmap_index = 4 * Stage;
-        break;
-    case D3DTSS_BUMPENVMAT10:
-        bumpmap_index = 4 * Stage + 1;
-        break;
-    case D3DTSS_BUMPENVMAT01:
-        bumpmap_index = 4 * Stage + 2;
-        break;
-    case D3DTSS_BUMPENVMAT11:
-        bumpmap_index = 4 * Stage + 3;
-        break;
-    case D3DTSS_BUMPENVLSCALE:
-        bumpmap_index = 4 * 8 + 2 * Stage;
-        break;
-    case D3DTSS_BUMPENVLOFFSET:
-        bumpmap_index = 4 * 8 + 2 * Stage + 1;
-        break;
-    case D3DTSS_TEXTURETRANSFORMFLAGS:
-        state->changed.group |= NINE_STATE_PS1X_SHADER;
-        break;
-    default:
-        break;
-    }
-
-    if (bumpmap_index >= 0) {
-        state->bumpmap_vars[bumpmap_index] = Value;
-        state->changed.group |= NINE_STATE_PS_CONST;
-    }
 
-    state->changed.group |= NINE_STATE_FF_PSSTAGES;
-    state->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32);
+    if (unlikely(This->is_recording)) {
+        if (Type == D3DTSS_TEXTURETRANSFORMFLAGS)
+            state->changed.group |= NINE_STATE_PS1X_SHADER;
+        state->changed.group |= NINE_STATE_FF_PSSTAGES;
+        state->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32);
+    } else
+        nine_context_set_texture_stage_state(This, Stage, Type, Value);
 
     return D3D_OK;
 }
@@ -2734,7 +2531,7 @@ NineDevice9_GetSamplerState( struct NineDevice9 *This,
     if (Sampler >= D3DDMAPSAMPLER)
         Sampler = Sampler - D3DDMAPSAMPLER + NINE_MAX_SAMPLERS_PS;
 
-    *pValue = This->state.samp[Sampler][Type];
+    *pValue = This->state.samp_advertised[Sampler][Type];
     return D3D_OK;
 }
 
@@ -2757,12 +2554,19 @@ NineDevice9_SetSamplerState( struct NineDevice9 *This,
     if (Sampler >= D3DDMAPSAMPLER)
         Sampler = Sampler - D3DDMAPSAMPLER + NINE_MAX_SAMPLERS_PS;
 
-    if (state->samp[Sampler][Type] != Value || unlikely(This->is_recording)) {
-        state->samp[Sampler][Type] = Value;
+    if (unlikely(This->is_recording)) {
+        state->samp_advertised[Sampler][Type] = Value;
         state->changed.group |= NINE_STATE_SAMPLER;
         state->changed.sampler[Sampler] |= 1 << Type;
+        return D3D_OK;
     }
 
+    if (state->samp_advertised[Sampler][Type] == Value)
+        return D3D_OK;
+
+    state->samp_advertised[Sampler][Type] = Value;
+    nine_context_set_sampler_state(This, Sampler, Type, Value);
+
     return D3D_OK;
 }
 
@@ -2776,9 +2580,9 @@ NineDevice9_ValidateDevice( struct NineDevice9 *This,
 
     DBG("This=%p pNumPasses=%p\n", This, pNumPasses);
 
-    for (i = 0; i < ARRAY_SIZE(state->samp); ++i) {
-        if (state->samp[i][D3DSAMP_MINFILTER] == D3DTEXF_NONE ||
-            state->samp[i][D3DSAMP_MAGFILTER] == D3DTEXF_NONE)
+    for (i = 0; i < ARRAY_SIZE(state->samp_advertised); ++i) {
+        if (state->samp_advertised[i][D3DSAMP_MINFILTER] == D3DTEXF_NONE ||
+            state->samp_advertised[i][D3DSAMP_MAGFILTER] == D3DTEXF_NONE)
             return D3DERR_UNSUPPORTEDTEXTUREFILTER;
     }
 
@@ -2794,7 +2598,7 @@ NineDevice9_ValidateDevice( struct NineDevice9 *This,
         }
     }
     if (state->ds &&
-        (state->rs[D3DRS_ZENABLE] || state->rs[D3DRS_STENCILENABLE])) {
+        (state->rs_advertised[D3DRS_ZENABLE] || state->rs_advertised[D3DRS_STENCILENABLE])) {
         if (w != 0 &&
             (state->ds->desc.Width != w || state->ds->desc.Height != h))
             return D3DERR_CONFLICTINGRENDERSTATE;
@@ -2850,7 +2654,10 @@ NineDevice9_SetScissorRect( struct NineDevice9 *This,
     state->scissor.maxx = pRect->right;
     state->scissor.maxy = pRect->bottom;
 
-    state->changed.group |= NINE_STATE_SCISSOR;
+    if (unlikely(This->is_recording))
+        state->changed.group |= NINE_STATE_SCISSOR;
+    else
+        nine_context_set_scissor(This, &state->scissor);
 
     return D3D_OK;
 }
@@ -2871,20 +2678,25 @@ HRESULT NINE_WINAPI
 NineDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This,
                                          BOOL bSoftware )
 {
-    STUB(D3DERR_INVALIDCALL);
+    if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) {
+        This->swvp = bSoftware;
+        nine_context_set_swvp(This, bSoftware);
+        return D3D_OK;
+    } else
+        return D3DERR_INVALIDCALL; /* msdn. TODO: check in practice */
 }
 
 BOOL NINE_WINAPI
 NineDevice9_GetSoftwareVertexProcessing( struct NineDevice9 *This )
 {
-    return !!(This->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING);
+    return This->swvp;
 }
 
 HRESULT NINE_WINAPI
 NineDevice9_SetNPatchMode( struct NineDevice9 *This,
                            float nSegments )
 {
-    STUB(D3DERR_INVALIDCALL);
+    return D3D_OK; /* Nothing to do because we don't advertise NPatch support */
 }
 
 float NINE_WINAPI
@@ -2893,20 +2705,53 @@ NineDevice9_GetNPatchMode( struct NineDevice9 *This )
     STUB(0);
 }
 
-static inline void
-init_draw_info(struct pipe_draw_info *info,
-               struct NineDevice9 *dev, D3DPRIMITIVETYPE type, UINT count)
+/* TODO: only go through dirty textures */
+static void
+validate_textures(struct NineDevice9 *device)
 {
-    info->mode = d3dprimitivetype_to_pipe_prim(type);
-    info->count = prim_count_to_vertex_count(type, count);
-    info->start_instance = 0;
-    info->instance_count = 1;
-    if (dev->state.stream_instancedata_mask & dev->state.stream_usage_mask)
-        info->instance_count = MAX2(dev->state.stream_freq[0] & 0x7FFFFF, 1);
-    info->primitive_restart = FALSE;
-    info->restart_index = 0;
-    info->count_from_stream_output = NULL;
-    info->indirect = NULL;
+    struct NineBaseTexture9 *tex, *ptr;
+    LIST_FOR_EACH_ENTRY_SAFE(tex, ptr, &device->update_textures, list) {
+        list_delinit(&tex->list);
+        NineBaseTexture9_Validate(tex);
+    }
+}
+
+static void
+update_managed_buffers(struct NineDevice9 *device)
+{
+    struct NineBuffer9 *buf, *ptr;
+    LIST_FOR_EACH_ENTRY_SAFE(buf, ptr, &device->update_buffers, managed.list) {
+        list_delinit(&buf->managed.list);
+        NineBuffer9_Upload(buf);
+    }
+}
+
+static void
+NineBeforeDraw( struct NineDevice9 *This )
+{
+    /* Upload Managed dirty content */
+    validate_textures(This); /* may clobber state */
+    update_managed_buffers(This);
+}
+
+static void
+NineAfterDraw( struct NineDevice9 *This )
+{
+    unsigned i;
+    struct nine_state *state = &This->state;
+    unsigned ps_mask = state->ps ? state->ps->rt_mask : 1;
+
+    /* Flag render-targets with autogenmipmap for mipmap regeneration */
+    for (i = 0; i < This->caps.NumSimultaneousRTs; ++i) {
+        struct NineSurface9 *rt = state->rt[i];
+
+        if (rt && rt->desc.Format != D3DFMT_NULL && (ps_mask & (1 << i)) &&
+            rt->desc.Usage & D3DUSAGE_AUTOGENMIPMAP) {
+            assert(rt->texture == D3DRTYPE_TEXTURE ||
+                   rt->texture == D3DRTYPE_CUBETEXTURE);
+            NineBaseTexture9(rt->base.base.container)->dirty_mip = TRUE;
+        }
+    }
 }
 
 HRESULT NINE_WINAPI
@@ -2915,21 +2760,12 @@ NineDevice9_DrawPrimitive( struct NineDevice9 *This,
                            UINT StartVertex,
                            UINT PrimitiveCount )
 {
-    struct pipe_draw_info info;
-
     DBG("iface %p, PrimitiveType %u, StartVertex %u, PrimitiveCount %u\n",
         This, PrimitiveType, StartVertex, PrimitiveCount);
 
-    nine_update_state(This);
-
-    init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
-    info.indexed = FALSE;
-    info.start = StartVertex;
-    info.index_bias = 0;
-    info.min_index = info.start;
-    info.max_index = info.count - 1;
-
-    This->pipe->draw_vbo(This->pipe, &info);
+    NineBeforeDraw(This);
+    nine_context_draw_primitive(This, PrimitiveType, StartVertex, PrimitiveCount);
+    NineAfterDraw(This);
 
     return D3D_OK;
 }
@@ -2943,8 +2779,6 @@ NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This,
                                   UINT StartIndex,
                                   UINT PrimitiveCount )
 {
-    struct pipe_draw_info info;
-
     DBG("iface %p, PrimitiveType %u, BaseVertexIndex %u, MinVertexIndex %u "
         "NumVertices %u, StartIndex %u, PrimitiveCount %u\n",
         This, PrimitiveType, BaseVertexIndex, MinVertexIndex, NumVertices,
@@ -2953,17 +2787,11 @@ NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This,
     user_assert(This->state.idxbuf, D3DERR_INVALIDCALL);
     user_assert(This->state.vdecl, D3DERR_INVALIDCALL);
 
-    nine_update_state(This);
-
-    init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
-    info.indexed = TRUE;
-    info.start = StartIndex;
-    info.index_bias = BaseVertexIndex;
-    /* These don't include index bias: */
-    info.min_index = MinVertexIndex;
-    info.max_index = MinVertexIndex + NumVertices - 1;
-
-    This->pipe->draw_vbo(This->pipe, &info);
+    NineBeforeDraw(This);
+    nine_context_draw_indexed_primitive(This, PrimitiveType, BaseVertexIndex,
+                                        MinVertexIndex, NumVertices, StartIndex,
+                                        PrimitiveCount);
+    NineAfterDraw(This);
 
     return D3D_OK;
 }
@@ -2976,7 +2804,6 @@ NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This,
                              UINT VertexStreamZeroStride )
 {
     struct pipe_vertex_buffer vtxbuf;
-    struct pipe_draw_info info;
 
     DBG("iface %p, PrimitiveType %u, PrimitiveCount %u, data %p, stride %u\n",
         This, PrimitiveType, PrimitiveCount,
@@ -2984,43 +2811,36 @@ NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This,
 
     user_assert(pVertexStreamZeroData && VertexStreamZeroStride,
                 D3DERR_INVALIDCALL);
-
-    nine_update_state(This);
-
-    init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
-    info.indexed = FALSE;
-    info.start = 0;
-    info.index_bias = 0;
-    info.min_index = 0;
-    info.max_index = info.count - 1;
+    user_assert(PrimitiveCount, D3D_OK);
 
     vtxbuf.stride = VertexStreamZeroStride;
     vtxbuf.buffer_offset = 0;
-    vtxbuf.buffer = NULL;
-    vtxbuf.user_buffer = pVertexStreamZeroData;
+    vtxbuf.is_user_buffer = true;
+    vtxbuf.buffer.user = pVertexStreamZeroData;
 
     if (!This->driver_caps.user_vbufs) {
+        vtxbuf.is_user_buffer = false;
+        vtxbuf.buffer.resource = NULL;
         u_upload_data(This->vertex_uploader,
                       0,
-                      (info.max_index + 1) * VertexStreamZeroStride, /* XXX */
+                      (prim_count_to_vertex_count(PrimitiveType, PrimitiveCount)) * VertexStreamZeroStride, /* XXX */
                       4,
-                      vtxbuf.user_buffer,
+                      pVertexStreamZeroData,
                       &vtxbuf.buffer_offset,
-                      &vtxbuf.buffer);
+                      &vtxbuf.buffer.resource);
         u_upload_unmap(This->vertex_uploader);
-        vtxbuf.user_buffer = NULL;
     }
 
-    This->pipe->set_vertex_buffers(This->pipe, 0, 1, &vtxbuf);
+    NineBeforeDraw(This);
+    nine_context_draw_primitive_from_vtxbuf(This, PrimitiveType, PrimitiveCount, &vtxbuf);
+    NineAfterDraw(This);
 
-    This->pipe->draw_vbo(This->pipe, &info);
+    pipe_vertex_buffer_unreference(&vtxbuf);
 
     NineDevice9_PauseRecording(This);
     NineDevice9_SetStreamSource(This, 0, NULL, 0, 0);
     NineDevice9_ResumeRecording(This);
 
-    pipe_resource_reference(&vtxbuf.buffer, NULL);
-
     return D3D_OK;
 }
 
@@ -3035,9 +2855,7 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
                                     const void *pVertexStreamZeroData,
                                     UINT VertexStreamZeroStride )
 {
-    struct pipe_draw_info info;
     struct pipe_vertex_buffer vbuf;
-    struct pipe_index_buffer ibuf;
 
     DBG("iface %p, PrimitiveType %u, MinVertexIndex %u, NumVertices %u "
         "PrimitiveCount %u, pIndexData %p, IndexDataFormat %u "
@@ -3050,60 +2868,58 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
     user_assert(VertexStreamZeroStride, D3DERR_INVALIDCALL);
     user_assert(IndexDataFormat == D3DFMT_INDEX16 ||
                 IndexDataFormat == D3DFMT_INDEX32, D3DERR_INVALIDCALL);
-
-    nine_update_state(This);
-
-    init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
-    info.indexed = TRUE;
-    info.start = 0;
-    info.index_bias = 0;
-    info.min_index = MinVertexIndex;
-    info.max_index = MinVertexIndex + NumVertices - 1;
+    user_assert(PrimitiveCount, D3D_OK);
 
     vbuf.stride = VertexStreamZeroStride;
     vbuf.buffer_offset = 0;
-    vbuf.buffer = NULL;
-    vbuf.user_buffer = pVertexStreamZeroData;
+    vbuf.is_user_buffer = true;
+    vbuf.buffer.user = pVertexStreamZeroData;
 
-    ibuf.index_size = (IndexDataFormat == D3DFMT_INDEX16) ? 2 : 4;
-    ibuf.offset = 0;
-    ibuf.buffer = NULL;
-    ibuf.user_buffer = pIndexData;
+    unsigned index_size = (IndexDataFormat == D3DFMT_INDEX16) ? 2 : 4;
+    struct pipe_resource *ibuf = NULL;
 
     if (!This->driver_caps.user_vbufs) {
-        const unsigned base = info.min_index * VertexStreamZeroStride;
+        const unsigned base = MinVertexIndex * VertexStreamZeroStride;
+        vbuf.is_user_buffer = false;
+        vbuf.buffer.resource = NULL;
         u_upload_data(This->vertex_uploader,
                       base,
-                      (info.max_index -
-                       info.min_index + 1) * VertexStreamZeroStride, /* XXX */
+                      NumVertices * VertexStreamZeroStride, /* XXX */
                       4,
-                      (const uint8_t *)vbuf.user_buffer + base,
+                      (const uint8_t *)pVertexStreamZeroData + base,
                       &vbuf.buffer_offset,
-                      &vbuf.buffer);
+                      &vbuf.buffer.resource);
         u_upload_unmap(This->vertex_uploader);
         /* Won't be used: */
         vbuf.buffer_offset -= base;
-        vbuf.user_buffer = NULL;
     }
-    if (!This->driver_caps.user_ibufs) {
-        u_upload_data(This->index_uploader,
+
+    unsigned index_offset = 0;
+    if (This->csmt_active) {
+        u_upload_data(This->pipe_secondary->stream_uploader,
                       0,
-                      info.count * ibuf.index_size,
+                      (prim_count_to_vertex_count(PrimitiveType, PrimitiveCount)) * index_size,
                       4,
-                      ibuf.user_buffer,
-                      &ibuf.offset,
-                      &ibuf.buffer);
-        u_upload_unmap(This->index_uploader);
-        ibuf.user_buffer = NULL;
-    }
-
-    This->pipe->set_vertex_buffers(This->pipe, 0, 1, &vbuf);
-    This->pipe->set_index_buffer(This->pipe, &ibuf);
-
-    This->pipe->draw_vbo(This->pipe, &info);
-
-    pipe_resource_reference(&vbuf.buffer, NULL);
-    pipe_resource_reference(&ibuf.buffer, NULL);
+                      pIndexData,
+                      &index_offset,
+                      &ibuf);
+        u_upload_unmap(This->pipe_secondary->stream_uploader);
+    }
+
+    NineBeforeDraw(This);
+    nine_context_draw_indexed_primitive_from_vtxbuf_idxbuf(This, PrimitiveType,
+                                                           MinVertexIndex,
+                                                           NumVertices,
+                                                           PrimitiveCount,
+                                                           &vbuf,
+                                                           ibuf,
+                                                           ibuf ? NULL : (void*)pIndexData,
+                                                           index_offset,
+                                                           index_size);
+    NineAfterDraw(This);
+
+    pipe_vertex_buffer_unreference(&vbuf);
+    pipe_resource_reference(&ibuf, NULL);
 
     NineDevice9_PauseRecording(This);
     NineDevice9_SetIndices(This, NULL);
@@ -3113,9 +2929,6 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
     return D3D_OK;
 }
 
-/* TODO: Write to pDestBuffer directly if vertex declaration contains
- * only f32 formats.
- */
 HRESULT NINE_WINAPI
 NineDevice9_ProcessVertices( struct NineDevice9 *This,
                              UINT SrcStartIndex,
@@ -3125,35 +2938,73 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This,
                              IDirect3DVertexDeclaration9 *pVertexDecl,
                              DWORD Flags )
 {
-    struct pipe_screen *screen = This->screen;
+    struct pipe_screen *screen_sw = This->screen_sw;
+    struct pipe_context *pipe_sw = This->pipe_sw;
     struct NineVertexDeclaration9 *vdecl = NineVertexDeclaration9(pVertexDecl);
+    struct NineVertexBuffer9 *dst = NineVertexBuffer9(pDestBuffer);
     struct NineVertexShader9 *vs;
     struct pipe_resource *resource;
+    struct pipe_transfer *transfer = NULL;
+    struct pipe_stream_output_info so;
     struct pipe_stream_output_target *target;
     struct pipe_draw_info draw;
+    struct pipe_box box;
+    bool programmable_vs = This->state.vs && !(This->state.vdecl && This->state.vdecl->position_t);
+    unsigned offsets[1] = {0};
     HRESULT hr;
-    unsigned buffer_offset, buffer_size;
+    unsigned buffer_size;
+    void *map;
 
     DBG("This=%p SrcStartIndex=%u DestIndex=%u VertexCount=%u "
         "pDestBuffer=%p pVertexDecl=%p Flags=%d\n",
         This, SrcStartIndex, DestIndex, VertexCount, pDestBuffer,
         pVertexDecl, Flags);
 
-    if (!screen->get_param(screen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS))
-        STUB(D3DERR_INVALIDCALL);
+    if (!screen_sw->get_param(screen_sw, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS)) {
+        DBG("ProcessVertices not supported\n");
+        return D3DERR_INVALIDCALL;
+    }
 
-    nine_update_state(This);
 
-    /* TODO: Create shader with stream output. */
-    STUB(D3DERR_INVALIDCALL);
-    struct NineVertexBuffer9 *dst = NineVertexBuffer9(pDestBuffer);
+    vs = programmable_vs ? This->state.vs : This->ff.vs;
+    /* Note: version is 0 for ff */
+    user_assert(vdecl || (vs->byte_code.version < 0x30 && dst->desc.FVF),
+                D3DERR_INVALIDCALL);
+    if (!vdecl) {
+        DWORD FVF = dst->desc.FVF;
+        vdecl = util_hash_table_get(This->ff.ht_fvf, &FVF);
+        if (!vdecl) {
+            hr = NineVertexDeclaration9_new_from_fvf(This, FVF, &vdecl);
+            if (FAILED(hr))
+                return hr;
+            vdecl->fvf = FVF;
+            util_hash_table_set(This->ff.ht_fvf, &vdecl->fvf, vdecl);
+            NineUnknown_ConvertRefToBind(NineUnknown(vdecl));
+        }
+    }
 
-    vs = This->state.vs ? This->state.vs : This->ff.vs;
+    /* Flags: Can be 0 or D3DPV_DONOTCOPYDATA, and/or lock flags
+     * D3DPV_DONOTCOPYDATA -> Has effect only for ff. In particular
+     * if not set, everything from src will be used, and dst
+     * must match exactly the ff vs outputs.
+     * TODO: Handle all the checks, etc for ff */
+    user_assert(vdecl->position_t || programmable_vs,
+                D3DERR_INVALIDCALL);
 
-    buffer_size = VertexCount * vs->so->stride[0];
-    if (1) {
+    /* TODO: Support vs < 3 and ff */
+    user_assert(vs->byte_code.version == 0x30,
+                D3DERR_INVALIDCALL);
+    /* TODO: Not hardcode the constant buffers for swvp */
+    user_assert(This->may_swvp,
+                D3DERR_INVALIDCALL);
+
+    nine_state_prepare_draw_sw(This, vdecl, SrcStartIndex, VertexCount, &so);
+
+    buffer_size = VertexCount * so.stride[0] * 4;
+    {
         struct pipe_resource templ;
 
+        memset(&templ, 0, sizeof(templ));
         templ.target = PIPE_BUFFER;
         templ.format = PIPE_FORMAT_R8_UNORM;
         templ.width0 = buffer_size;
@@ -3163,49 +3014,56 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This,
         templ.height0 = templ.depth0 = templ.array_size = 1;
         templ.last_level = templ.nr_samples = 0;
 
-        resource = This->screen->resource_create(This->screen, &templ);
+        resource = screen_sw->resource_create(screen_sw, &templ);
         if (!resource)
             return E_OUTOFMEMORY;
-        buffer_offset = 0;
-    } else {
-        /* SO matches vertex declaration */
-        resource = NineVertexBuffer9_GetResource(dst);
-        buffer_offset = DestIndex * vs->so->stride[0];
     }
-    target = This->pipe->create_stream_output_target(This->pipe, resource,
-                                                     buffer_offset,
-                                                     buffer_size);
+    target = pipe_sw->create_stream_output_target(pipe_sw, resource,
+                                                  0, buffer_size);
     if (!target) {
         pipe_resource_reference(&resource, NULL);
         return D3DERR_DRIVERINTERNALERROR;
     }
 
-    if (!vdecl) {
-        hr = NineVertexDeclaration9_new_from_fvf(This, dst->desc.FVF, &vdecl);
-        if (FAILED(hr))
-            goto out;
-    }
-
-    init_draw_info(&draw, This, D3DPT_POINTLIST, VertexCount);
+    draw.mode = PIPE_PRIM_POINTS;
+    draw.count = VertexCount;
+    draw.start_instance = 0;
+    draw.primitive_restart = FALSE;
+    draw.restart_index = 0;
+    draw.count_from_stream_output = NULL;
+    draw.indirect = NULL;
     draw.instance_count = 1;
-    draw.indexed = FALSE;
-    draw.start = SrcStartIndex;
+    draw.index_size = 0;
+    draw.start = 0;
     draw.index_bias = 0;
-    draw.min_index = SrcStartIndex;
-    draw.max_index = SrcStartIndex + VertexCount - 1;
+    draw.min_index = 0;
+    draw.max_index = VertexCount - 1;
+
 
-    This->pipe->set_stream_output_targets(This->pipe, 1, &target, 0);
-    This->pipe->draw_vbo(This->pipe, &draw);
-    This->pipe->set_stream_output_targets(This->pipe, 0, NULL, 0);
-    This->pipe->stream_output_target_destroy(This->pipe, target);
+    pipe_sw->set_stream_output_targets(pipe_sw, 1, &target, offsets);
+
+    pipe_sw->draw_vbo(pipe_sw, &draw);
+
+    pipe_sw->set_stream_output_targets(pipe_sw, 0, NULL, 0);
+    pipe_sw->stream_output_target_destroy(pipe_sw, target);
+
+    u_box_1d(0, VertexCount * so.stride[0] * 4, &box);
+    map = pipe_sw->transfer_map(pipe_sw, resource, 0, PIPE_TRANSFER_READ, &box,
+                                &transfer);
+    if (!map) {
+        hr = D3DERR_DRIVERINTERNALERROR;
+        goto out;
+    }
 
     hr = NineVertexDeclaration9_ConvertStreamOutput(vdecl,
                                                     dst, DestIndex, VertexCount,
-                                                    resource, vs->so);
+                                                    map, &so);
+    if (transfer)
+        pipe_sw->transfer_unmap(pipe_sw, transfer);
+
 out:
+    nine_state_after_draw_sw(This);
     pipe_resource_reference(&resource, NULL);
-    if (!pVertexDecl)
-        NineUnknown_Release(NineUnknown(vdecl));
     return hr;
 }
 
@@ -3231,22 +3089,22 @@ NineDevice9_SetVertexDeclaration( struct NineDevice9 *This,
                                   IDirect3DVertexDeclaration9 *pDecl )
 {
     struct nine_state *state = This->update;
-    BOOL was_programmable_vs = This->state.programmable_vs;
+    struct NineVertexDeclaration9 *vdecl = NineVertexDeclaration9(pDecl);
 
     DBG("This=%p pDecl=%p\n", This, pDecl);
 
-    if (likely(!This->is_recording) && state->vdecl == NineVertexDeclaration9(pDecl))
+    if (unlikely(This->is_recording)) {
+        nine_bind(&state->vdecl, vdecl);
+        state->changed.group |= NINE_STATE_VDECL;
         return D3D_OK;
+    }
 
-    nine_bind(&state->vdecl, pDecl);
+    if (state->vdecl == vdecl)
+        return D3D_OK;
 
-    This->state.programmable_vs = This->state.vs && !(This->state.vdecl && This->state.vdecl->position_t);
-    if (likely(!This->is_recording) && was_programmable_vs != This->state.programmable_vs) {
-        state->commit |= NINE_STATE_COMMIT_CONST_VS;
-        state->changed.group |= NINE_STATE_VS;
-    }
+    nine_bind(&state->vdecl, vdecl);
 
-    state->changed.group |= NINE_STATE_VDECL;
+    nine_context_set_vertex_declaration(This, vdecl);
 
     return D3D_OK;
 }
@@ -3317,22 +3175,22 @@ NineDevice9_SetVertexShader( struct NineDevice9 *This,
                              IDirect3DVertexShader9 *pShader )
 {
     struct nine_state *state = This->update;
-    BOOL was_programmable_vs = This->state.programmable_vs;
+    struct NineVertexShader9 *vs_shader = (struct NineVertexShader9*)pShader;
 
     DBG("This=%p pShader=%p\n", This, pShader);
 
-    if (!This->is_recording && state->vs == (struct NineVertexShader9*)pShader)
-      return D3D_OK;
-
-    nine_bind(&state->vs, pShader);
+    if (unlikely(This->is_recording)) {
+        nine_bind(&state->vs, vs_shader);
+        state->changed.group |= NINE_STATE_VS;
+        return D3D_OK;
+    }
 
-    This->state.programmable_vs = This->state.vs && !(This->state.vdecl && This->state.vdecl->position_t);
+    if (state->vs == vs_shader)
+      return D3D_OK;
 
-    /* ff -> non-ff: commit back non-ff constants */
-    if (!was_programmable_vs && This->state.programmable_vs)
-        state->commit |= NINE_STATE_COMMIT_CONST_VS;
+    nine_bind(&state->vs, vs_shader);
 
-    state->changed.group |= NINE_STATE_VS;
+    nine_context_set_vertex_shader(This, vs_shader);
 
     return D3D_OK;
 }
@@ -3353,6 +3211,7 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
                                       UINT Vector4fCount )
 {
     struct nine_state *state = This->update;
+    float *vs_const_f = state->vs_const_f;
 
     DBG("This=%p StartRegister=%u pConstantData=%p Vector4fCount=%u\n",
         This, StartRegister, pConstantData, Vector4fCount);
@@ -3364,21 +3223,31 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
        return D3D_OK;
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
-    if (!This->is_recording) {
-        if (!memcmp(&state->vs_const_f[StartRegister * 4], pConstantData,
-                    Vector4fCount * 4 * sizeof(state->vs_const_f[0])))
-            return D3D_OK;
+    if (unlikely(This->is_recording)) {
+        memcpy(&vs_const_f[StartRegister * 4],
+               pConstantData,
+               Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
+
+        nine_ranges_insert(&state->changed.vs_const_f,
+                           StartRegister, StartRegister + Vector4fCount,
+                           &This->range_pool);
+
+        state->changed.group |= NINE_STATE_VS_CONST;
+
+        return D3D_OK;
     }
 
-    memcpy(&state->vs_const_f[StartRegister * 4],
+    if (!memcmp(&vs_const_f[StartRegister * 4], pConstantData,
+                Vector4fCount * 4 * sizeof(state->vs_const_f[0])))
+        return D3D_OK;
+
+    memcpy(&vs_const_f[StartRegister * 4],
            pConstantData,
            Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
 
-    nine_ranges_insert(&state->changed.vs_const_f,
-                       StartRegister, StartRegister + Vector4fCount,
-                       &This->range_pool);
-
-    state->changed.group |= NINE_STATE_VS_CONST;
+    nine_context_set_vertex_shader_constant_f(This, StartRegister, pConstantData,
+                                              Vector4fCount * 4 * sizeof(state->vs_const_f[0]),
+                                              Vector4fCount);
 
     return D3D_OK;
 }
@@ -3414,30 +3283,38 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
     DBG("This=%p StartRegister=%u pConstantData=%p Vector4iCount=%u\n",
         This, StartRegister, pConstantData, Vector4iCount);
 
-    user_assert(StartRegister                  < NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
-    user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
+    user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+                D3DERR_INVALIDCALL);
+    user_assert(StartRegister + Vector4iCount <= (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+                D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
     if (This->driver_caps.vs_integer) {
         if (!This->is_recording) {
-            if (!memcmp(&state->vs_const_i[StartRegister][0], pConstantData,
-                        Vector4iCount * sizeof(state->vs_const_i[0])))
+            if (!memcmp(&state->vs_const_i[4 * StartRegister], pConstantData,
+                        Vector4iCount * sizeof(int[4])))
                 return D3D_OK;
         }
-        memcpy(&state->vs_const_i[StartRegister][0],
+        memcpy(&state->vs_const_i[4 * StartRegister],
                pConstantData,
-               Vector4iCount * sizeof(state->vs_const_i[0]));
+               Vector4iCount * sizeof(int[4]));
     } else {
         for (i = 0; i < Vector4iCount; i++) {
-            state->vs_const_i[StartRegister+i][0] = fui((float)(pConstantData[4*i]));
-            state->vs_const_i[StartRegister+i][1] = fui((float)(pConstantData[4*i+1]));
-            state->vs_const_i[StartRegister+i][2] = fui((float)(pConstantData[4*i+2]));
-            state->vs_const_i[StartRegister+i][3] = fui((float)(pConstantData[4*i+3]));
+            state->vs_const_i[4 * (StartRegister + i)] = fui((float)(pConstantData[4 * i]));
+            state->vs_const_i[4 * (StartRegister + i) + 1] = fui((float)(pConstantData[4 * i + 1]));
+            state->vs_const_i[4 * (StartRegister + i) + 2] = fui((float)(pConstantData[4 * i + 2]));
+            state->vs_const_i[4 * (StartRegister + i) + 3] = fui((float)(pConstantData[4 * i + 3]));
         }
     }
 
-    state->changed.vs_const_i |= ((1 << Vector4iCount) - 1) << StartRegister;
-    state->changed.group |= NINE_STATE_VS_CONST;
+    if (unlikely(This->is_recording)) {
+        nine_ranges_insert(&state->changed.vs_const_i,
+                           StartRegister, StartRegister + Vector4iCount,
+                           &This->range_pool);
+        state->changed.group |= NINE_STATE_VS_CONST;
+    } else
+        nine_context_set_vertex_shader_constant_i(This, StartRegister, pConstantData,
+                                                  Vector4iCount * sizeof(int[4]), Vector4iCount);
 
     return D3D_OK;
 }
@@ -3451,20 +3328,22 @@ NineDevice9_GetVertexShaderConstantI( struct NineDevice9 *This,
     const struct nine_state *state = &This->state;
     int i;
 
-    user_assert(StartRegister                  < NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
-    user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
+    user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+                D3DERR_INVALIDCALL);
+    user_assert(StartRegister + Vector4iCount <= (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+                D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
     if (This->driver_caps.vs_integer) {
         memcpy(pConstantData,
-               &state->vs_const_i[StartRegister][0],
-               Vector4iCount * sizeof(state->vs_const_i[0]));
+               &state->vs_const_i[4 * StartRegister],
+               Vector4iCount * sizeof(int[4]));
     } else {
         for (i = 0; i < Vector4iCount; i++) {
-            pConstantData[4*i] = (int32_t) uif(state->vs_const_i[StartRegister+i][0]);
-            pConstantData[4*i+1] = (int32_t) uif(state->vs_const_i[StartRegister+i][1]);
-            pConstantData[4*i+2] = (int32_t) uif(state->vs_const_i[StartRegister+i][2]);
-            pConstantData[4*i+3] = (int32_t) uif(state->vs_const_i[StartRegister+i][3]);
+            pConstantData[4 * i] = (int32_t) uif(state->vs_const_i[4 * (StartRegister + i)]);
+            pConstantData[4 * i + 1] = (int32_t) uif(state->vs_const_i[4 * (StartRegister + i) + 1]);
+            pConstantData[4 * i + 2] = (int32_t) uif(state->vs_const_i[4 * (StartRegister + i) + 2]);
+            pConstantData[4 * i + 3] = (int32_t) uif(state->vs_const_i[4 * (StartRegister + i) + 3]);
         }
     }
 
@@ -3484,8 +3363,10 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
     DBG("This=%p StartRegister=%u pConstantData=%p BoolCount=%u\n",
         This, StartRegister, pConstantData, BoolCount);
 
-    user_assert(StartRegister              < NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
-    user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
+    user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+                D3DERR_INVALIDCALL);
+    user_assert(StartRegister + BoolCount <= (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+                D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
     if (!This->is_recording) {
@@ -3501,8 +3382,14 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
     for (i = 0; i < BoolCount; i++)
         state->vs_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
 
-    state->changed.vs_const_b |= ((1 << BoolCount) - 1) << StartRegister;
-    state->changed.group |= NINE_STATE_VS_CONST;
+    if (unlikely(This->is_recording)) {
+        nine_ranges_insert(&state->changed.vs_const_b,
+                           StartRegister, StartRegister + BoolCount,
+                           &This->range_pool);
+        state->changed.group |= NINE_STATE_VS_CONST;
+    } else
+        nine_context_set_vertex_shader_constant_b(This, StartRegister, pConstantData,
+                                                  sizeof(BOOL) * BoolCount, BoolCount);
 
     return D3D_OK;
 }
@@ -3516,8 +3403,10 @@ NineDevice9_GetVertexShaderConstantB( struct NineDevice9 *This,
     const struct nine_state *state = &This->state;
     int i;
 
-    user_assert(StartRegister              < NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
-    user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
+    user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+                D3DERR_INVALIDCALL);
+    user_assert(StartRegister + BoolCount <= (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+                D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
     for (i = 0; i < BoolCount; i++)
@@ -3543,22 +3432,31 @@ NineDevice9_SetStreamSource( struct NineDevice9 *This,
     user_assert(StreamNumber < This->caps.MaxStreams, D3DERR_INVALIDCALL);
     user_assert(Stride <= This->caps.MaxStreamStride, D3DERR_INVALIDCALL);
 
-    if (likely(!This->is_recording)) {
-        if (state->stream[i] == NineVertexBuffer9(pStreamData) &&
-            state->vtxbuf[i].stride == Stride &&
-            state->vtxbuf[i].buffer_offset == OffsetInBytes)
-            return D3D_OK;
-    }
-    nine_bind(&state->stream[i], pStreamData);
-
-    state->changed.vtxbuf |= 1 << StreamNumber;
-
-    if (pStreamData) {
+    if (unlikely(This->is_recording)) {
+        nine_bind(&state->stream[i], pStreamData);
+        state->changed.vtxbuf |= 1 << StreamNumber;
         state->vtxbuf[i].stride = Stride;
         state->vtxbuf[i].buffer_offset = OffsetInBytes;
+        return D3D_OK;
     }
-    pipe_resource_reference(&state->vtxbuf[i].buffer,
-                            pStreamData ? NineVertexBuffer9_GetResource(pVBuf9) : NULL);
+
+    if (state->stream[i] == NineVertexBuffer9(pStreamData) &&
+        state->vtxbuf[i].stride == Stride &&
+        state->vtxbuf[i].buffer_offset == OffsetInBytes)
+        return D3D_OK;
+
+    state->vtxbuf[i].stride = Stride;
+    state->vtxbuf[i].buffer_offset = OffsetInBytes;
+
+    NineBindBufferToDevice(This,
+                           (struct NineBuffer9 **)&state->stream[i],
+                           (struct NineBuffer9 *)pVBuf9);
+
+    nine_context_set_stream_source(This,
+                                   StreamNumber,
+                                   pVBuf9,
+                                   OffsetInBytes,
+                                   Stride);
 
     return D3D_OK;
 }
@@ -3601,19 +3499,20 @@ NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This,
                   (Setting & D3DSTREAMSOURCE_INDEXEDDATA)), D3DERR_INVALIDCALL);
     user_assert(Setting, D3DERR_INVALIDCALL);
 
-    if (likely(!This->is_recording) && state->stream_freq[StreamNumber] == Setting)
+    if (unlikely(This->is_recording)) {
+        state->stream_freq[StreamNumber] = Setting;
+        state->changed.stream_freq |= 1 << StreamNumber;
+        if (StreamNumber != 0)
+            state->changed.group |= NINE_STATE_STREAMFREQ;
         return D3D_OK;
+    }
 
-    state->stream_freq[StreamNumber] = Setting;
+    if (state->stream_freq[StreamNumber] == Setting)
+        return D3D_OK;
 
-    if (Setting & D3DSTREAMSOURCE_INSTANCEDATA)
-        state->stream_instancedata_mask |= 1 << StreamNumber;
-    else
-        state->stream_instancedata_mask &= ~(1 << StreamNumber);
+    state->stream_freq[StreamNumber] = Setting;
 
-    state->changed.stream_freq |= 1 << StreamNumber; /* Used for stateblocks */
-    if (StreamNumber != 0)
-        state->changed.group |= NINE_STATE_STREAMFREQ;
+    nine_context_set_stream_source_freq(This, StreamNumber, Setting);
     return D3D_OK;
 }
 
@@ -3632,15 +3531,24 @@ NineDevice9_SetIndices( struct NineDevice9 *This,
                         IDirect3DIndexBuffer9 *pIndexData )
 {
     struct nine_state *state = This->update;
+    struct NineIndexBuffer9 *idxbuf = NineIndexBuffer9(pIndexData);
 
     DBG("This=%p pIndexData=%p\n", This, pIndexData);
 
-    if (likely(!This->is_recording))
-        if (state->idxbuf == NineIndexBuffer9(pIndexData))
-            return D3D_OK;
-    nine_bind(&state->idxbuf, pIndexData);
+    if (unlikely(This->is_recording)) {
+        nine_bind(&state->idxbuf, idxbuf);
+        state->changed.group |= NINE_STATE_IDXBUF;
+        return D3D_OK;
+    }
+
+    if (state->idxbuf == idxbuf)
+        return D3D_OK;
 
-    state->changed.group |= NINE_STATE_IDXBUF;
+    NineBindBufferToDevice(This,
+                           (struct NineBuffer9 **)&state->idxbuf,
+                           (struct NineBuffer9 *)idxbuf);
+
+    nine_context_set_indices(This, idxbuf);
 
     return D3D_OK;
 }
@@ -3650,8 +3558,7 @@ NineDevice9_SetIndices( struct NineDevice9 *This,
  */
 HRESULT NINE_WINAPI
 NineDevice9_GetIndices( struct NineDevice9 *This,
-                        IDirect3DIndexBuffer9 **ppIndexData /*,
-                        UINT *pBaseVertexIndex */ )
+                        IDirect3DIndexBuffer9 **ppIndexData)
 {
     user_assert(ppIndexData, D3DERR_INVALIDCALL);
     nine_reference_set(ppIndexData, This->state.idxbuf);
@@ -3680,27 +3587,25 @@ NineDevice9_SetPixelShader( struct NineDevice9 *This,
                             IDirect3DPixelShader9 *pShader )
 {
     struct nine_state *state = This->update;
-    unsigned old_mask = state->ps ? state->ps->rt_mask : 1;
-    unsigned mask;
+    struct NinePixelShader9 *ps = (struct NinePixelShader9*)pShader;
 
     DBG("This=%p pShader=%p\n", This, pShader);
 
-    if (!This->is_recording && state->ps == (struct NinePixelShader9*)pShader)
-      return D3D_OK;
-
-    /* ff -> non-ff: commit back non-ff constants */
-    if (!state->ps && pShader)
-        state->commit |= NINE_STATE_COMMIT_CONST_PS;
+    if (unlikely(This->is_recording)) {
+        /* Technically we need NINE_STATE_FB only
+         * if the ps mask changes, but put it always
+         * to be safe */
+        nine_bind(&state->ps, pShader);
+        state->changed.group |= NINE_STATE_PS | NINE_STATE_FB;
+        return D3D_OK;
+    }
 
-    nine_bind(&state->ps, pShader);
+    if (state->ps == ps)
+        return D3D_OK;
 
-    state->changed.group |= NINE_STATE_PS;
+    nine_bind(&state->ps, ps);
 
-    mask = state->ps ? state->ps->rt_mask : 1;
-    /* We need to update cbufs if the pixel shader would
-     * write to different render targets */
-    if (mask != old_mask)
-        state->changed.group |= NINE_STATE_FB;
+    nine_context_set_pixel_shader(This, ps);
 
     return D3D_OK;
 }
@@ -3732,21 +3637,30 @@ NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This,
        return D3D_OK;
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
-    if (!This->is_recording) {
-        if (!memcmp(&state->ps_const_f[StartRegister * 4], pConstantData,
-                    Vector4fCount * 4 * sizeof(state->ps_const_f[0])))
-            return D3D_OK;
+    if (unlikely(This->is_recording)) {
+        memcpy(&state->ps_const_f[StartRegister * 4],
+               pConstantData,
+               Vector4fCount * 4 * sizeof(state->ps_const_f[0]));
+
+        nine_ranges_insert(&state->changed.ps_const_f,
+                           StartRegister, StartRegister + Vector4fCount,
+                           &This->range_pool);
+
+        state->changed.group |= NINE_STATE_PS_CONST;
+        return D3D_OK;
     }
 
+    if (!memcmp(&state->ps_const_f[StartRegister * 4], pConstantData,
+                Vector4fCount * 4 * sizeof(state->ps_const_f[0])))
+        return D3D_OK;
+
     memcpy(&state->ps_const_f[StartRegister * 4],
            pConstantData,
            Vector4fCount * 4 * sizeof(state->ps_const_f[0]));
 
-    nine_ranges_insert(&state->changed.ps_const_f,
-                       StartRegister, StartRegister + Vector4fCount,
-                       &This->range_pool);
-
-    state->changed.group |= NINE_STATE_PS_CONST;
+    nine_context_set_pixel_shader_constant_f(This, StartRegister, pConstantData,
+                                             Vector4fCount * 4 * sizeof(state->ps_const_f[0]),
+                                             Vector4fCount);
 
     return D3D_OK;
 }
@@ -3803,8 +3717,13 @@ NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This,
             state->ps_const_i[StartRegister+i][3] = fui((float)(pConstantData[4*i+3]));
         }
     }
-    state->changed.ps_const_i |= ((1 << Vector4iCount) - 1) << StartRegister;
-    state->changed.group |= NINE_STATE_PS_CONST;
+
+    if (unlikely(This->is_recording)) {
+        state->changed.ps_const_i |= ((1 << Vector4iCount) - 1) << StartRegister;
+        state->changed.group |= NINE_STATE_PS_CONST;
+    } else
+        nine_context_set_pixel_shader_constant_i(This, StartRegister, pConstantData,
+                                                 sizeof(state->ps_const_i[0]) * Vector4iCount, Vector4iCount);
 
     return D3D_OK;
 }
@@ -3868,8 +3787,12 @@ NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This,
     for (i = 0; i < BoolCount; i++)
         state->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
 
-    state->changed.ps_const_b |= ((1 << BoolCount) - 1) << StartRegister;
-    state->changed.group |= NINE_STATE_PS_CONST;
+    if (unlikely(This->is_recording)) {
+        state->changed.ps_const_b |= ((1 << BoolCount) - 1) << StartRegister;
+        state->changed.group |= NINE_STATE_PS_CONST;
+    } else
+        nine_context_set_pixel_shader_constant_b(This, StartRegister, pConstantData,
+                                                 sizeof(BOOL) * BoolCount, BoolCount);
 
     return D3D_OK;
 }