st/nine: Handle Window Occlusion
[mesa.git] / src / gallium / state_trackers / nine / device9.c
index e9599b853c021f939783257dc41722c00a5690cd..cb6c813b650a9fff8d45b172883ba6cdc7d1f189 100644 (file)
@@ -41,6 +41,7 @@
 
 #include "pipe/p_screen.h"
 #include "pipe/p_context.h"
+#include "pipe/p_config.h"
 #include "util/u_math.h"
 #include "util/u_inlines.h"
 #include "util/u_hash_table.h"
 
 #define DBG_CHANNEL DBG_DEVICE
 
-static void
+#if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
+
+#include <fpu_control.h>
+
+/* Program the x87 FPU control word the way d3d9 expects it. The device
+ * constructor calls this unless the application passed
+ * D3DCREATE_FPU_PRESERVE. */
+static void nine_setup_fpu(void)
+{
+    fpu_control_t c;
+
+    _FPU_GETCW(c);
+    /* keep only the reserved bits; every configurable field starts cleared */
+    c &= _FPU_RESERVED;
+    /* d3d9 doc/wine tests: mask all exceptions, use single-precision
+     * and round to nearest */
+    c |= _FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM | _FPU_MASK_OM |
+         _FPU_MASK_UM | _FPU_MASK_PM | _FPU_SINGLE | _FPU_RC_NEAREST;
+    _FPU_SETCW(c);
+}
+
+#else
+
+/* Fallback for builds where the GCC/x86 path above is not compiled in
+ * (other architectures, or other compilers): it only emits a warning via
+ * WARN_ONCE and does not touch the FPU state. */
+static void nine_setup_fpu(void)
+{
+    WARN_ONCE("FPU setup not supported on non-x86 platforms\n");
+}
+
+#endif
+
+void
 NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset )
 {
     struct NineSurface9 *refSurf = NULL;
@@ -62,7 +90,7 @@ NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset )
 
     assert(!This->is_recording);
 
-    nine_state_set_defaults(&This->state, &This->caps, is_reset);
+    nine_state_set_defaults(This, &This->caps, is_reset);
 
     This->state.viewport.X = 0;
     This->state.viewport.Y = 0;
@@ -91,48 +119,6 @@ NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset )
             This, (IDirect3DSurface9 *)This->swapchains[0]->zsbuf);
 }
 
-void
-NineDevice9_RestoreNonCSOState( struct NineDevice9 *This, unsigned mask )
-{
-    struct pipe_context *pipe = This->pipe;
-
-    DBG("This=%p mask=%u\n", This, mask);
-
-    if (mask & 0x1) {
-        struct pipe_constant_buffer cb;
-        cb.buffer_offset = 0;
-
-        if (This->prefer_user_constbuf) {
-            cb.buffer = NULL;
-            cb.user_buffer = This->state.vs_const_f;
-        } else {
-            cb.buffer = This->constbuf_vs;
-            cb.user_buffer = NULL;
-        }
-        cb.buffer_size = This->constbuf_vs->width0;
-        pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb);
-
-        if (This->prefer_user_constbuf) {
-            cb.user_buffer = This->state.ps_const_f;
-        } else {
-            cb.buffer = This->constbuf_ps;
-        }
-        cb.buffer_size = This->constbuf_ps->width0;
-        pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
-    }
-
-    if (mask & 0x2) {
-        struct pipe_poly_stipple stipple;
-        memset(&stipple, ~0, sizeof(stipple));
-        pipe->set_polygon_stipple(pipe, &stipple);
-    }
-
-    This->state.changed.group = NINE_STATE_ALL;
-    This->state.changed.vtxbuf = (1ULL << This->caps.MaxStreams) - 1;
-    This->state.changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1;
-    This->state.changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK;
-}
-
 #define GET_PCAP(n) pScreen->get_param(pScreen, PIPE_CAP_##n)
 HRESULT
 NineDevice9_ctor( struct NineDevice9 *This,
@@ -145,7 +131,8 @@ NineDevice9_ctor( struct NineDevice9 *This,
                   ID3DPresentGroup *pPresentationGroup,
                   struct d3dadapter9_context *pCTX,
                   boolean ex,
-                  D3DDISPLAYMODEEX *pFullscreenDisplayMode )
+                  D3DDISPLAYMODEEX *pFullscreenDisplayMode,
+                  int minorVersionNum )
 {
     unsigned i;
     HRESULT hr = NineUnknown_ctor(&This->base, pParams);
@@ -158,6 +145,7 @@ NineDevice9_ctor( struct NineDevice9 *This,
     if (FAILED(hr)) { return hr; }
 
     list_inithead(&This->update_textures);
+    list_inithead(&This->managed_textures);
 
     This->screen = pScreen;
     This->caps = *pCaps;
@@ -165,10 +153,20 @@ NineDevice9_ctor( struct NineDevice9 *This,
     This->params = *pCreationParameters;
     This->ex = ex;
     This->present = pPresentationGroup;
+    This->minor_version_num = minorVersionNum;
+
     IDirect3D9_AddRef(This->d3d9);
     ID3DPresentGroup_AddRef(This->present);
 
-    This->pipe = This->screen->context_create(This->screen, NULL);
+    if (!(This->params.BehaviorFlags & D3DCREATE_FPU_PRESERVE))
+        nine_setup_fpu();
+
+    if (This->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING)
+        DBG("Application asked full Software Vertex Processing. Ignoring.\n");
+    if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING)
+        DBG("Application asked mixed Software Vertex Processing. Ignoring.\n");
+
+    This->pipe = This->screen->context_create(This->screen, NULL, 0);
     if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */
 
     This->cso = cso_create_context(This->pipe);
@@ -221,6 +219,42 @@ NineDevice9_ctor( struct NineDevice9 *This,
         NineUnknown_ConvertRefToBind(NineUnknown(This->state.rt[i]));
     }
 
+    /* Initialize a dummy VBO to be used when a vertex declaration does not
+     * specify all the inputs needed by the vertex shader. On Windows the
+     * default behavior is to pass (0,0,0,0) to the shader in that case. */
+    {
+        struct pipe_transfer *transfer;
+        struct pipe_resource tmpl;
+        struct pipe_box box;
+        unsigned char *data;
+
+        tmpl.target = PIPE_BUFFER;
+        tmpl.format = PIPE_FORMAT_R8_UNORM;
+        tmpl.width0 = 16; /* 4 floats */
+        tmpl.height0 = 1;
+        tmpl.depth0 = 1;
+        tmpl.array_size = 1;
+        tmpl.last_level = 0;
+        tmpl.nr_samples = 0;
+        tmpl.usage = PIPE_USAGE_DEFAULT;
+        tmpl.bind = PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_TRANSFER_WRITE;
+        tmpl.flags = 0;
+        This->dummy_vbo = pScreen->resource_create(pScreen, &tmpl);
+
+        if (!This->dummy_vbo)
+            return D3DERR_OUTOFVIDEOMEMORY;
+
+        u_box_1d(0, 16, &box);
+        data = This->pipe->transfer_map(This->pipe, This->dummy_vbo, 0,
+                                        PIPE_TRANSFER_WRITE |
+                                        PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
+                                        &box, &transfer);
+        assert(data);
+        assert(transfer);
+        memset(data, 0, 16);
+        This->pipe->transfer_unmap(This->pipe, transfer);
+    }
+
     This->cursor.software = FALSE;
     This->cursor.hotspot.x = -1;
     This->cursor.hotspot.y = -1;
@@ -248,29 +282,39 @@ NineDevice9_ctor( struct NineDevice9 *This,
         struct pipe_resource tmpl;
         unsigned max_const_vs, max_const_ps;
 
+        /* vs 3.0: >= 256 float constants, but for cards with exactly 256 slots,
+         * some of those slots have to be reserved for the int and bool constants */
         max_const_vs = _min(pScreen->get_shader_param(pScreen, PIPE_SHADER_VERTEX,
                                 PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE) /
                                 sizeof(float[4]),
-                           NINE_MAX_CONST_ALL);
-        max_const_ps = _min(pScreen->get_shader_param(pScreen, PIPE_SHADER_FRAGMENT,
-                                PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE) /
-                                sizeof(float[4]),
                             NINE_MAX_CONST_ALL);
+        /* ps 3.0: 224 float constants. All supported cards provide at least
+         * 256 constant slots for ps */
+        max_const_ps = NINE_MAX_CONST_F_PS3 + (NINE_MAX_CONST_I + NINE_MAX_CONST_B / 4);
 
         This->max_vs_const_f = max_const_vs -
                                (NINE_MAX_CONST_I + NINE_MAX_CONST_B / 4);
         This->max_ps_const_f = max_const_ps -
                                (NINE_MAX_CONST_I + NINE_MAX_CONST_B / 4);
 
+        This->vs_const_size = max_const_vs * sizeof(float[4]);
+        This->ps_const_size = max_const_ps * sizeof(float[4]);
         /* Include space for I,B constants for user constbuf. */
-        This->state.vs_const_f = CALLOC(NINE_MAX_CONST_ALL, sizeof(float[4]));
-        This->state.ps_const_f = CALLOC(NINE_MAX_CONST_ALL, sizeof(float[4]));
-        if (!This->state.vs_const_f || !This->state.ps_const_f)
+        This->state.vs_const_f = CALLOC(This->vs_const_size, 1);
+        This->state.ps_const_f = CALLOC(This->ps_const_size, 1);
+        This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
+        This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1);
+        if (!This->state.vs_const_f || !This->state.ps_const_f ||
+            !This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp)
             return E_OUTOFMEMORY;
 
         if (strstr(pScreen->get_name(pScreen), "AMD") ||
-            strstr(pScreen->get_name(pScreen), "ATI"))
-            This->prefer_user_constbuf = TRUE;
+            strstr(pScreen->get_name(pScreen), "ATI")) {
+            This->driver_bugs.buggy_barycentrics = TRUE;
+        }
+
+        /* Disable NV path for now, needs some fixes */
+        This->prefer_user_constbuf = TRUE;
 
         tmpl.target = PIPE_BUFFER;
         tmpl.format = PIPE_FORMAT_R8_UNORM;
@@ -283,45 +327,102 @@ NineDevice9_ctor( struct NineDevice9 *This,
         tmpl.bind = PIPE_BIND_CONSTANT_BUFFER;
         tmpl.flags = 0;
 
-        tmpl.width0 = max_const_vs * sizeof(float[4]);
+        tmpl.width0 = This->vs_const_size;
         This->constbuf_vs = pScreen->resource_create(pScreen, &tmpl);
 
-        tmpl.width0 = max_const_ps * sizeof(float[4]);
+        tmpl.width0 = This->ps_const_size;
         This->constbuf_ps = pScreen->resource_create(pScreen, &tmpl);
 
         if (!This->constbuf_vs || !This->constbuf_ps)
             return E_OUTOFMEMORY;
     }
 
-    This->vs_bool_true = pScreen->get_shader_param(pScreen,
-        PIPE_SHADER_VERTEX,
-        PIPE_SHADER_CAP_INTEGERS) ? 0xFFFFFFFF : fui(1.0f);
-    This->ps_bool_true = pScreen->get_shader_param(pScreen,
-        PIPE_SHADER_FRAGMENT,
-        PIPE_SHADER_CAP_INTEGERS) ? 0xFFFFFFFF : fui(1.0f);
+    /* allocate dummy texture/sampler for when there are missing ones bound */
+    {
+        struct pipe_resource tmplt;
+        struct pipe_sampler_view templ;
+        struct pipe_sampler_state samp;
+        memset(&samp, 0, sizeof(samp));
+
+        tmplt.target = PIPE_TEXTURE_2D;
+        tmplt.width0 = 1;
+        tmplt.height0 = 1;
+        tmplt.depth0 = 1;
+        tmplt.last_level = 0;
+        tmplt.array_size = 1;
+        tmplt.usage = PIPE_USAGE_DEFAULT;
+        tmplt.flags = 0;
+        tmplt.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+        tmplt.bind = PIPE_BIND_SAMPLER_VIEW;
+        tmplt.nr_samples = 0;
+
+        This->dummy_texture = This->screen->resource_create(This->screen, &tmplt);
+        if (!This->dummy_texture)
+            return D3DERR_DRIVERINTERNALERROR;
+
+        templ.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+        templ.u.tex.first_layer = 0;
+        templ.u.tex.last_layer = 0;
+        templ.u.tex.first_level = 0;
+        templ.u.tex.last_level = 0;
+        templ.swizzle_r = PIPE_SWIZZLE_ZERO;
+        templ.swizzle_g = PIPE_SWIZZLE_ZERO;
+        templ.swizzle_b = PIPE_SWIZZLE_ZERO;
+        templ.swizzle_a = PIPE_SWIZZLE_ONE;
+        templ.target = This->dummy_texture->target;
+
+        This->dummy_sampler_view = This->pipe->create_sampler_view(This->pipe, This->dummy_texture, &templ);
+        if (!This->dummy_sampler_view)
+            return D3DERR_DRIVERINTERNALERROR;
+
+        samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+        samp.max_lod = 15.0f;
+        samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+        samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+        samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+        samp.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+        samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+        samp.compare_mode = PIPE_TEX_COMPARE_NONE;
+        samp.compare_func = PIPE_FUNC_LEQUAL;
+        samp.normalized_coords = 1;
+        samp.seamless_cube_map = 1;
+        This->dummy_sampler_state = samp;
+    }
 
     /* Allocate upload helper for drivers that suck (from st pov ;). */
-    {
-        unsigned bind = 0;
 
-        This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS);
-        This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS);
+    This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS);
+    This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS);
+    This->driver_caps.user_cbufs = GET_PCAP(USER_CONSTANT_BUFFERS);
 
-        if (!This->driver_caps.user_vbufs) bind |= PIPE_BIND_VERTEX_BUFFER;
-        if (!This->driver_caps.user_ibufs) bind |= PIPE_BIND_INDEX_BUFFER;
-        if (bind)
-            This->upload = u_upload_create(This->pipe, 1 << 20, 4, bind);
+    if (!This->driver_caps.user_vbufs)
+        This->vertex_uploader = u_upload_create(This->pipe, 65536,
+                                                PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM);
+    if (!This->driver_caps.user_ibufs)
+        This->index_uploader = u_upload_create(This->pipe, 128 * 1024,
+                                               PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_STREAM);
+    if (!This->driver_caps.user_cbufs) {
+        This->constbuf_alignment = GET_PCAP(CONSTANT_BUFFER_OFFSET_ALIGNMENT);
+        This->constbuf_uploader = u_upload_create(This->pipe, This->vs_const_size,
+                                                  PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM);
     }
 
     This->driver_caps.window_space_position_support = GET_PCAP(TGSI_VS_WINDOW_SPACE_POSITION);
+    This->driver_caps.vs_integer = pScreen->get_shader_param(pScreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS);
+    This->driver_caps.ps_integer = pScreen->get_shader_param(pScreen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS);
 
     nine_ff_init(This); /* initialize fixed function code */
 
     NineDevice9_SetDefaultState(This, FALSE);
-    NineDevice9_RestoreNonCSOState(This, ~0);
+
+    {
+        struct pipe_poly_stipple stipple;
+        memset(&stipple, ~0, sizeof(stipple));
+        This->pipe->set_polygon_stipple(This->pipe, &stipple);
+    }
 
     This->update = &This->state;
-    nine_update_state(This, ~0);
+    nine_update_state(This);
 
     ID3DPresentGroup_Release(This->present);
 
@@ -341,26 +442,35 @@ NineDevice9_dtor( struct NineDevice9 *This )
     nine_ff_fini(This);
     nine_state_clear(&This->state, TRUE);
 
-    if (This->upload)
-        u_upload_destroy(This->upload);
+    if (This->vertex_uploader)
+        u_upload_destroy(This->vertex_uploader);
+    if (This->index_uploader)
+        u_upload_destroy(This->index_uploader);
+    if (This->constbuf_uploader)
+        u_upload_destroy(This->constbuf_uploader);
 
     nine_bind(&This->record, NULL);
 
+    pipe_sampler_view_reference(&This->dummy_sampler_view, NULL);
+    pipe_resource_reference(&This->dummy_texture, NULL);
     pipe_resource_reference(&This->constbuf_vs, NULL);
     pipe_resource_reference(&This->constbuf_ps, NULL);
+    pipe_resource_reference(&This->dummy_vbo, NULL);
     FREE(This->state.vs_const_f);
     FREE(This->state.ps_const_f);
+    FREE(This->state.vs_lconstf_temp);
+    FREE(This->state.ps_lconstf_temp);
 
     if (This->swapchains) {
         for (i = 0; i < This->nswapchains; ++i)
-            NineUnknown_Unbind(NineUnknown(This->swapchains[i]));
+            if (This->swapchains[i])
+                NineUnknown_Unbind(NineUnknown(This->swapchains[i]));
         FREE(This->swapchains);
     }
 
     /* state stuff */
     if (This->pipe) {
         if (This->cso) {
-            cso_release_all(This->cso);
             cso_destroy_context(This->cso);
         }
         if (This->pipe->destroy) { This->pipe->destroy(This->pipe); }
@@ -396,7 +506,7 @@ NineDevice9_GetCaps( struct NineDevice9 *This )
     return &This->caps;
 }
 
-static INLINE void
+static inline void
 NineDevice9_PauseRecording( struct NineDevice9 *This )
 {
     if (This->record) {
@@ -405,7 +515,7 @@ NineDevice9_PauseRecording( struct NineDevice9 *This )
     }
 }
 
-static INLINE void
+static inline void
 NineDevice9_ResumeRecording( struct NineDevice9 *This )
 {
     if (This->record) {
@@ -417,7 +527,14 @@ NineDevice9_ResumeRecording( struct NineDevice9 *This )
 HRESULT WINAPI
 NineDevice9_TestCooperativeLevel( struct NineDevice9 *This )
 {
-    return D3D_OK; /* TODO */
+    if (NineSwapChain9_GetOccluded(This->swapchains[0])) { /* swapchain 0 reports occlusion */
+        This->device_needs_reset = TRUE; /* remember that a reset is required */
+        return D3DERR_DEVICELOST;
+    } else if (This->device_needs_reset) { /* not occluded, but the reset flag is still set */
+        return D3DERR_DEVICENOTRESET;
+    }
+
+    return D3D_OK; /* not occluded and no reset pending */
 }
 
 UINT WINAPI
@@ -433,10 +550,14 @@ NineDevice9_GetAvailableTextureMem( struct NineDevice9 *This )
 HRESULT WINAPI
 NineDevice9_EvictManagedResources( struct NineDevice9 *This )
 {
-    /* We don't really need to do anything here, but might want to free up
-     * the GPU virtual address space by killing pipe_resources.
-     */
-    STUB(D3D_OK);
+    struct NineBaseTexture9 *tex;
+
+    DBG("This=%p\n", This);
+    LIST_FOR_EACH_ENTRY(tex, &This->managed_textures, list2) { /* every entry linked on managed_textures */
+        NineBaseTexture9_UnLoad(tex); /* NOTE(review): presumably releases the texture's GPU-side copy - confirm */
+    }
+
+    return D3D_OK; /* unconditional: no failure path in this function */
 }
 
 HRESULT WINAPI
@@ -485,11 +606,11 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
                                  UINT YHotSpot,
                                  IDirect3DSurface9 *pCursorBitmap )
 {
-    /* TODO: hardware cursor */
     struct NineSurface9 *surf = NineSurface9(pCursorBitmap);
     struct pipe_context *pipe = This->pipe;
     struct pipe_box box;
     struct pipe_transfer *transfer;
+    BOOL hw_cursor;
     void *ptr;
 
     DBG_FLAG(DBG_SWAPCHAIN, "This=%p XHotSpot=%u YHotSpot=%u "
@@ -497,8 +618,15 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
 
     user_assert(pCursorBitmap, D3DERR_INVALIDCALL);
 
-    This->cursor.w = MIN2(surf->desc.Width, This->cursor.image->width0);
-    This->cursor.h = MIN2(surf->desc.Height, This->cursor.image->height0);
+    if (This->swapchains[0]->params.Windowed) {
+        This->cursor.w = MIN2(surf->desc.Width, 32);
+        This->cursor.h = MIN2(surf->desc.Height, 32);
+        hw_cursor = 1; /* always use hw cursor for windowed mode */
+    } else {
+        This->cursor.w = MIN2(surf->desc.Width, This->cursor.image->width0);
+        This->cursor.h = MIN2(surf->desc.Height, This->cursor.image->height0);
+        hw_cursor = This->cursor.w == 32 && This->cursor.h == 32;
+    }
 
     u_box_origin_2d(This->cursor.w, This->cursor.h, &box);
 
@@ -529,16 +657,21 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
                                  lock.pBits, lock.Pitch,
                                  This->cursor.w, This->cursor.h);
 
-        if (!This->cursor.software &&
-            This->cursor.w == 32 && This->cursor.h == 32)
-            ID3DPresent_SetCursor(This->swapchains[0]->present,
-                                  lock.pBits, &This->cursor.hotspot,
-                                  This->cursor.visible);
+        if (hw_cursor)
+            hw_cursor = ID3DPresent_SetCursor(This->swapchains[0]->present,
+                                              lock.pBits,
+                                              &This->cursor.hotspot,
+                                              This->cursor.visible) == D3D_OK;
 
         NineSurface9_UnlockRect(surf);
     }
     pipe->transfer_unmap(pipe, transfer);
 
+    /* hide cursor if we emulate it */
+    if (!hw_cursor)
+        ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, FALSE);
+    This->cursor.software = !hw_cursor;
+
     return D3D_OK;
 }
 
@@ -556,7 +689,7 @@ NineDevice9_SetCursorPosition( struct NineDevice9 *This,
     This->cursor.pos.y = Y;
 
     if (!This->cursor.software)
-        ID3DPresent_SetCursorPos(swap->present, &This->cursor.pos);
+        This->cursor.software = ID3DPresent_SetCursorPos(swap->present, &This->cursor.pos) != D3D_OK;
 }
 
 BOOL WINAPI
@@ -569,7 +702,7 @@ NineDevice9_ShowCursor( struct NineDevice9 *This,
 
     This->cursor.visible = bShow && (This->cursor.hotspot.x != -1);
     if (!This->cursor.software)
-        ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, bShow);
+        This->cursor.software = ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, bShow) != D3D_OK;
 
     return old;
 }
@@ -587,6 +720,11 @@ NineDevice9_CreateAdditionalSwapChain( struct NineDevice9 *This,
         This, pPresentationParameters, pSwapChain);
 
     user_assert(pPresentationParameters, D3DERR_INVALIDCALL);
+    user_assert(tmplt->params.Windowed && pPresentationParameters->Windowed, D3DERR_INVALIDCALL);
+
+    /* TODO: this deserves more tests */
+    if (!pPresentationParameters->hDeviceWindow)
+        pPresentationParameters->hDeviceWindow = This->params.hFocusWindow;
 
     hr = ID3DPresentGroup_CreateAdditionalPresent(This->present, pPresentationParameters, &present);
 
@@ -635,11 +773,16 @@ NineDevice9_Reset( struct NineDevice9 *This,
 
     DBG("This=%p pPresentationParameters=%p\n", This, pPresentationParameters);
 
+    if (NineSwapChain9_GetOccluded(This->swapchains[0])) {
+        This->device_needs_reset = TRUE;
+        return D3DERR_DEVICELOST;
+    }
+
     for (i = 0; i < This->nswapchains; ++i) {
         D3DPRESENT_PARAMETERS *params = &pPresentationParameters[i];
         hr = NineSwapChain9_Resize(This->swapchains[i], params, NULL);
-        if (FAILED(hr))
-            return (hr == D3DERR_OUTOFVIDEOMEMORY) ? hr : D3DERR_DEVICELOST;
+        if (hr != D3D_OK)
+            break;
     }
 
     nine_pipe_context_clear(This);
@@ -650,6 +793,7 @@ NineDevice9_Reset( struct NineDevice9 *This,
         This, 0, (IDirect3DSurface9 *)This->swapchains[0]->buffers[0]);
     /* XXX: better use GetBackBuffer here ? */
 
+    This->device_needs_reset = (hr != D3D_OK);
     return hr;
 }
 
@@ -684,6 +828,8 @@ NineDevice9_GetBackBuffer( struct NineDevice9 *This,
                            IDirect3DSurface9 **ppBackBuffer )
 {
     user_assert(ppBackBuffer != NULL, D3DERR_INVALIDCALL);
+    /* return NULL on error */
+    *ppBackBuffer = NULL;
     user_assert(iSwapChain < This->nswapchains, D3DERR_INVALIDCALL);
 
     return NineSwapChain9_GetBackBuffer(This->swapchains[iSwapChain],
@@ -765,6 +911,7 @@ NineDevice9_CreateTexture( struct NineDevice9 *This,
              D3DUSAGE_DYNAMIC | D3DUSAGE_NONSECURE | D3DUSAGE_RENDERTARGET |
              D3DUSAGE_SOFTWAREPROCESSING | D3DUSAGE_TEXTAPI;
 
+    *ppTexture = NULL;
     user_assert(Width && Height, D3DERR_INVALIDCALL);
     user_assert(!pSharedHandle || This->ex, D3DERR_INVALIDCALL);
     /* When is used shared handle, Pool must be
@@ -806,6 +953,7 @@ NineDevice9_CreateVolumeTexture( struct NineDevice9 *This,
     Usage &= D3DUSAGE_DYNAMIC | D3DUSAGE_NONSECURE |
              D3DUSAGE_SOFTWAREPROCESSING;
 
+    *ppVolumeTexture = NULL;
     user_assert(Width && Height && Depth, D3DERR_INVALIDCALL);
     user_assert(!pSharedHandle || Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
 
@@ -839,6 +987,7 @@ NineDevice9_CreateCubeTexture( struct NineDevice9 *This,
              D3DUSAGE_NONSECURE | D3DUSAGE_RENDERTARGET |
              D3DUSAGE_SOFTWAREPROCESSING;
 
+    *ppCubeTexture = NULL;
     user_assert(EdgeLength, D3DERR_INVALIDCALL);
     user_assert(!pSharedHandle || Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
 
@@ -957,7 +1106,6 @@ create_zs_or_rt_surface(struct NineDevice9 *This,
     user_assert(Pool != D3DPOOL_MANAGED, D3DERR_INVALIDCALL);
 
     templ.target = PIPE_TEXTURE_2D;
-    templ.format = d3d9_to_pipe_format(Format);
     templ.width0 = Width;
     templ.height0 = Height;
     templ.depth0 = 1;
@@ -969,11 +1117,14 @@ create_zs_or_rt_surface(struct NineDevice9 *This,
     templ.bind = PIPE_BIND_SAMPLER_VIEW; /* StretchRect */
     switch (type) {
     case 0: templ.bind |= PIPE_BIND_RENDER_TARGET; break;
-    case 1: templ.bind |= PIPE_BIND_DEPTH_STENCIL; break;
+    case 1: templ.bind = d3d9_get_pipe_depth_format_bindings(Format); break;
     default:
         assert(type == 2);
         break;
     }
+    templ.format = d3d9_to_pipe_format_checked(screen, Format, templ.target,
+                                               templ.nr_samples, templ.bind,
+                                               FALSE);
 
     desc.Format = Format;
     desc.Type = D3DRTYPE_SURFACE;
@@ -989,9 +1140,16 @@ create_zs_or_rt_surface(struct NineDevice9 *This,
     default: break;
     }
 
+    if (compressed_format(Format)) {
+        const unsigned w = util_format_get_blockwidth(templ.format);
+        const unsigned h = util_format_get_blockheight(templ.format);
+
+        user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL);
+    }
+
     if (Pool == D3DPOOL_DEFAULT && Format != D3DFMT_NULL) {
         /* resource_create doesn't return an error code, so check format here */
-        user_assert(CHECK_PIPE_RESOURCE_TEMPLATE(templ), D3DERR_INVALIDCALL);
+        user_assert(templ.format != PIPE_FORMAT_NONE, D3DERR_INVALIDCALL);
         resource = screen->resource_create(screen, &templ);
         user_assert(resource, D3DERR_OUTOFVIDEOMEMORY);
         if (Discard_or_Lockable && (desc.Usage & D3DUSAGE_RENDERTARGET))
@@ -1018,6 +1176,7 @@ NineDevice9_CreateRenderTarget( struct NineDevice9 *This,
                                 IDirect3DSurface9 **ppSurface,
                                 HANDLE *pSharedHandle )
 {
+    *ppSurface = NULL;
     return create_zs_or_rt_surface(This, 0, D3DPOOL_DEFAULT,
                                    Width, Height, Format,
                                    MultiSample, MultisampleQuality,
@@ -1035,6 +1194,9 @@ NineDevice9_CreateDepthStencilSurface( struct NineDevice9 *This,
                                        IDirect3DSurface9 **ppSurface,
                                        HANDLE *pSharedHandle )
 {
+    *ppSurface = NULL;
+    if (!depth_stencil_format(Format))
+        return D3DERR_NOTAVAILABLE;
     return create_zs_or_rt_surface(This, 1, D3DPOOL_DEFAULT,
                                    Width, Height, Format,
                                    MultiSample, MultisampleQuality,
@@ -1050,6 +1212,8 @@ NineDevice9_UpdateSurface( struct NineDevice9 *This,
 {
     struct NineSurface9 *dst = NineSurface9(pDestinationSurface);
     struct NineSurface9 *src = NineSurface9(pSourceSurface);
+    int copy_width, copy_height;
+    RECT destRect;
 
     DBG("This=%p pSourceSurface=%p pDestinationSurface=%p "
         "pSourceRect=%p pDestPoint=%p\n", This,
@@ -1061,13 +1225,75 @@ NineDevice9_UpdateSurface( struct NineDevice9 *This,
     if (pDestPoint)
         DBG("pDestPoint = (%u,%u)\n", pDestPoint->x, pDestPoint->y);
 
+    user_assert(dst && src, D3DERR_INVALIDCALL);
+
     user_assert(dst->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
     user_assert(src->base.pool == D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL);
 
     user_assert(dst->desc.MultiSampleType == D3DMULTISAMPLE_NONE, D3DERR_INVALIDCALL);
     user_assert(src->desc.MultiSampleType == D3DMULTISAMPLE_NONE, D3DERR_INVALIDCALL);
 
-    return NineSurface9_CopySurface(dst, src, pDestPoint, pSourceRect);
+    user_assert(!src->lock_count, D3DERR_INVALIDCALL);
+    user_assert(!dst->lock_count, D3DERR_INVALIDCALL);
+
+    user_assert(dst->desc.Format == src->desc.Format, D3DERR_INVALIDCALL);
+    user_assert(!depth_stencil_format(dst->desc.Format), D3DERR_INVALIDCALL);
+
+    if (pSourceRect) {
+        copy_width = pSourceRect->right - pSourceRect->left;
+        copy_height = pSourceRect->bottom - pSourceRect->top;
+
+        user_assert(pSourceRect->left >= 0 &&
+                    copy_width > 0 &&
+                    pSourceRect->right <= src->desc.Width &&
+                    pSourceRect->top >= 0 &&
+                    copy_height > 0 &&
+                    pSourceRect->bottom <= src->desc.Height,
+                    D3DERR_INVALIDCALL);
+    } else {
+        copy_width = src->desc.Width;
+        copy_height = src->desc.Height;
+    }
+
+    destRect.right = copy_width;
+    destRect.bottom = copy_height;
+
+    if (pDestPoint) {
+        user_assert(pDestPoint->x >= 0 && pDestPoint->y >= 0,
+                    D3DERR_INVALIDCALL);
+        destRect.right += pDestPoint->x;
+        destRect.bottom += pDestPoint->y;
+    }
+
+    user_assert(destRect.right <= dst->desc.Width &&
+                destRect.bottom <= dst->desc.Height,
+                D3DERR_INVALIDCALL);
+
+    if (compressed_format(dst->desc.Format)) {
+        const unsigned w = util_format_get_blockwidth(dst->base.info.format);
+        const unsigned h = util_format_get_blockheight(dst->base.info.format);
+
+        if (pDestPoint) {
+            user_assert(!(pDestPoint->x % w) && !(pDestPoint->y % h),
+                        D3DERR_INVALIDCALL);
+        }
+
+        if (pSourceRect) {
+            user_assert(!(pSourceRect->left % w) && !(pSourceRect->top % h),
+                        D3DERR_INVALIDCALL);
+        }
+        if (!(copy_width == src->desc.Width &&
+              copy_width == dst->desc.Width &&
+              copy_height == src->desc.Height &&
+              copy_height == dst->desc.Height)) {
+            user_assert(!(copy_width  % w) && !(copy_height % h),
+                        D3DERR_INVALIDCALL);
+        }
+    }
+
+    NineSurface9_CopyMemToDefault(dst, src, pDestPoint, pSourceRect);
+
+    return D3D_OK;
 }
 
 HRESULT WINAPI
@@ -1079,6 +1305,7 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
     struct NineBaseTexture9 *srcb = NineBaseTexture9(pSourceTexture);
     unsigned l, m;
     unsigned last_level = dstb->base.info.last_level;
+    RECT rect;
 
     DBG("This=%p pSourceTexture=%p pDestinationTexture=%p\n", This,
         pSourceTexture, pDestinationTexture);
@@ -1091,16 +1318,19 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
     if (dstb->base.usage & D3DUSAGE_AUTOGENMIPMAP) {
         /* Only the first level is updated, the others regenerated. */
         last_level = 0;
+        /* if the source has D3DUSAGE_AUTOGENMIPMAP, we have to ignore
+         * the sublevels, thus level 0 has to match */
+        user_assert(!(srcb->base.usage & D3DUSAGE_AUTOGENMIPMAP) ||
+                    (srcb->base.info.width0 == dstb->base.info.width0 &&
+                     srcb->base.info.height0 == dstb->base.info.height0 &&
+                     srcb->base.info.depth0 == dstb->base.info.depth0),
+                    D3DERR_INVALIDCALL);
     } else {
         user_assert(!(srcb->base.usage & D3DUSAGE_AUTOGENMIPMAP), D3DERR_INVALIDCALL);
     }
 
     user_assert(dstb->base.type == srcb->base.type, D3DERR_INVALIDCALL);
 
-    /* TODO: We can restrict the update to the dirty portions of the source.
-     * Yes, this seems silly, but it's what MSDN says ...
-     */
-
     /* Find src level that matches dst level 0: */
     user_assert(srcb->base.info.width0 >= dstb->base.info.width0 &&
                 srcb->base.info.height0 >= dstb->base.info.height0 &&
@@ -1124,9 +1354,25 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
         struct NineTexture9 *dst = NineTexture9(dstb);
         struct NineTexture9 *src = NineTexture9(srcb);
 
-        for (l = 0; l <= last_level; ++l, ++m)
-            NineSurface9_CopySurface(dst->surfaces[l],
-                                     src->surfaces[m], NULL, NULL);
+        if (src->dirty_rect.width == 0)
+            return D3D_OK;
+
+        pipe_box_to_rect(&rect, &src->dirty_rect);
+        for (l = 0; l < m; ++l)
+            rect_minify_inclusive(&rect);
+
+        for (l = 0; l <= last_level; ++l, ++m) {
+            fit_rect_format_inclusive(dst->base.base.info.format,
+                                      &rect,
+                                      dst->surfaces[l]->desc.Width,
+                                      dst->surfaces[l]->desc.Height);
+            NineSurface9_CopyMemToDefault(dst->surfaces[l],
+                                          src->surfaces[m],
+                                          (POINT *)&rect,
+                                          &rect);
+            rect_minify_inclusive(&rect);
+        }
+        u_box_origin_2d(0, 0, &src->dirty_rect);
     } else
     if (dstb->base.type == D3DRTYPE_CUBETEXTURE) {
         struct NineCubeTexture9 *dst = NineCubeTexture9(dstb);
@@ -1135,10 +1381,25 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
 
         /* GPUs usually have them stored as arrays of mip-mapped 2D textures. */
         for (z = 0; z < 6; ++z) {
+            if (src->dirty_rect[z].width == 0)
+                continue;
+
+            pipe_box_to_rect(&rect, &src->dirty_rect[z]);
+            for (l = 0; l < m; ++l)
+                rect_minify_inclusive(&rect);
+
             for (l = 0; l <= last_level; ++l, ++m) {
-                NineSurface9_CopySurface(dst->surfaces[l * 6 + z],
-                                         src->surfaces[m * 6 + z], NULL, NULL);
+                fit_rect_format_inclusive(dst->base.base.info.format,
+                                          &rect,
+                                          dst->surfaces[l * 6 + z]->desc.Width,
+                                          dst->surfaces[l * 6 + z]->desc.Height);
+                NineSurface9_CopyMemToDefault(dst->surfaces[l * 6 + z],
+                                              src->surfaces[m * 6 + z],
+                                              (POINT *)&rect,
+                                              &rect);
+                rect_minify_inclusive(&rect);
             }
+            u_box_origin_2d(0, 0, &src->dirty_rect[z]);
             m -= l;
         }
     } else
@@ -1146,15 +1407,20 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
         struct NineVolumeTexture9 *dst = NineVolumeTexture9(dstb);
         struct NineVolumeTexture9 *src = NineVolumeTexture9(srcb);
 
+        if (src->dirty_box.width == 0)
+            return D3D_OK;
         for (l = 0; l <= last_level; ++l, ++m)
-            NineVolume9_CopyVolume(dst->volumes[l],
-                                   src->volumes[m], 0, 0, 0, NULL);
+            NineVolume9_CopyMemToDefault(dst->volumes[l],
+                                         src->volumes[m], 0, 0, 0, NULL);
+        u_box_3d(0, 0, 0, 0, 0, 0, &src->dirty_box);
     } else{
         assert(!"invalid texture type");
     }
 
-    if (dstb->base.usage & D3DUSAGE_AUTOGENMIPMAP)
+    if (dstb->base.usage & D3DUSAGE_AUTOGENMIPMAP) {
+        dstb->dirty_mip = TRUE;
         NineBaseTexture9_GenerateMipSubLevels(dstb);
+    }
 
     return D3D_OK;
 }
@@ -1176,7 +1442,12 @@ NineDevice9_GetRenderTargetData( struct NineDevice9 *This,
     user_assert(dst->desc.MultiSampleType < 2, D3DERR_INVALIDCALL);
     user_assert(src->desc.MultiSampleType < 2, D3DERR_INVALIDCALL);
 
-    return NineSurface9_CopySurface(dst, src, NULL, NULL);
+    user_assert(src->desc.Width == dst->desc.Width, D3DERR_INVALIDCALL);
+    user_assert(src->desc.Height == dst->desc.Height, D3DERR_INVALIDCALL);
+
+    NineSurface9_CopyDefaultToMem(dst, src);
+
+    return D3D_OK;
 }
 
 HRESULT WINAPI
@@ -1208,7 +1479,7 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
     struct NineSurface9 *src = NineSurface9(pSourceSurface);
     struct pipe_resource *dst_res = NineSurface9_GetResource(dst);
     struct pipe_resource *src_res = NineSurface9_GetResource(src);
-    const boolean zs = util_format_is_depth_or_stencil(dst_res->format);
+    boolean zs;
     struct pipe_blit_info blit;
     boolean scaled, clamped, ms, flip_x = FALSE, flip_y = FALSE;
 
@@ -1220,9 +1491,12 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
             pSourceRect->left, pSourceRect->top,
             pSourceRect->right, pSourceRect->bottom);
     if (pDestRect)
-        DBG("pSourceRect=(%u,%u)-(%u,%u)\n", pDestRect->left, pDestRect->top,
+        DBG("pDestRect=(%u,%u)-(%u,%u)\n", pDestRect->left, pDestRect->top,
             pDestRect->right, pDestRect->bottom);
 
+    user_assert(dst->base.pool == D3DPOOL_DEFAULT &&
+                src->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
+    zs = util_format_is_depth_or_stencil(dst_res->format);
     user_assert(!zs || !This->in_scene, D3DERR_INVALIDCALL);
     user_assert(!zs || !pSourceRect ||
                 (pSourceRect->left == 0 &&
@@ -1246,8 +1520,6 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
                                             src_res->nr_samples,
                                             PIPE_BIND_SAMPLER_VIEW),
                 D3DERR_INVALIDCALL);
-    user_assert(dst->base.pool == D3DPOOL_DEFAULT &&
-                src->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
 
     /* We might want to permit these, but wine thinks we shouldn't. */
     user_assert(!pDestRect ||
@@ -1257,6 +1529,7 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
                 (pSourceRect->left <= pSourceRect->right &&
                  pSourceRect->top <= pSourceRect->bottom), D3DERR_INVALIDCALL);
 
+    memset(&blit, 0, sizeof(blit));
     blit.dst.resource = dst_res;
     blit.dst.level = dst->level;
     blit.dst.box.z = dst->layer;
@@ -1315,6 +1588,7 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
     blit.filter = Filter == D3DTEXF_LINEAR ?
        PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST;
     blit.scissor_enable = FALSE;
+    blit.alpha_blend = FALSE;
 
     /* If both of a src and dst dimension are negative, flip them. */
     if (blit.dst.box.width < 0 && blit.src.box.width < 0) {
@@ -1331,8 +1605,12 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
 
     user_assert(!scaled || dst != src, D3DERR_INVALIDCALL);
     user_assert(!scaled ||
-                !NineSurface9_IsOffscreenPlain(dst) ||
+                !NineSurface9_IsOffscreenPlain(dst), D3DERR_INVALIDCALL);
+    user_assert(!NineSurface9_IsOffscreenPlain(dst) ||
                 NineSurface9_IsOffscreenPlain(src), D3DERR_INVALIDCALL);
+    user_assert(NineSurface9_IsOffscreenPlain(dst) ||
+                dst->desc.Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL),
+                D3DERR_INVALIDCALL);
     user_assert(!scaled ||
                 (!util_format_is_compressed(dst->base.info.format) &&
                  !util_format_is_compressed(src->base.info.format)),
@@ -1385,6 +1663,9 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
             &blit.src.box);
     }
 
+    /* Communicate the container it needs to update sublevels - if apply */
+    NineSurface9_MarkContainerDirty(dst);
+
     return D3D_OK;
 }
 
@@ -1412,6 +1693,8 @@ NineDevice9_ColorFill( struct NineDevice9 *This,
     user_assert((surf->base.usage & D3DUSAGE_RENDERTARGET) ||
                 NineSurface9_IsOffscreenPlain(surf), D3DERR_INVALIDCALL);
 
+    user_assert(surf->desc.Format != D3DFMT_NULL, D3D_OK);
+
     if (pRect) {
         x = pRect->left;
         y = pRect->top;
@@ -1425,11 +1708,8 @@ NineDevice9_ColorFill( struct NineDevice9 *This,
     }
     d3dcolor_to_pipe_color_union(&rgba, color);
 
-    fallback =
-        !This->screen->is_format_supported(This->screen, surf->base.info.format,
-                                           surf->base.info.target,
-                                           surf->base.info.nr_samples,
-                                           PIPE_BIND_RENDER_TARGET);
+    fallback = !(surf->base.info.bind & PIPE_BIND_RENDER_TARGET);
+
     if (!fallback) {
         psurf = NineSurface9_GetSurface(surf, 0);
         if (!psurf)
@@ -1472,6 +1752,7 @@ NineDevice9_CreateOffscreenPlainSurface( struct NineDevice9 *This,
         Width, Height, d3dformat_to_string(Format), Format, Pool,
         ppSurface, pSharedHandle);
 
+    *ppSurface = NULL;
     user_assert(!pSharedHandle || Pool == D3DPOOL_DEFAULT
                                || Pool == D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL);
     user_assert(Pool != D3DPOOL_MANAGED, D3DERR_INVALIDCALL);
@@ -1600,11 +1881,15 @@ NineDevice9_Clear( struct NineDevice9 *This,
                    float Z,
                    DWORD Stencil )
 {
+    const int sRGB = This->state.rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
+    struct pipe_surface *cbuf, *zsbuf;
     struct pipe_context *pipe = This->pipe;
-    struct NineSurface9 *zsbuf = This->state.ds;
+    struct NineSurface9 *zsbuf_surf = This->state.ds;
+    struct NineSurface9 *rt;
     unsigned bufs = 0;
     unsigned r, i;
     union pipe_color_union rgba;
+    unsigned rt_mask = 0;
     D3DRECT rect;
 
     DBG("This=%p Count=%u pRects=%p Flags=%x Color=%08x Z=%f Stencil=%x\n",
@@ -1613,8 +1898,8 @@ NineDevice9_Clear( struct NineDevice9 *This,
     user_assert(This->state.ds || !(Flags & NINED3DCLEAR_DEPTHSTENCIL),
                 D3DERR_INVALIDCALL);
     user_assert(!(Flags & D3DCLEAR_STENCIL) ||
-                (zsbuf &&
-                 util_format_is_depth_and_stencil(zsbuf->base.info.format)),
+                (zsbuf_surf &&
+                 util_format_is_depth_and_stencil(zsbuf_surf->base.info.format)),
                 D3DERR_INVALIDCALL);
 #ifdef NINE_STRICT
     user_assert((Count && pRects) || (!Count && !pRects), D3DERR_INVALIDCALL);
@@ -1626,15 +1911,18 @@ NineDevice9_Clear( struct NineDevice9 *This,
         Count = 0;
 #endif
 
+    nine_update_state_framebuffer(This);
+
     if (Flags & D3DCLEAR_TARGET) bufs |= PIPE_CLEAR_COLOR;
-    if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH;
-    if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL;
+    /* Ignore Z buffer if not bound */
+    if (This->state.fb.zsbuf != NULL) {
+        if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH;
+        if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL;
+    }
     if (!bufs)
         return D3D_OK;
     d3dcolor_to_pipe_color_union(&rgba, Color);
 
-    nine_update_state(This, NINE_STATE_FB);
-
     rect.x1 = This->state.viewport.X;
     rect.y1 = This->state.viewport.Y;
     rect.x2 = This->state.viewport.Width + rect.x1;
@@ -1660,25 +1948,41 @@ NineDevice9_Clear( struct NineDevice9 *This,
 
     if (rect.x1 >= This->state.fb.width || rect.y1 >= This->state.fb.height)
         return D3D_OK;
+
+    for (i = 0; i < This->caps.NumSimultaneousRTs; ++i) {
+        if (This->state.rt[i] && This->state.rt[i]->desc.Format != D3DFMT_NULL)
+            rt_mask |= 1 << i;
+    }
+
+    /* fast path, clears everything at once */
     if (!Count &&
-        rect.x1 == 0 && rect.x2 >= This->state.fb.width &&
-        rect.y1 == 0 && rect.y2 >= This->state.fb.height) {
-        /* fast path, clears everything at once */
-        DBG("fast path\n");
+        (!(bufs & PIPE_CLEAR_COLOR) || (rt_mask == This->state.rt_mask)) &&
+        rect.x1 == 0 && rect.y1 == 0 &&
+        /* Case we clear only render target. Check clear region vs rt. */
+        ((!(bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
+         rect.x2 >= This->state.fb.width &&
+         rect.y2 >= This->state.fb.height) ||
+        /* Case we clear depth buffer (and eventually rt too).
+         * depth buffer size is always >= rt size. Compare to clear region */
+        ((bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
+         rect.x2 >= zsbuf_surf->desc.Width &&
+         rect.y2 >= zsbuf_surf->desc.Height))) {
+        DBG("Clear fast path\n");
         pipe->clear(pipe, bufs, &rgba, Z, Stencil);
         return D3D_OK;
     }
-    rect.x2 = MIN2(rect.x2, This->state.fb.width);
-    rect.y2 = MIN2(rect.y2, This->state.fb.height);
 
     if (!Count) {
         Count = 1;
         pRects = &rect;
     }
 
-    for (i = 0; (i < This->state.fb.nr_cbufs); ++i) {
-        if (!This->state.fb.cbufs[i] || !(Flags & D3DCLEAR_TARGET))
+    for (i = 0; i < This->caps.NumSimultaneousRTs; ++i) {
+        rt = This->state.rt[i];
+        if (!rt || rt->desc.Format == D3DFMT_NULL ||
+            !(Flags & D3DCLEAR_TARGET))
             continue; /* save space, compiler should hoist this */
+        cbuf = NineSurface9_GetSurface(rt, sRGB);
         for (r = 0; r < Count; ++r) {
             /* Don't trust users to pass these in the right order. */
             unsigned x1 = MIN2(pRects[r].x1, pRects[r].x2);
@@ -1693,11 +1997,11 @@ NineDevice9_Clear( struct NineDevice9 *This,
 
             x1 = MAX2(x1, rect.x1);
             y1 = MAX2(y1, rect.y1);
-            x2 = MIN2(x2, rect.x2);
-            y2 = MIN2(y2, rect.y2);
+            x2 = MIN3(x2, rect.x2, rt->desc.Width);
+            y2 = MIN3(y2, rect.y2, rt->desc.Height);
 
             DBG("Clearing (%u..%u)x(%u..%u)\n", x1, x2, y1, y2);
-            pipe->clear_render_target(pipe, This->state.fb.cbufs[i], &rgba,
+            pipe->clear_render_target(pipe, cbuf, &rgba,
                                       x1, y1, x2 - x1, y2 - y1);
         }
     }
@@ -1719,10 +2023,12 @@ NineDevice9_Clear( struct NineDevice9 *This,
 
         x1 = MIN2(x1, rect.x1);
         y1 = MIN2(y1, rect.y1);
-        x2 = MIN2(x2, rect.x2);
-        y2 = MIN2(y2, rect.y2);
+        x2 = MIN3(x2, rect.x2, zsbuf_surf->desc.Width);
+        y2 = MIN3(y2, rect.y2, zsbuf_surf->desc.Height);
 
-        pipe->clear_depth_stencil(pipe, This->state.fb.zsbuf, bufs, Z, Stencil,
+        zsbuf = NineSurface9_GetSurface(zsbuf_surf, 0);
+        assert(zsbuf);
+        pipe->clear_depth_stencil(pipe, zsbuf, bufs, Z, Stencil,
                                   x1, y1, x2 - x1, y2 - y1);
     }
     return D3D_OK;
@@ -1852,8 +2158,10 @@ NineDevice9_SetLight( struct NineDevice9 *This,
             return E_OUTOFMEMORY;
         state->ff.num_lights = N;
 
-        for (; n < Index; ++n)
+        for (; n < Index; ++n) {
+            memset(&state->ff.light[n], 0, sizeof(D3DLIGHT9));
             state->ff.light[n].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
+        }
     }
     state->ff.light[Index] = *pLight;
 
@@ -1955,7 +2263,9 @@ NineDevice9_GetLightEnable( struct NineDevice9 *This,
     for (i = 0; i < state->ff.num_lights_active; ++i)
         if (state->ff.active_light[i] == Index)
             break;
-    *pEnable = i != state->ff.num_lights_active;
+
+    *pEnable = i != state->ff.num_lights_active ? 128 : 0; // Taken from wine
+
     return D3D_OK;
 }
 
@@ -1966,9 +2276,11 @@ NineDevice9_SetClipPlane( struct NineDevice9 *This,
 {
     struct nine_state *state = This->update;
 
-    DBG("This=%p Index=%u pPlane=%p(%f %f %f %f)\n", This, Index, pPlane,
-        pPlane ? pPlane[0] : 0.0f, pPlane ? pPlane[1] : 0.0f,
-        pPlane ? pPlane[2] : 0.0f, pPlane ? pPlane[3] : 0.0f);
+    user_assert(pPlane, D3DERR_INVALIDCALL);
+
+    DBG("This=%p Index=%u pPlane=%f %f %f %f\n", This, Index,
+        pPlane[0], pPlane[1],
+        pPlane[2], pPlane[3]);
 
     user_assert(Index < PIPE_MAX_CLIP_PLANES, D3DERR_INVALIDCALL);
 
@@ -1991,6 +2303,64 @@ NineDevice9_GetClipPlane( struct NineDevice9 *This,
     return D3D_OK;
 }
 
+#define RESZ_CODE 0x7fa05000 /* D3DRS_POINTSIZE value that triggers NineDevice9_ResolveZ */
+/* RESZ: blit level 0 of the bound depth-stencil surface (state.ds) into the texture at stage 0. */
+static HRESULT
+NineDevice9_ResolveZ( struct NineDevice9 *This )
+{
+    struct nine_state *state = &This->state;
+    const struct util_format_description *desc;
+    struct NineSurface9 *source = state->ds;
+    struct NineBaseTexture9 *destination = state->texture[0];
+    struct pipe_resource *src, *dst;
+    struct pipe_blit_info blit;
+
+    DBG("RESZ resolve\n");
+    /* Both state.ds and texture[0] must be set, and texture[0] must be a D3DRTYPE_TEXTURE. */
+    user_assert(source && destination &&
+                destination->base.type == D3DRTYPE_TEXTURE, D3DERR_INVALIDCALL);
+
+    src = source->base.resource;
+    dst = destination->base.resource;
+    /* Both pipe resources must be non-NULL before their formats/sizes are read. */
+    user_assert(src && dst, D3DERR_INVALIDCALL);
+
+    /* Check that dst has a depth/stencil (ZS colorspace) format; src is already known to. */
+    desc = util_format_description(dst->format);
+    user_assert(desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS, D3DERR_INVALIDCALL);
+    /* Zero the blit info, then describe the source: level 0, full width0 x height0. */
+    memset(&blit, 0, sizeof(blit));
+    blit.src.resource = src;
+    blit.src.level = 0;
+    blit.src.format = src->format;
+    blit.src.box.z = 0;
+    blit.src.box.depth = 1;
+    blit.src.box.x = 0;
+    blit.src.box.y = 0;
+    blit.src.box.width = src->width0;
+    blit.src.box.height = src->height0;
+    /* Destination: level 0 over dst's own width0 x height0 (not compared against src's size). */
+    blit.dst.resource = dst;
+    blit.dst.level = 0;
+    blit.dst.format = dst->format;
+    blit.dst.box.z = 0;
+    blit.dst.box.depth = 1;
+    blit.dst.box.x = 0;
+    blit.dst.box.y = 0;
+    blit.dst.box.width = dst->width0;
+    blit.dst.box.height = dst->height0;
+    /* Blit with the ZS mask, nearest filtering and scissor disabled. */
+    blit.mask = PIPE_MASK_ZS;
+    blit.filter = PIPE_TEX_FILTER_NEAREST;
+    blit.scissor_enable = FALSE;
+    /* D3D_OK is returned unconditionally once the blit has been issued. */
+    This->pipe->blit(This->pipe, &blit);
+    return D3D_OK;
+}
+/* D3DRS_POINTSIZE magic values ('A2M1' / 'A2M0') that set / clear NINED3DRS_ALPHACOVERAGE. */
+#define ALPHA_TO_COVERAGE_ENABLE   MAKEFOURCC('A', '2', 'M', '1')
+#define ALPHA_TO_COVERAGE_DISABLE  MAKEFOURCC('A', '2', 'M', '0')
+
 HRESULT WINAPI
 NineDevice9_SetRenderState( struct NineDevice9 *This,
                             D3DRENDERSTATETYPE State,
@@ -2001,6 +2371,27 @@ NineDevice9_SetRenderState( struct NineDevice9 *This,
     DBG("This=%p State=%u(%s) Value=%08x\n", This,
         State, nine_d3drs_to_string(State), Value);
 
+    /* Amd hacks (equivalent to GL extensions) */
+    if (State == D3DRS_POINTSIZE) {
+        if (Value == RESZ_CODE)
+            return NineDevice9_ResolveZ(This);
+
+        if (Value == ALPHA_TO_COVERAGE_ENABLE ||
+            Value == ALPHA_TO_COVERAGE_DISABLE) {
+            state->rs[NINED3DRS_ALPHACOVERAGE] = (Value == ALPHA_TO_COVERAGE_ENABLE);
+            state->changed.group |= NINE_STATE_BLEND;
+            return D3D_OK;
+        }
+    }
+
+    /* NV hack */
+    if (State == D3DRS_ADAPTIVETESS_Y &&
+        (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && state->rs[NINED3DRS_ALPHACOVERAGE]))) {
+            state->rs[NINED3DRS_ALPHACOVERAGE] = (Value == D3DFMT_ATOC);
+            state->changed.group |= NINE_STATE_BLEND;
+            return D3D_OK;
+    }
+
     user_assert(State < Elements(state->rs), D3DERR_INVALIDCALL);
 
     if (likely(state->rs[State] != Value) || unlikely(This->is_recording)) {
@@ -2211,14 +2602,8 @@ NineDevice9_SetTexture( struct NineDevice9 *This,
                 Stage == D3DDMAPSAMPLER ||
                 (Stage >= D3DVERTEXTEXTURESAMPLER0 &&
                  Stage <= D3DVERTEXTEXTURESAMPLER3), D3DERR_INVALIDCALL);
-    user_assert(!tex || tex->base.pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL);
-
-    if (unlikely(tex && tex->base.pool == D3DPOOL_SYSTEMMEM)) {
-        /* TODO: Currently not implemented. Better return error
-         * with message telling what's wrong */
-        ERR("This=%p D3DPOOL_SYSTEMMEM not implemented for SetTexture\n", This);
-        user_assert(tex->base.pool != D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL);
-    }
+    user_assert(!tex || (tex->base.pool != D3DPOOL_SCRATCH &&
+                tex->base.pool != D3DPOOL_SYSTEMMEM), D3DERR_INVALIDCALL);
 
     if (Stage >= D3DDMAPSAMPLER)
         Stage = Stage - D3DDMAPSAMPLER + NINE_MAX_SAMPLERS_PS;
@@ -2232,7 +2617,7 @@ NineDevice9_SetTexture( struct NineDevice9 *This,
         if (tex) {
             state->samplers_shadow |= tex->shadow << Stage;
 
-            if ((tex->dirty | tex->dirty_mip) && LIST_IS_EMPTY(&tex->list))
+            if ((tex->managed.dirty | tex->dirty_mip) && LIST_IS_EMPTY(&tex->list))
                 list_add(&tex->list, &This->update_textures);
 
             tex->bind_count++;
@@ -2271,6 +2656,7 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This,
                                   DWORD Value )
 {
     struct nine_state *state = This->update;
+    int bumpmap_index = -1;
 
     DBG("Stage=%u Type=%u Value=%08x\n", Stage, Type, Value);
     nine_dump_D3DTSS_value(DBG_FF, Type, Value);
@@ -2279,6 +2665,36 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This,
     user_assert(Type < Elements(state->ff.tex_stage[0]), D3DERR_INVALIDCALL);
 
     state->ff.tex_stage[Stage][Type] = Value;
+    switch (Type) {
+    case D3DTSS_BUMPENVMAT00:
+        bumpmap_index = 4 * Stage;
+        break;
+    case D3DTSS_BUMPENVMAT10:
+        bumpmap_index = 4 * Stage + 1;
+        break;
+    case D3DTSS_BUMPENVMAT01:
+        bumpmap_index = 4 * Stage + 2;
+        break;
+    case D3DTSS_BUMPENVMAT11:
+        bumpmap_index = 4 * Stage + 3;
+        break;
+    case D3DTSS_BUMPENVLSCALE:
+        bumpmap_index = 4 * 8 + 2 * Stage;
+        break;
+    case D3DTSS_BUMPENVLOFFSET:
+        bumpmap_index = 4 * 8 + 2 * Stage + 1;
+        break;
+    case D3DTSS_TEXTURETRANSFORMFLAGS:
+        state->changed.group |= NINE_STATE_PS1X_SHADER;
+        break;
+    default:
+        break;
+    }
+
+    if (bumpmap_index >= 0) {
+        state->bumpmap_vars[bumpmap_index] = Value;
+        state->changed.group |= NINE_STATE_PS_CONST;
+    }
 
     state->changed.group |= NINE_STATE_FF_PSSTAGES;
     state->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32);
@@ -2323,9 +2739,11 @@ NineDevice9_SetSamplerState( struct NineDevice9 *This,
     if (Sampler >= D3DDMAPSAMPLER)
         Sampler = Sampler - D3DDMAPSAMPLER + NINE_MAX_SAMPLERS_PS;
 
-    state->samp[Sampler][Type] = Value;
-    state->changed.group |= NINE_STATE_SAMPLER;
-    state->changed.sampler[Sampler] |= 1 << Type;
+    if (state->samp[Sampler][Type] != Value || unlikely(This->is_recording)) {
+        state->samp[Sampler][Type] = Value;
+        state->changed.group |= NINE_STATE_SAMPLER;
+        state->changed.sampler[Sampler] |= 1 << Type;
+    }
 
     return D3D_OK;
 }
@@ -2457,7 +2875,7 @@ NineDevice9_GetNPatchMode( struct NineDevice9 *This )
     STUB(0);
 }
 
-static INLINE void
+static inline void
 init_draw_info(struct pipe_draw_info *info,
                struct NineDevice9 *dev, D3DPRIMITIVETYPE type, UINT count)
 {
@@ -2484,7 +2902,7 @@ NineDevice9_DrawPrimitive( struct NineDevice9 *This,
     DBG("iface %p, PrimitiveType %u, StartVertex %u, PrimitiveCount %u\n",
         This, PrimitiveType, StartVertex, PrimitiveCount);
 
-    nine_update_state(This, ~0);
+    nine_update_state(This);
 
     init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
     info.indexed = FALSE;
@@ -2517,7 +2935,7 @@ NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This,
     user_assert(This->state.idxbuf, D3DERR_INVALIDCALL);
     user_assert(This->state.vdecl, D3DERR_INVALIDCALL);
 
-    nine_update_state(This, ~0);
+    nine_update_state(This);
 
     init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
     info.indexed = TRUE;
@@ -2549,7 +2967,7 @@ NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This,
     user_assert(pVertexStreamZeroData && VertexStreamZeroStride,
                 D3DERR_INVALIDCALL);
 
-    nine_update_state(This, ~0);
+    nine_update_state(This);
 
     init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
     info.indexed = FALSE;
@@ -2563,13 +2981,17 @@ NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This,
     vtxbuf.buffer = NULL;
     vtxbuf.user_buffer = pVertexStreamZeroData;
 
-    if (!This->driver_caps.user_vbufs)
-        u_upload_data(This->upload,
+    if (!This->driver_caps.user_vbufs) {
+        u_upload_data(This->vertex_uploader,
                       0,
                       (info.max_index + 1) * VertexStreamZeroStride, /* XXX */
+                      4,
                       vtxbuf.user_buffer,
                       &vtxbuf.buffer_offset,
                       &vtxbuf.buffer);
+        u_upload_unmap(This->vertex_uploader);
+        vtxbuf.user_buffer = NULL;
+    }
 
     This->pipe->set_vertex_buffers(This->pipe, 0, 1, &vtxbuf);
 
@@ -2611,7 +3033,7 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
     user_assert(IndexDataFormat == D3DFMT_INDEX16 ||
                 IndexDataFormat == D3DFMT_INDEX32, D3DERR_INVALIDCALL);
 
-    nine_update_state(This, ~0);
+    nine_update_state(This);
 
     init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
     info.indexed = TRUE;
@@ -2632,23 +3054,30 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
 
     if (!This->driver_caps.user_vbufs) {
         const unsigned base = info.min_index * VertexStreamZeroStride;
-        u_upload_data(This->upload,
+        u_upload_data(This->vertex_uploader,
                       base,
                       (info.max_index -
                        info.min_index + 1) * VertexStreamZeroStride, /* XXX */
+                      4,
                       (const uint8_t *)vbuf.user_buffer + base,
                       &vbuf.buffer_offset,
                       &vbuf.buffer);
+        u_upload_unmap(This->vertex_uploader);
         /* Won't be used: */
         vbuf.buffer_offset -= base;
+        vbuf.user_buffer = NULL;
     }
-    if (!This->driver_caps.user_ibufs)
-        u_upload_data(This->upload,
+    if (!This->driver_caps.user_ibufs) {
+        u_upload_data(This->index_uploader,
                       0,
                       info.count * ibuf.index_size,
+                      4,
                       ibuf.user_buffer,
                       &ibuf.offset,
                       &ibuf.buffer);
+        u_upload_unmap(This->index_uploader);
+        ibuf.user_buffer = NULL;
+    }
 
     This->pipe->set_vertex_buffers(This->pipe, 0, 1, &vbuf);
     This->pipe->set_index_buffer(This->pipe, &ibuf);
@@ -2695,7 +3124,7 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This,
     if (!screen->get_param(screen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS))
         STUB(D3DERR_INVALIDCALL);
 
-    nine_update_state(This, ~0);
+    nine_update_state(This);
 
     /* TODO: Create shader with stream output. */
     STUB(D3DERR_INVALIDCALL);
@@ -2722,7 +3151,7 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This,
         buffer_offset = 0;
     } else {
         /* SO matches vertex declaration */
-        resource = dst->base.resource;
+        resource = NineVertexBuffer9_GetResource(dst);
         buffer_offset = DestIndex * vs->so->stride[0];
     }
     target = This->pipe->create_stream_output_target(This->pipe, resource,
@@ -2784,13 +3213,21 @@ NineDevice9_SetVertexDeclaration( struct NineDevice9 *This,
                                   IDirect3DVertexDeclaration9 *pDecl )
 {
     struct nine_state *state = This->update;
+    BOOL was_programmable_vs = This->state.programmable_vs;
 
     DBG("This=%p pDecl=%p\n", This, pDecl);
 
     if (likely(!This->is_recording) && state->vdecl == NineVertexDeclaration9(pDecl))
         return D3D_OK;
+
     nine_bind(&state->vdecl, pDecl);
 
+    This->state.programmable_vs = This->state.vs && !(This->state.vdecl && This->state.vdecl->position_t);
+    if (likely(!This->is_recording) && was_programmable_vs != This->state.programmable_vs) {
+        state->commit |= NINE_STATE_COMMIT_CONST_VS;
+        state->changed.group |= NINE_STATE_VS;
+    }
+
     state->changed.group |= NINE_STATE_VDECL;
 
     return D3D_OK;
@@ -2862,11 +3299,21 @@ NineDevice9_SetVertexShader( struct NineDevice9 *This,
                              IDirect3DVertexShader9 *pShader )
 {
     struct nine_state *state = This->update;
+    BOOL was_programmable_vs = This->state.programmable_vs;
 
     DBG("This=%p pShader=%p\n", This, pShader);
 
+    if (!This->is_recording && state->vs == (struct NineVertexShader9*)pShader)
+      return D3D_OK;
+
     nine_bind(&state->vs, pShader);
 
+    This->state.programmable_vs = This->state.vs && !(This->state.vdecl && This->state.vdecl->position_t);
+
+    /* ff -> non-ff: commit back non-ff constants */
+    if (!was_programmable_vs && This->state.programmable_vs)
+        state->commit |= NINE_STATE_COMMIT_CONST_VS;
+
     state->changed.group |= NINE_STATE_VS;
 
     return D3D_OK;
@@ -2899,6 +3346,12 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
        return D3D_OK;
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
+    if (!This->is_recording) {
+        if (!memcmp(&state->vs_const_f[StartRegister * 4], pConstantData,
+                    Vector4fCount * 4 * sizeof(state->vs_const_f[0])))
+            return D3D_OK;
+    }
+
     memcpy(&state->vs_const_f[StartRegister * 4],
            pConstantData,
            Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
@@ -2938,6 +3391,7 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
                                       UINT Vector4iCount )
 {
     struct nine_state *state = This->update;
+    int i;
 
     DBG("This=%p StartRegister=%u pConstantData=%p Vector4iCount=%u\n",
         This, StartRegister, pConstantData, Vector4iCount);
@@ -2946,9 +3400,23 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
     user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
-    memcpy(&state->vs_const_i[StartRegister][0],
-           pConstantData,
-           Vector4iCount * sizeof(state->vs_const_i[0]));
+    if (This->driver_caps.vs_integer) {
+        if (!This->is_recording) {
+            if (!memcmp(&state->vs_const_i[StartRegister][0], pConstantData,
+                        Vector4iCount * sizeof(state->vs_const_i[0])))
+                return D3D_OK;
+        }
+        memcpy(&state->vs_const_i[StartRegister][0],
+               pConstantData,
+               Vector4iCount * sizeof(state->vs_const_i[0]));
+    } else {
+        for (i = 0; i < Vector4iCount; i++) {
+            state->vs_const_i[StartRegister+i][0] = fui((float)(pConstantData[4*i]));
+            state->vs_const_i[StartRegister+i][1] = fui((float)(pConstantData[4*i+1]));
+            state->vs_const_i[StartRegister+i][2] = fui((float)(pConstantData[4*i+2]));
+            state->vs_const_i[StartRegister+i][3] = fui((float)(pConstantData[4*i+3]));
+        }
+    }
 
     state->changed.vs_const_i |= ((1 << Vector4iCount) - 1) << StartRegister;
     state->changed.group |= NINE_STATE_VS_CONST;
@@ -2963,14 +3431,24 @@ NineDevice9_GetVertexShaderConstantI( struct NineDevice9 *This,
                                       UINT Vector4iCount )
 {
     const struct nine_state *state = &This->state;
+    int i;
 
     user_assert(StartRegister                  < NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
     user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
-    memcpy(pConstantData,
-           &state->vs_const_i[StartRegister][0],
-           Vector4iCount * sizeof(state->vs_const_i[0]));
+    if (This->driver_caps.vs_integer) {
+        memcpy(pConstantData,
+               &state->vs_const_i[StartRegister][0],
+               Vector4iCount * sizeof(state->vs_const_i[0]));
+    } else {
+        for (i = 0; i < Vector4iCount; i++) {
+            pConstantData[4*i] = (int32_t) uif(state->vs_const_i[StartRegister+i][0]);
+            pConstantData[4*i+1] = (int32_t) uif(state->vs_const_i[StartRegister+i][1]);
+            pConstantData[4*i+2] = (int32_t) uif(state->vs_const_i[StartRegister+i][2]);
+            pConstantData[4*i+3] = (int32_t) uif(state->vs_const_i[StartRegister+i][3]);
+        }
+    }
 
     return D3D_OK;
 }
@@ -2982,6 +3460,8 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
                                       UINT BoolCount )
 {
     struct nine_state *state = This->update;
+    int i;
+    uint32_t bool_true = This->driver_caps.vs_integer ? 0xFFFFFFFF : fui(1.0f);
 
     DBG("This=%p StartRegister=%u pConstantData=%p BoolCount=%u\n",
         This, StartRegister, pConstantData, BoolCount);
@@ -2990,9 +3470,18 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
     user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
-    memcpy(&state->vs_const_b[StartRegister],
-           pConstantData,
-           BoolCount * sizeof(state->vs_const_b[0]));
+    if (!This->is_recording) {
+        bool noChange = true;
+        for (i = 0; i < BoolCount; i++) {
+            if (!!state->vs_const_b[StartRegister + i] != !!pConstantData[i])
+              noChange = false;
+        }
+        if (noChange)
+            return D3D_OK;
+    }
+
+    for (i = 0; i < BoolCount; i++)
+        state->vs_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
 
     state->changed.vs_const_b |= ((1 << BoolCount) - 1) << StartRegister;
     state->changed.group |= NINE_STATE_VS_CONST;
@@ -3007,14 +3496,14 @@ NineDevice9_GetVertexShaderConstantB( struct NineDevice9 *This,
                                       UINT BoolCount )
 {
     const struct nine_state *state = &This->state;
+    int i; /* loop index over the BoolCount registers */
 
     user_assert(StartRegister              < NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
     user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
-    memcpy(pConstantData,
-           &state->vs_const_b[StartRegister],
-           BoolCount * sizeof(state->vs_const_b[0]));
+    for (i = 0; i < BoolCount; i++)
+        pConstantData[i] = state->vs_const_b[StartRegister + i] != 0 ? TRUE : FALSE; /* hand back TRUE/FALSE instead of the stored raw value */
 
     return D3D_OK;
 }
@@ -3050,7 +3539,8 @@ NineDevice9_SetStreamSource( struct NineDevice9 *This,
         state->vtxbuf[i].stride = Stride;
         state->vtxbuf[i].buffer_offset = OffsetInBytes;
     }
-    state->vtxbuf[i].buffer = pStreamData ? pVBuf9->base.resource : NULL;
+    pipe_resource_reference(&state->vtxbuf[i].buffer, /* replaces the plain pointer assignment */
+                            pStreamData ? NineVertexBuffer9_GetResource(pVBuf9) : NULL); /* NULL when pStreamData is NULL; NOTE(review): helper is defined elsewhere, presumably reference-counting - confirm */
 
     return D3D_OK;
 }
@@ -3093,6 +3583,9 @@ NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This,
                   (Setting & D3DSTREAMSOURCE_INDEXEDDATA)), D3DERR_INVALIDCALL);
     user_assert(Setting, D3DERR_INVALIDCALL);
 
+    if (likely(!This->is_recording) && state->stream_freq[StreamNumber] == Setting) /* not recording and this value is already set */
+        return D3D_OK; /* skip the state and dirty-flag updates below */
+
     state->stream_freq[StreamNumber] = Setting;
 
     if (Setting & D3DSTREAMSOURCE_INSTANCEDATA)
@@ -3100,7 +3593,9 @@ NineDevice9_SetStreamSourceFreq( struct NineDevice9 *This,
     else
         state->stream_instancedata_mask &= ~(1 << StreamNumber);
 
-    state->changed.stream_freq |= 1 << StreamNumber;
+    state->changed.stream_freq |= 1 << StreamNumber; /* Used for stateblocks */
+    if (StreamNumber != 0) /* NOTE(review): stream 0 never raises the group flag; reason not visible in this hunk */
+        state->changed.group |= NINE_STATE_STREAMFREQ;
     return D3D_OK;
 }
 
@@ -3167,13 +3662,28 @@ NineDevice9_SetPixelShader( struct NineDevice9 *This,
                             IDirect3DPixelShader9 *pShader )
 {
     struct nine_state *state = This->update;
+    unsigned old_mask = state->ps ? state->ps->rt_mask : 1; /* rt_mask of the shader bound on entry, or 1 when none is bound */
+    unsigned mask; /* same quantity, re-read after the bind */
 
     DBG("This=%p pShader=%p\n", This, pShader);
 
+    if (!This->is_recording && state->ps == (struct NinePixelShader9*)pShader) /* not recording and this shader is already bound */
+      return D3D_OK;
+
+    /* ff -> non-ff: commit back non-ff constants */
+    if (!state->ps && pShader) /* no shader was bound and one is being set */
+        state->commit |= NINE_STATE_COMMIT_CONST_PS;
+
     nine_bind(&state->ps, pShader);
 
     state->changed.group |= NINE_STATE_PS;
 
+    mask = state->ps ? state->ps->rt_mask : 1;
+    /* We need to update cbufs if the pixel shader would
+     * write to different render targets */
+    if (mask != old_mask)
+        state->changed.group |= NINE_STATE_FB;
+
     return D3D_OK;
 }
 
@@ -3197,13 +3707,19 @@ NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This,
     DBG("This=%p StartRegister=%u pConstantData=%p Vector4fCount=%u\n",
         This, StartRegister, pConstantData, Vector4fCount);
 
-    user_assert(StartRegister                  < NINE_MAX_CONST_F, D3DERR_INVALIDCALL);
-    user_assert(StartRegister + Vector4fCount <= NINE_MAX_CONST_F, D3DERR_INVALIDCALL);
+    user_assert(StartRegister                  < NINE_MAX_CONST_F_PS3, D3DERR_INVALIDCALL); /* bound is now NINE_MAX_CONST_F_PS3 rather than NINE_MAX_CONST_F */
+    user_assert(StartRegister + Vector4fCount <= NINE_MAX_CONST_F_PS3, D3DERR_INVALIDCALL); /* the whole requested range must fit */
 
     if (!Vector4fCount)
        return D3D_OK;
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
+    if (!This->is_recording) { /* only when not recording: detect a redundant call */
+        if (!memcmp(&state->ps_const_f[StartRegister * 4], pConstantData,
+                    Vector4fCount * 4 * sizeof(state->ps_const_f[0]))) /* byte-wise comparison of Vector4fCount * 4 elements */
+            return D3D_OK; /* identical data: return before the memcpy below */
+    }
+
     memcpy(&state->ps_const_f[StartRegister * 4],
            pConstantData,
            Vector4fCount * 4 * sizeof(state->ps_const_f[0]));
@@ -3225,8 +3741,8 @@ NineDevice9_GetPixelShaderConstantF( struct NineDevice9 *This,
 {
     const struct nine_state *state = &This->state;
 
-    user_assert(StartRegister                  < NINE_MAX_CONST_F, D3DERR_INVALIDCALL);
-    user_assert(StartRegister + Vector4fCount <= NINE_MAX_CONST_F, D3DERR_INVALIDCALL);
+    user_assert(StartRegister                  < NINE_MAX_CONST_F_PS3, D3DERR_INVALIDCALL); /* bound is now NINE_MAX_CONST_F_PS3 rather than NINE_MAX_CONST_F */
+    user_assert(StartRegister + Vector4fCount <= NINE_MAX_CONST_F_PS3, D3DERR_INVALIDCALL); /* the whole requested range must fit */
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
     memcpy(pConstantData,
@@ -3243,6 +3759,7 @@ NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This,
                                      UINT Vector4iCount )
 {
     struct nine_state *state = This->update;
+    int i; /* loop index over the Vector4iCount vectors */
 
     DBG("This=%p StartRegister=%u pConstantData=%p Vector4iCount=%u\n",
         This, StartRegister, pConstantData, Vector4iCount);
@@ -3251,10 +3768,23 @@ NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This,
     user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
-    memcpy(&state->ps_const_i[StartRegister][0],
-           pConstantData,
-           Vector4iCount * sizeof(state->ps_const_i[0]));
-
+    if (This->driver_caps.ps_integer) { /* store the caller's integers unchanged */
+        if (!This->is_recording) {
+            if (!memcmp(&state->ps_const_i[StartRegister][0], pConstantData,
+                        Vector4iCount * sizeof(state->ps_const_i[0])))
+                return D3D_OK; /* identical data: return before any dirty flag is set */
+        }
+        memcpy(&state->ps_const_i[StartRegister][0],
+               pConstantData,
+               Vector4iCount * sizeof(state->ps_const_i[0]));
+    } else { /* store each component as fui((float)value); this path has no redundant-call check */
+        for (i = 0; i < Vector4iCount; i++) {
+            state->ps_const_i[StartRegister+i][0] = fui((float)(pConstantData[4*i]));
+            state->ps_const_i[StartRegister+i][1] = fui((float)(pConstantData[4*i+1]));
+            state->ps_const_i[StartRegister+i][2] = fui((float)(pConstantData[4*i+2]));
+            state->ps_const_i[StartRegister+i][3] = fui((float)(pConstantData[4*i+3]));
+        }
+    }
     state->changed.ps_const_i |= ((1 << Vector4iCount) - 1) << StartRegister;
     state->changed.group |= NINE_STATE_PS_CONST;
 
@@ -3268,14 +3798,24 @@ NineDevice9_GetPixelShaderConstantI( struct NineDevice9 *This,
                                      UINT Vector4iCount )
 {
     const struct nine_state *state = &This->state;
+    int i; /* loop index over the Vector4iCount vectors */
 
     user_assert(StartRegister                  < NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
     user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
-    memcpy(pConstantData,
-           &state->ps_const_i[StartRegister][0],
-           Vector4iCount * sizeof(state->ps_const_i[0]));
+    if (This->driver_caps.ps_integer) { /* stored values are copied out unchanged */
+        memcpy(pConstantData,
+               &state->ps_const_i[StartRegister][0],
+               Vector4iCount * sizeof(state->ps_const_i[0]));
+    } else { /* each stored component goes through uif() and is cast to int32_t; uif() is defined elsewhere, presumably reinterpreting the bits as float */
+        for (i = 0; i < Vector4iCount; i++) {
+            pConstantData[4*i] = (int32_t) uif(state->ps_const_i[StartRegister+i][0]);
+            pConstantData[4*i+1] = (int32_t) uif(state->ps_const_i[StartRegister+i][1]);
+            pConstantData[4*i+2] = (int32_t) uif(state->ps_const_i[StartRegister+i][2]);
+            pConstantData[4*i+3] = (int32_t) uif(state->ps_const_i[StartRegister+i][3]);
+        }
+    }
 
     return D3D_OK;
 }
@@ -3287,6 +3827,8 @@ NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This,
                                      UINT BoolCount )
 {
     struct nine_state *state = This->update;
+    int i; /* loop index over the BoolCount registers */
+    uint32_t bool_true = This->driver_caps.ps_integer ? 0xFFFFFFFF : fui(1.0f); /* value stored for TRUE; fui() is defined elsewhere, presumably the raw bits of 1.0f */
 
     DBG("This=%p StartRegister=%u pConstantData=%p BoolCount=%u\n",
         This, StartRegister, pConstantData, BoolCount);
@@ -3295,9 +3837,18 @@ NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This,
     user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
-    memcpy(&state->ps_const_b[StartRegister],
-           pConstantData,
-           BoolCount * sizeof(state->ps_const_b[0]));
+    if (!This->is_recording) { /* only when not recording: detect a redundant call */
+        bool noChange = true;
+        for (i = 0; i < BoolCount; i++) {
+            if (!!state->ps_const_b[StartRegister + i] != !!pConstantData[i]) /* compare as booleans, not raw values */
+              noChange = false;
+        }
+        if (noChange)
+            return D3D_OK; /* returns before the changed.* flags below are set */
+    }
+
+    for (i = 0; i < BoolCount; i++)
+        state->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0; /* any nonzero input is stored as bool_true */
 
     state->changed.ps_const_b |= ((1 << BoolCount) - 1) << StartRegister;
     state->changed.group |= NINE_STATE_PS_CONST;
@@ -3312,14 +3863,14 @@ NineDevice9_GetPixelShaderConstantB( struct NineDevice9 *This,
                                      UINT BoolCount )
 {
     const struct nine_state *state = &This->state;
+    int i; /* loop index over the BoolCount registers */
 
     user_assert(StartRegister              < NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
     user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
-    memcpy(pConstantData,
-           &state->ps_const_b[StartRegister],
-           BoolCount * sizeof(state->ps_const_b[0]));
+    for (i = 0; i < BoolCount; i++)
+        pConstantData[i] = state->ps_const_b[StartRegister + i] ? TRUE : FALSE; /* hand back TRUE/FALSE instead of the stored raw value */
 
     return D3D_OK;
 }
@@ -3508,7 +4059,8 @@ NineDevice9_new( struct pipe_screen *pScreen,
                  struct d3dadapter9_context *pCTX,
                  boolean ex,
                  D3DDISPLAYMODEEX *pFullscreenDisplayMode,
-                 struct NineDevice9 **ppOut )
+                 struct NineDevice9 **ppOut,
+                 int minorVersionNum ) /* new trailing parameter, passed on to NINE_NEW */
 {
     BOOL lock;
     lock = !!(pCreationParameters->BehaviorFlags & D3DCREATE_MULTITHREADED);
@@ -3516,5 +4068,5 @@ NineDevice9_new( struct pipe_screen *pScreen,
     NINE_NEW(Device9, ppOut, lock, /* args */
              pScreen, pCreationParameters, pCaps,
              pPresentationParameters, pD3D9, pPresentationGroup, pCTX,
-             ex, pFullscreenDisplayMode);
+             ex, pFullscreenDisplayMode, minorVersionNum ); /* minorVersionNum appended as the last NINE_NEW argument */
 }