llvmpipe: add LP_NEW_GS flag for updating vertex info
[mesa.git] / src / gallium / state_trackers / nine / device9.c
index 8f9872c15601547a47faf20e424212225fddb792..88df38c43f69f02b1110261f12b2d1eda853de15 100644 (file)
@@ -34,6 +34,7 @@
 #include "texture9.h"
 #include "cubetexture9.h"
 #include "volumetexture9.h"
+#include "nine_buffer_upload.h"
 #include "nine_helpers.h"
 #include "nine_pipe.h"
 #include "nine_ff.h"
@@ -100,6 +101,8 @@ NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset )
     This->state.viewport.Width = refSurf->desc.Width;
     This->state.viewport.Height = refSurf->desc.Height;
 
+    nine_context_set_viewport(This, &This->state.viewport);
+
     This->state.scissor.minx = 0;
     This->state.scissor.miny = 0;
     This->state.scissor.maxx = refSurf->desc.Width;
@@ -171,6 +174,7 @@ NineDevice9_ctor( struct NineDevice9 *This,
         DBG("Application asked mixed Software Vertex Processing.\n");
         This->may_swvp = true;
     }
+    This->context.swvp = This->swvp;
     /* TODO: check if swvp is resetted by device Resets */
 
     if (This->may_swvp &&
@@ -189,18 +193,19 @@ NineDevice9_ctor( struct NineDevice9 *This,
     if (This->may_swvp)
         This->caps.MaxVertexShaderConst = NINE_MAX_CONST_F_SWVP;
 
-    This->pipe = This->screen->context_create(This->screen, NULL, 0);
-    if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */
+    This->context.pipe = This->screen->context_create(This->screen, NULL, 0);
+    This->pipe_secondary = This->screen->context_create(This->screen, NULL, 0);
+    if (!This->context.pipe || !This->pipe_secondary) { return E_OUTOFMEMORY; } /* guess */
     This->pipe_sw = This->screen_sw->context_create(This->screen_sw, NULL, 0);
     if (!This->pipe_sw) { return E_OUTOFMEMORY; }
 
-    This->cso = cso_create_context(This->pipe);
-    if (!This->cso) { return E_OUTOFMEMORY; } /* also a guess */
-    This->cso_sw = cso_create_context(This->pipe_sw);
+    This->context.cso = cso_create_context(This->context.pipe, 0);
+    if (!This->context.cso) { return E_OUTOFMEMORY; } /* also a guess */
+    This->cso_sw = cso_create_context(This->pipe_sw, 0);
     if (!This->cso_sw) { return E_OUTOFMEMORY; }
 
     /* Create first, it messes up our state. */
-    This->hud = hud_create(This->pipe, This->cso); /* NULL result is fine */
+    This->hud = hud_create(This->context.pipe, This->context.cso); /* NULL result is fine */
 
     /* Available memory counter. Updated only for allocations with this device
      * instance. This is the Win 7 behavior.
@@ -260,6 +265,32 @@ NineDevice9_ctor( struct NineDevice9 *This,
         nine_bind(&This->context.rt[i], This->state.rt[i]);
     }
 
+    /* Initialize CSMT */
+    if (pCTX->csmt_force == 1)
+        This->csmt_active = true;
+    else if (pCTX->csmt_force == 0)
+        This->csmt_active = false;
+    else
+        /* r600 and radeonsi are thread safe. */
+        This->csmt_active = strstr(pScreen->get_name(pScreen), "AMD") != NULL;
+
+    /* We rely on u_upload_mgr using persistent coherent buffers (which don't
+     * require flush to work in multi-pipe_context scenario) for vertex and
+     * index buffers */
+    if (!GET_PCAP(BUFFER_MAP_PERSISTENT_COHERENT))
+        This->csmt_active = false;
+
+    if (This->csmt_active) {
+        This->csmt_ctx = nine_csmt_create(This);
+        if (!This->csmt_ctx)
+            return E_OUTOFMEMORY;
+    }
+
+    if (This->csmt_active)
+        DBG("\033[1;32mCSMT is active\033[0m\n");
+
+    This->buffer_upload = nine_upload_create(This->pipe_secondary, 4 * 1024 * 1024, 4);
+
     /* Initialize a dummy VBO to be used when a vertex declaration does not
      * specify all the inputs needed by vertex shader, on win default behavior
      * is to pass 0,0,0,0 to the shader */
@@ -287,14 +318,14 @@ NineDevice9_ctor( struct NineDevice9 *This,
             return D3DERR_OUTOFVIDEOMEMORY;
 
         u_box_1d(0, 16, &box);
-        data = This->pipe->transfer_map(This->pipe, This->dummy_vbo, 0,
+        data = This->context.pipe->transfer_map(This->context.pipe, This->dummy_vbo, 0,
                                         PIPE_TRANSFER_WRITE |
                                         PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
                                         &box, &transfer);
         assert(data);
         assert(transfer);
         memset(data, 0, 16);
-        This->pipe->transfer_unmap(This->pipe, transfer);
+        This->context.pipe->transfer_unmap(This->context.pipe, transfer);
     }
 
     This->cursor.software = FALSE;
@@ -421,7 +452,7 @@ NineDevice9_ctor( struct NineDevice9 *This,
         templ.swizzle_a = PIPE_SWIZZLE_1;
         templ.target = This->dummy_texture->target;
 
-        This->dummy_sampler_view = This->pipe->create_sampler_view(This->pipe, This->dummy_texture, &templ);
+        This->dummy_sampler_view = This->context.pipe->create_sampler_view(This->context.pipe, This->dummy_texture, &templ);
         if (!This->dummy_sampler_view)
             return D3DERR_DRIVERINTERNALERROR;
 
@@ -441,29 +472,13 @@ NineDevice9_ctor( struct NineDevice9 *This,
 
     /* Allocate upload helper for drivers that suck (from st pov ;). */
 
-    This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS);
-    This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS);
+    This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS) && !This->csmt_active;
     This->driver_caps.user_cbufs = GET_PCAP(USER_CONSTANT_BUFFERS);
     This->driver_caps.user_sw_vbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_VERTEX_BUFFERS);
     This->driver_caps.user_sw_cbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_CONSTANT_BUFFERS);
-
-    if (!This->driver_caps.user_vbufs)
-        This->vertex_uploader = u_upload_create(This->pipe, 65536,
-                                                PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM);
-    This->vertex_sw_uploader = u_upload_create(This->pipe_sw, 65536,
-                                            PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM);
-    if (!This->driver_caps.user_ibufs)
-        This->index_uploader = u_upload_create(This->pipe, 128 * 1024,
-                                               PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_STREAM);
-    if (!This->driver_caps.user_cbufs) {
+    This->vertex_uploader = This->csmt_active ? This->pipe_secondary->stream_uploader : This->context.pipe->stream_uploader;
+    if (!This->driver_caps.user_cbufs)
         This->constbuf_alignment = GET_PCAP(CONSTANT_BUFFER_OFFSET_ALIGNMENT);
-        This->constbuf_uploader = u_upload_create(This->pipe, This->vs_const_size,
-                                                  PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM);
-    }
-
-    This->constbuf_sw_uploader = u_upload_create(This->pipe_sw, 128 * 1024,
-                                                 PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM);
-
     This->driver_caps.window_space_position_support = GET_PCAP(TGSI_VS_WINDOW_SPACE_POSITION);
     This->driver_caps.vs_integer = pScreen->get_shader_param(pScreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS);
     This->driver_caps.ps_integer = pScreen->get_shader_param(pScreen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS);
@@ -476,7 +491,7 @@ NineDevice9_ctor( struct NineDevice9 *This,
     {
         struct pipe_poly_stipple stipple;
         memset(&stipple, ~0, sizeof(stipple));
-        This->pipe->set_polygon_stipple(This->pipe, &stipple);
+        This->context.pipe->set_polygon_stipple(This->context.pipe, &stipple);
     }
 
     This->update = &This->state;
@@ -484,6 +499,7 @@ NineDevice9_ctor( struct NineDevice9 *This,
     nine_state_init_sw(This);
 
     ID3DPresentGroup_Release(This->present);
+    nine_csmt_process(This);
 
     return D3D_OK;
 }
@@ -496,23 +512,22 @@ NineDevice9_dtor( struct NineDevice9 *This )
 
     DBG("This=%p\n", This);
 
-    if (This->pipe && This->cso)
-        nine_pipe_context_clear(This);
+    /* Flush all pending commands to get refcount right,
+     * and properly release bound objects. It is ok to still
+     * execute commands while we are in device dtor, because
+     * we haven't released anything yet. Note that no pending
+     * command can increase the device refcount. */
+    if (This->csmt_active && This->csmt_ctx) {
+        nine_csmt_process(This);
+        nine_csmt_destroy(This, This->csmt_ctx);
+        This->csmt_active = FALSE;
+        This->csmt_ctx = NULL;
+    }
+
     nine_ff_fini(This);
     nine_state_destroy_sw(This);
     nine_state_clear(&This->state, TRUE);
-    nine_context_clear(&This->context);
-
-    if (This->vertex_uploader)
-        u_upload_destroy(This->vertex_uploader);
-    if (This->index_uploader)
-        u_upload_destroy(This->index_uploader);
-    if (This->constbuf_uploader)
-        u_upload_destroy(This->constbuf_uploader);
-    if (This->vertex_sw_uploader)
-        u_upload_destroy(This->vertex_sw_uploader);
-    if (This->constbuf_sw_uploader)
-        u_upload_destroy(This->constbuf_sw_uploader);
+    nine_context_clear(This);
 
     nine_bind(&This->record, NULL);
 
@@ -542,10 +557,14 @@ NineDevice9_dtor( struct NineDevice9 *This )
         FREE(This->swapchains);
     }
 
+    if (This->buffer_upload)
+        nine_upload_destroy(This->buffer_upload);
+
     /* Destroy cso first */
-    if (This->cso) { cso_destroy_context(This->cso); }
+    if (This->context.cso) { cso_destroy_context(This->context.cso); }
     if (This->cso_sw) { cso_destroy_context(This->cso_sw); }
-    if (This->pipe && This->pipe->destroy) { This->pipe->destroy(This->pipe); }
+    if (This->context.pipe && This->context.pipe->destroy) { This->context.pipe->destroy(This->context.pipe); }
+    if (This->pipe_secondary && This->pipe_secondary->destroy) { This->pipe_secondary->destroy(This->pipe_secondary); }
     if (This->pipe_sw && This->pipe_sw->destroy) { This->pipe_sw->destroy(This->pipe_sw); }
 
     if (This->present) { ID3DPresentGroup_Release(This->present); }
@@ -563,13 +582,7 @@ NineDevice9_GetScreen( struct NineDevice9 *This )
 struct pipe_context *
 NineDevice9_GetPipe( struct NineDevice9 *This )
 {
-    return This->pipe;
-}
-
-struct cso_context *
-NineDevice9_GetCSO( struct NineDevice9 *This )
-{
-    return This->cso;
+    return nine_context_get_pipe(This);
 }
 
 const D3DCAPS9 *
@@ -687,7 +700,7 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
                                  IDirect3DSurface9 *pCursorBitmap )
 {
     struct NineSurface9 *surf = NineSurface9(pCursorBitmap);
-    struct pipe_context *pipe = This->pipe;
+    struct pipe_context *pipe = NineDevice9_GetPipe(This);
     struct pipe_box box;
     struct pipe_transfer *transfer;
     BOOL hw_cursor;
@@ -875,9 +888,9 @@ NineDevice9_Reset( struct NineDevice9 *This,
             break;
     }
 
-    nine_pipe_context_clear(This);
+    nine_csmt_process(This);
     nine_state_clear(&This->state, TRUE);
-    nine_context_clear(&This->context);
+    nine_context_clear(This);
 
     NineDevice9_SetDefaultState(This, TRUE);
     NineDevice9_SetRenderTarget(
@@ -1528,7 +1541,6 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
                          D3DTEXTUREFILTERTYPE Filter )
 {
     struct pipe_screen *screen = This->screen;
-    struct pipe_context *pipe = This->pipe;
     struct NineSurface9 *dst = NineSurface9(pDestSurface);
     struct NineSurface9 *src = NineSurface9(pSourceSurface);
     struct pipe_resource *dst_res = NineSurface9_GetResource(dst);
@@ -1701,7 +1713,8 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
                                                 PIPE_BIND_RENDER_TARGET),
                     D3DERR_INVALIDCALL);
 
-        pipe->blit(pipe, &blit);
+        nine_context_blit(This, (struct NineUnknown *)dst,
+                          (struct NineUnknown *)src, &blit);
     } else {
         assert(blit.dst.box.x >= 0 && blit.dst.box.y >= 0 &&
                blit.src.box.x >= 0 && blit.src.box.y >= 0 &&
@@ -1711,11 +1724,12 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
                blit.src.box.y + blit.src.box.height <= src->desc.Height);
         /* Or drivers might crash ... */
         DBG("Using resource_copy_region.\n");
-        pipe->resource_copy_region(pipe,
-            blit.dst.resource, blit.dst.level,
-            blit.dst.box.x, blit.dst.box.y, blit.dst.box.z,
-            blit.src.resource, blit.src.level,
-            &blit.src.box);
+        nine_context_resource_copy_region(This, (struct NineUnknown *)dst,
+                                          (struct NineUnknown *)src,
+                                          blit.dst.resource, blit.dst.level,
+                                          &blit.dst.box,
+                                          blit.src.resource, blit.src.level,
+                                          &blit.src.box);
     }
 
     /* Communicate the container it needs to update sublevels - if apply */
@@ -1730,12 +1744,8 @@ NineDevice9_ColorFill( struct NineDevice9 *This,
                        const RECT *pRect,
                        D3DCOLOR color )
 {
-    struct pipe_context *pipe = This->pipe;
     struct NineSurface9 *surf = NineSurface9(pSurface);
-    struct pipe_surface *psurf;
     unsigned x, y, w, h;
-    union pipe_color_union rgba;
-    boolean fallback;
 
     DBG("This=%p pSurface=%p pRect=%p color=%08x\n", This,
         pSurface, pRect, color);
@@ -1769,24 +1779,15 @@ NineDevice9_ColorFill( struct NineDevice9 *This,
         w = surf->desc.Width;
         h = surf->desc.Height;
     }
-    d3dcolor_to_pipe_color_union(&rgba, color);
 
-    fallback = !(surf->base.info.bind & PIPE_BIND_RENDER_TARGET);
-
-    if (!fallback) {
-        psurf = NineSurface9_GetSurface(surf, 0);
-        if (!psurf)
-            fallback = TRUE;
-    }
-
-    if (!fallback) {
-        pipe->clear_render_target(pipe, psurf, &rgba, x, y, w, h, false);
+    if (surf->base.info.bind & PIPE_BIND_RENDER_TARGET) {
+        nine_context_clear_render_target(This, surf, color, x, y, w, h);
     } else {
         D3DLOCKED_RECT lock;
         union util_color uc;
         HRESULT hr;
         /* XXX: lock pRect and fix util_fill_rect */
-        hr = NineSurface9_LockRect(surf, &lock, NULL, 0);
+        hr = NineSurface9_LockRect(surf, &lock, NULL, pRect ? 0 : D3DLOCK_DISCARD);
         if (FAILED(hr))
             return hr;
         util_pack_color_ub(color >> 16, color >> 8, color >> 0, color >> 24,
@@ -1891,11 +1892,12 @@ HRESULT NINE_WINAPI
 NineDevice9_SetDepthStencilSurface( struct NineDevice9 *This,
                                     IDirect3DSurface9 *pNewZStencil )
 {
+    struct NineSurface9 *ds = NineSurface9(pNewZStencil);
     DBG("This=%p pNewZStencil=%p\n", This, pNewZStencil);
 
-    if (This->state.ds != NineSurface9(pNewZStencil)) {
-        nine_bind(&This->state.ds, pNewZStencil);
-        This->state.changed.group |= NINE_STATE_FB;
+    if (This->state.ds != ds) {
+        nine_bind(&This->state.ds, ds);
+        nine_context_set_depth_stencil(This, ds);
     }
     return D3D_OK;
 }
@@ -1973,15 +1975,18 @@ NineDevice9_SetTransform( struct NineDevice9 *This,
                           const D3DMATRIX *pMatrix )
 {
     struct nine_state *state = This->update;
-    D3DMATRIX *M = nine_state_access_transform(state, State, TRUE);
+    D3DMATRIX *M = nine_state_access_transform(&state->ff, State, TRUE);
 
     DBG("This=%p State=%d pMatrix=%p\n", This, State, pMatrix);
 
     user_assert(M, D3DERR_INVALIDCALL);
 
     *M = *pMatrix;
-    state->ff.changed.transform[State / 32] |= 1 << (State % 32);
-    state->changed.group |= NINE_STATE_FF;
+    if (unlikely(This->is_recording)) {
+        state->ff.changed.transform[State / 32] |= 1 << (State % 32);
+        state->changed.group |= NINE_STATE_FF;
+    } else
+        nine_context_set_transform(This, State, pMatrix);
 
     return D3D_OK;
 }
@@ -1991,7 +1996,7 @@ NineDevice9_GetTransform( struct NineDevice9 *This,
                           D3DTRANSFORMSTATETYPE State,
                           D3DMATRIX *pMatrix )
 {
-    D3DMATRIX *M = nine_state_access_transform(&This->state, State, FALSE);
+    D3DMATRIX *M = nine_state_access_transform(&This->state.ff, State, FALSE);
     user_assert(M, D3DERR_INVALIDCALL);
     *pMatrix = *M;
     return D3D_OK;
@@ -2004,7 +2009,7 @@ NineDevice9_MultiplyTransform( struct NineDevice9 *This,
 {
     struct nine_state *state = This->update;
     D3DMATRIX T;
-    D3DMATRIX *M = nine_state_access_transform(state, State, TRUE);
+    D3DMATRIX *M = nine_state_access_transform(&state->ff, State, TRUE);
 
     DBG("This=%p State=%d pMatrix=%p\n", This, State, pMatrix);
 
@@ -2025,7 +2030,7 @@ NineDevice9_SetViewport( struct NineDevice9 *This,
         pViewport->MinZ, pViewport->MaxZ);
 
     state->viewport = *pViewport;
-    state->changed.group |= NINE_STATE_VIEWPORT;
+    nine_context_set_viewport(This, pViewport);
 
     return D3D_OK;
 }
@@ -2051,7 +2056,10 @@ NineDevice9_SetMaterial( struct NineDevice9 *This,
     user_assert(pMaterial, E_POINTER);
 
     state->ff.material = *pMaterial;
-    state->changed.group |= NINE_STATE_FF_MATERIAL;
+    if (unlikely(This->is_recording))
+        state->changed.group |= NINE_STATE_FF_MATERIAL;
+    else
+        nine_context_set_material(This, pMaterial);
 
     return D3D_OK;
 }
@@ -2071,6 +2079,7 @@ NineDevice9_SetLight( struct NineDevice9 *This,
                       const D3DLIGHT9 *pLight )
 {
     struct nine_state *state = This->update;
+    HRESULT hr;
 
     DBG("This=%p Index=%u pLight=%p\n", This, Index, pLight);
     if (pLight)
@@ -2081,27 +2090,10 @@ NineDevice9_SetLight( struct NineDevice9 *This,
 
     user_assert(Index < NINE_MAX_LIGHTS, D3DERR_INVALIDCALL); /* sanity */
 
-    if (Index >= state->ff.num_lights) {
-        unsigned n = state->ff.num_lights;
-        unsigned N = Index + 1;
-
-        state->ff.light = REALLOC(state->ff.light, n * sizeof(D3DLIGHT9),
-                                                   N * sizeof(D3DLIGHT9));
-        if (!state->ff.light)
-            return E_OUTOFMEMORY;
-        state->ff.num_lights = N;
-
-        for (; n < Index; ++n) {
-            memset(&state->ff.light[n], 0, sizeof(D3DLIGHT9));
-            state->ff.light[n].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
-        }
-    }
-    state->ff.light[Index] = *pLight;
+    hr = nine_state_set_light(&state->ff, Index, pLight);
+    if (hr != D3D_OK)
+        return hr;
 
-    if (pLight->Type == D3DLIGHT_SPOT && pLight->Theta >= pLight->Phi) {
-        DBG("Warning: clamping D3DLIGHT9.Theta\n");
-        state->ff.light[Index].Theta = state->ff.light[Index].Phi;
-    }
     if (pLight->Type != D3DLIGHT_DIRECTIONAL &&
         pLight->Attenuation0 == 0.0f &&
         pLight->Attenuation1 == 0.0f &&
@@ -2109,7 +2101,10 @@ NineDevice9_SetLight( struct NineDevice9 *This,
         DBG("Warning: all D3DLIGHT9.Attenuation[i] are 0\n");
     }
 
-    state->changed.group |= NINE_STATE_FF_LIGHTING;
+    if (unlikely(This->is_recording))
+        state->changed.group |= NINE_STATE_FF_LIGHTING;
+    else
+        nine_context_set_light(This, Index, pLight);
 
     return D3D_OK;
 }
@@ -2137,7 +2132,6 @@ NineDevice9_LightEnable( struct NineDevice9 *This,
                          BOOL Enable )
 {
     struct nine_state *state = This->update;
-    unsigned i;
 
     DBG("This=%p Index=%u Enable=%i\n", This, Index, Enable);
 
@@ -2153,30 +2147,10 @@ NineDevice9_LightEnable( struct NineDevice9 *This,
         light.Direction.z = 1.0f;
         NineDevice9_SetLight(This, Index, &light);
     }
-    user_assert(Index < state->ff.num_lights, D3DERR_INVALIDCALL);
-
-    for (i = 0; i < state->ff.num_lights_active; ++i) {
-        if (state->ff.active_light[i] == Index)
-            break;
-    }
 
-    if (Enable) {
-        if (i < state->ff.num_lights_active)
-            return D3D_OK;
-        /* XXX wine thinks this should still succeed:
-         */
-        user_assert(i < NINE_MAX_LIGHTS_ACTIVE, D3DERR_INVALIDCALL);
-
-        state->ff.active_light[i] = Index;
-        state->ff.num_lights_active++;
-    } else {
-        if (i == state->ff.num_lights_active)
-            return D3D_OK;
-        --state->ff.num_lights_active;
-        for (; i < state->ff.num_lights_active; ++i)
-            state->ff.active_light[i] = state->ff.active_light[i + 1];
-    }
-    state->changed.group |= NINE_STATE_FF_LIGHTING;
+    nine_state_light_enable(&state->ff, &state->changed.group, Index, Enable);
+    if (likely(!This->is_recording))
+        nine_context_light_enable(This, Index, Enable);
 
     return D3D_OK;
 }
@@ -2218,7 +2192,10 @@ NineDevice9_SetClipPlane( struct NineDevice9 *This,
     user_assert(Index < PIPE_MAX_CLIP_PLANES, D3DERR_INVALIDCALL);
 
     memcpy(&state->clip.ucp[Index][0], pPlane, sizeof(state->clip.ucp[0]));
-    state->changed.ucp |= 1 << Index;
+    if (unlikely(This->is_recording))
+        state->changed.ucp |= 1 << Index;
+    else
+        nine_context_set_clip_plane(This, Index, (struct nine_clipplane *)pPlane);
 
     return D3D_OK;
 }
@@ -2490,19 +2467,10 @@ NineDevice9_SetTexture( struct NineDevice9 *This,
     if (old == tex)
         return D3D_OK;
 
-    if (tex) {
-        if ((tex->managed.dirty | tex->dirty_mip) && LIST_IS_EMPTY(&tex->list))
-            list_add(&tex->list, &This->update_textures);
-
-        tex->bind_count++;
-    }
-    if (old)
-        old->bind_count--;
+    NineBindTextureToDevice(This, &state->texture[Stage], tex);
 
     nine_context_set_texture(This, Stage, tex);
 
-    nine_bind(&state->texture[Stage], pTexture);
-
     return D3D_OK;
 }
 
@@ -2529,8 +2497,6 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This,
                                   DWORD Value )
 {
     struct nine_state *state = This->update;
-    struct nine_context *context = &This->context;
-    int bumpmap_index = -1;
 
     DBG("Stage=%u Type=%u Value=%08x\n", Stage, Type, Value);
     nine_dump_D3DTSS_value(DBG_FF, Type, Value);
@@ -2539,39 +2505,14 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This,
     user_assert(Type < ARRAY_SIZE(state->ff.tex_stage[0]), D3DERR_INVALIDCALL);
 
     state->ff.tex_stage[Stage][Type] = Value;
-    switch (Type) {
-    case D3DTSS_BUMPENVMAT00:
-        bumpmap_index = 4 * Stage;
-        break;
-    case D3DTSS_BUMPENVMAT01:
-        bumpmap_index = 4 * Stage + 1;
-        break;
-    case D3DTSS_BUMPENVMAT10:
-        bumpmap_index = 4 * Stage + 2;
-        break;
-    case D3DTSS_BUMPENVMAT11:
-        bumpmap_index = 4 * Stage + 3;
-        break;
-    case D3DTSS_BUMPENVLSCALE:
-        bumpmap_index = 4 * 8 + 2 * Stage;
-        break;
-    case D3DTSS_BUMPENVLOFFSET:
-        bumpmap_index = 4 * 8 + 2 * Stage + 1;
-        break;
-    case D3DTSS_TEXTURETRANSFORMFLAGS:
-        state->changed.group |= NINE_STATE_PS1X_SHADER;
-        break;
-    default:
-        break;
-    }
-
-    if (bumpmap_index >= 0 && !This->is_recording) {
-        context->bumpmap_vars[bumpmap_index] = Value;
-        state->changed.group |= NINE_STATE_PS_CONST;
-    }
 
-    state->changed.group |= NINE_STATE_FF_PSSTAGES;
-    state->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32);
+    if (unlikely(This->is_recording)) {
+        if (Type == D3DTSS_TEXTURETRANSFORMFLAGS)
+            state->changed.group |= NINE_STATE_PS1X_SHADER;
+        state->changed.group |= NINE_STATE_FF_PSSTAGES;
+        state->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32);
+    } else
+        nine_context_set_texture_stage_state(This, Stage, Type, Value);
 
     return D3D_OK;
 }
@@ -2739,7 +2680,7 @@ NineDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This,
 {
     if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) {
         This->swvp = bSoftware;
-        This->state.changed.group |= NINE_STATE_SWVP;
+        nine_context_set_swvp(This, bSoftware);
         return D3D_OK;
     } else
         return D3DERR_INVALIDCALL; /* msdn. TODO: check in practice */
@@ -2764,6 +2705,55 @@ NineDevice9_GetNPatchMode( struct NineDevice9 *This )
     STUB(0);
 }
 
+/* TODO: only go through dirty textures */
+static void
+validate_textures(struct NineDevice9 *device)
+{
+    struct NineBaseTexture9 *tex, *ptr;
+    LIST_FOR_EACH_ENTRY_SAFE(tex, ptr, &device->update_textures, list) {
+        list_delinit(&tex->list);
+        NineBaseTexture9_Validate(tex);
+    }
+}
+
+static void
+update_managed_buffers(struct NineDevice9 *device)
+{
+    struct NineBuffer9 *buf, *ptr;
+    LIST_FOR_EACH_ENTRY_SAFE(buf, ptr, &device->update_buffers, managed.list) {
+        list_delinit(&buf->managed.list);
+        NineBuffer9_Upload(buf);
+    }
+}
+
+static void
+NineBeforeDraw( struct NineDevice9 *This )
+{
+    /* Upload Managed dirty content */
+    validate_textures(This); /* may clobber state */
+    update_managed_buffers(This);
+}
+
+static void
+NineAfterDraw( struct NineDevice9 *This )
+{
+    unsigned i;
+    struct nine_state *state = &This->state;
+    unsigned ps_mask = state->ps ? state->ps->rt_mask : 1;
+
+    /* Flag render-targets with autogenmipmap for mipmap regeneration */
+    for (i = 0; i < This->caps.NumSimultaneousRTs; ++i) {
+        struct NineSurface9 *rt = state->rt[i];
+
+        if (rt && rt->desc.Format != D3DFMT_NULL && (ps_mask & (1 << i)) &&
+            rt->desc.Usage & D3DUSAGE_AUTOGENMIPMAP) {
+            assert(rt->texture == D3DRTYPE_TEXTURE ||
+                   rt->texture == D3DRTYPE_CUBETEXTURE);
+            NineBaseTexture9(rt->base.base.container)->dirty_mip = TRUE;
+        }
+    }
+}
+
 HRESULT NINE_WINAPI
 NineDevice9_DrawPrimitive( struct NineDevice9 *This,
                            D3DPRIMITIVETYPE PrimitiveType,
@@ -2773,7 +2763,9 @@ NineDevice9_DrawPrimitive( struct NineDevice9 *This,
     DBG("iface %p, PrimitiveType %u, StartVertex %u, PrimitiveCount %u\n",
         This, PrimitiveType, StartVertex, PrimitiveCount);
 
+    NineBeforeDraw(This);
     nine_context_draw_primitive(This, PrimitiveType, StartVertex, PrimitiveCount);
+    NineAfterDraw(This);
 
     return D3D_OK;
 }
@@ -2795,9 +2787,11 @@ NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This,
     user_assert(This->state.idxbuf, D3DERR_INVALIDCALL);
     user_assert(This->state.vdecl, D3DERR_INVALIDCALL);
 
+    NineBeforeDraw(This);
     nine_context_draw_indexed_primitive(This, PrimitiveType, BaseVertexIndex,
                                         MinVertexIndex, NumVertices, StartIndex,
                                         PrimitiveCount);
+    NineAfterDraw(This);
 
     return D3D_OK;
 }
@@ -2821,24 +2815,27 @@ NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This,
 
     vtxbuf.stride = VertexStreamZeroStride;
     vtxbuf.buffer_offset = 0;
-    vtxbuf.buffer = NULL;
-    vtxbuf.user_buffer = pVertexStreamZeroData;
+    vtxbuf.is_user_buffer = true;
+    vtxbuf.buffer.user = pVertexStreamZeroData;
 
     if (!This->driver_caps.user_vbufs) {
+        vtxbuf.is_user_buffer = false;
+        vtxbuf.buffer.resource = NULL;
         u_upload_data(This->vertex_uploader,
                       0,
                       (prim_count_to_vertex_count(PrimitiveType, PrimitiveCount)) * VertexStreamZeroStride, /* XXX */
                       4,
-                      vtxbuf.user_buffer,
+                      pVertexStreamZeroData,
                       &vtxbuf.buffer_offset,
-                      &vtxbuf.buffer);
+                      &vtxbuf.buffer.resource);
         u_upload_unmap(This->vertex_uploader);
-        vtxbuf.user_buffer = NULL;
     }
 
+    NineBeforeDraw(This);
     nine_context_draw_primitive_from_vtxbuf(This, PrimitiveType, PrimitiveCount, &vtxbuf);
+    NineAfterDraw(This);
 
-    pipe_resource_reference(&vtxbuf.buffer, NULL);
+    pipe_vertex_buffer_unreference(&vtxbuf);
 
     NineDevice9_PauseRecording(This);
     NineDevice9_SetStreamSource(This, 0, NULL, 0, 0);
@@ -2859,7 +2856,6 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
                                     UINT VertexStreamZeroStride )
 {
     struct pipe_vertex_buffer vbuf;
-    struct pipe_index_buffer ibuf;
 
     DBG("iface %p, PrimitiveType %u, MinVertexIndex %u, NumVertices %u "
         "PrimitiveCount %u, pIndexData %p, IndexDataFormat %u "
@@ -2876,49 +2872,54 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
 
     vbuf.stride = VertexStreamZeroStride;
     vbuf.buffer_offset = 0;
-    vbuf.buffer = NULL;
-    vbuf.user_buffer = pVertexStreamZeroData;
+    vbuf.is_user_buffer = true;
+    vbuf.buffer.user = pVertexStreamZeroData;
 
-    ibuf.index_size = (IndexDataFormat == D3DFMT_INDEX16) ? 2 : 4;
-    ibuf.offset = 0;
-    ibuf.buffer = NULL;
-    ibuf.user_buffer = pIndexData;
+    unsigned index_size = (IndexDataFormat == D3DFMT_INDEX16) ? 2 : 4;
+    struct pipe_resource *ibuf = NULL;
 
     if (!This->driver_caps.user_vbufs) {
         const unsigned base = MinVertexIndex * VertexStreamZeroStride;
+        vbuf.is_user_buffer = false;
+        vbuf.buffer.resource = NULL;
         u_upload_data(This->vertex_uploader,
                       base,
                       NumVertices * VertexStreamZeroStride, /* XXX */
                       4,
-                      (const uint8_t *)vbuf.user_buffer + base,
+                      (const uint8_t *)pVertexStreamZeroData + base,
                       &vbuf.buffer_offset,
-                      &vbuf.buffer);
+                      &vbuf.buffer.resource);
         u_upload_unmap(This->vertex_uploader);
         /* Won't be used: */
         vbuf.buffer_offset -= base;
-        vbuf.user_buffer = NULL;
     }
-    if (!This->driver_caps.user_ibufs) {
-        u_upload_data(This->index_uploader,
+
+    unsigned index_offset = 0;
+    if (This->csmt_active) {
+        u_upload_data(This->pipe_secondary->stream_uploader,
                       0,
-                      (prim_count_to_vertex_count(PrimitiveType, PrimitiveCount)) * ibuf.index_size,
+                      (prim_count_to_vertex_count(PrimitiveType, PrimitiveCount)) * index_size,
                       4,
-                      ibuf.user_buffer,
-                      &ibuf.offset,
-                      &ibuf.buffer);
-        u_upload_unmap(This->index_uploader);
-        ibuf.user_buffer = NULL;
+                      pIndexData,
+                      &index_offset,
+                      &ibuf);
+        u_upload_unmap(This->pipe_secondary->stream_uploader);
     }
 
+    NineBeforeDraw(This);
     nine_context_draw_indexed_primitive_from_vtxbuf_idxbuf(This, PrimitiveType,
                                                            MinVertexIndex,
                                                            NumVertices,
                                                            PrimitiveCount,
                                                            &vbuf,
-                                                           &ibuf);
+                                                           ibuf,
+                                                           ibuf ? NULL : (void*)pIndexData,
+                                                           index_offset,
+                                                           index_size);
+    NineAfterDraw(This);
 
-    pipe_resource_reference(&vbuf.buffer, NULL);
-    pipe_resource_reference(&ibuf.buffer, NULL);
+    pipe_vertex_buffer_unreference(&vbuf);
+    pipe_resource_reference(&ibuf, NULL);
 
     NineDevice9_PauseRecording(This);
     NineDevice9_SetIndices(This, NULL);
@@ -3031,9 +3032,8 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This,
     draw.restart_index = 0;
     draw.count_from_stream_output = NULL;
     draw.indirect = NULL;
-    draw.indirect_params = NULL;
     draw.instance_count = 1;
-    draw.indexed = FALSE;
+    draw.index_size = 0;
     draw.start = 0;
     draw.index_bias = 0;
     draw.min_index = 0;
@@ -3245,7 +3245,9 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
            pConstantData,
            Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
 
-    nine_context_set_vertex_shader_constant_f(This, StartRegister, pConstantData, Vector4fCount);
+    nine_context_set_vertex_shader_constant_f(This, StartRegister, pConstantData,
+                                              Vector4fCount * 4 * sizeof(state->vs_const_f[0]),
+                                              Vector4fCount);
 
     return D3D_OK;
 }
@@ -3311,7 +3313,8 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
                            &This->range_pool);
         state->changed.group |= NINE_STATE_VS_CONST;
     } else
-        nine_context_set_vertex_shader_constant_i(This, StartRegister, pConstantData, Vector4iCount);
+        nine_context_set_vertex_shader_constant_i(This, StartRegister, pConstantData,
+                                                  Vector4iCount * sizeof(int[4]), Vector4iCount);
 
     return D3D_OK;
 }
@@ -3385,7 +3388,8 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
                            &This->range_pool);
         state->changed.group |= NINE_STATE_VS_CONST;
     } else
-        nine_context_set_vertex_shader_constant_b(This, StartRegister, pConstantData, BoolCount);
+        nine_context_set_vertex_shader_constant_b(This, StartRegister, pConstantData,
+                                                  sizeof(BOOL) * BoolCount, BoolCount);
 
     return D3D_OK;
 }
@@ -3444,7 +3448,9 @@ NineDevice9_SetStreamSource( struct NineDevice9 *This,
     state->vtxbuf[i].stride = Stride;
     state->vtxbuf[i].buffer_offset = OffsetInBytes;
 
-    nine_bind(&state->stream[i], pStreamData);
+    NineBindBufferToDevice(This,
+                           (struct NineBuffer9 **)&state->stream[i],
+                           (struct NineBuffer9 *)pVBuf9);
 
     nine_context_set_stream_source(This,
                                    StreamNumber,
@@ -3537,7 +3543,10 @@ NineDevice9_SetIndices( struct NineDevice9 *This,
 
     if (state->idxbuf == idxbuf)
         return D3D_OK;
-    nine_bind(&state->idxbuf, idxbuf);
+
+    NineBindBufferToDevice(This,
+                           (struct NineBuffer9 **)&state->idxbuf,
+                           (struct NineBuffer9 *)idxbuf);
 
     nine_context_set_indices(This, idxbuf);
 
@@ -3578,28 +3587,25 @@ NineDevice9_SetPixelShader( struct NineDevice9 *This,
                             IDirect3DPixelShader9 *pShader )
 {
     struct nine_state *state = This->update;
-    struct nine_context *context = &This->context;
-    unsigned old_mask = state->ps ? state->ps->rt_mask : 1;
-    unsigned mask;
+    struct NinePixelShader9 *ps = (struct NinePixelShader9*)pShader;
 
     DBG("This=%p pShader=%p\n", This, pShader);
 
-    if (!This->is_recording && state->ps == (struct NinePixelShader9*)pShader)
-      return D3D_OK;
-
-    /* ff -> non-ff: commit back non-ff constants */
-    if (!state->ps && pShader)
-        context->commit |= NINE_STATE_COMMIT_CONST_PS;
+    if (unlikely(This->is_recording)) {
+        /* Technically we need NINE_STATE_FB only
+         * if the ps mask changes, but put it always
+         * to be safe */
+        nine_bind(&state->ps, pShader);
+        state->changed.group |= NINE_STATE_PS | NINE_STATE_FB;
+        return D3D_OK;
+    }
 
-    nine_bind(&state->ps, pShader);
+    if (state->ps == ps)
+        return D3D_OK;
 
-    state->changed.group |= NINE_STATE_PS;
+    nine_bind(&state->ps, ps);
 
-    mask = state->ps ? state->ps->rt_mask : 1;
-    /* We need to update cbufs if the pixel shader would
-     * write to different render targets */
-    if (mask != old_mask)
-        state->changed.group |= NINE_STATE_FB;
+    nine_context_set_pixel_shader(This, ps);
 
     return D3D_OK;
 }
@@ -3652,7 +3658,9 @@ NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This,
            pConstantData,
            Vector4fCount * 4 * sizeof(state->ps_const_f[0]));
 
-    nine_context_set_pixel_shader_constant_f(This, StartRegister, pConstantData, Vector4fCount);
+    nine_context_set_pixel_shader_constant_f(This, StartRegister, pConstantData,
+                                             Vector4fCount * 4 * sizeof(state->ps_const_f[0]),
+                                             Vector4fCount);
 
     return D3D_OK;
 }
@@ -3714,7 +3722,8 @@ NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This,
         state->changed.ps_const_i |= ((1 << Vector4iCount) - 1) << StartRegister;
         state->changed.group |= NINE_STATE_PS_CONST;
     } else
-        nine_context_set_pixel_shader_constant_i(This, StartRegister, pConstantData, Vector4iCount);
+        nine_context_set_pixel_shader_constant_i(This, StartRegister, pConstantData,
+                                                 sizeof(state->ps_const_i[0]) * Vector4iCount, Vector4iCount);
 
     return D3D_OK;
 }
@@ -3782,7 +3791,8 @@ NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This,
         state->changed.ps_const_b |= ((1 << BoolCount) - 1) << StartRegister;
         state->changed.group |= NINE_STATE_PS_CONST;
     } else
-        nine_context_set_pixel_shader_constant_b(This, StartRegister, pConstantData, BoolCount);
+        nine_context_set_pixel_shader_constant_b(This, StartRegister, pConstantData,
+                                                 sizeof(BOOL) * BoolCount, BoolCount);
 
     return D3D_OK;
 }