st/nine: Account POINTSIZE_MIN and POINTSIZE_MAX for point size
[mesa.git] / src / gallium / state_trackers / nine / nine_state.c
index 2cd5140055919e918769710c30e86d42e03498bd..db861c44f45260bc974190e0c32de05190b48cfe 100644 (file)
@@ -44,8 +44,11 @@ update_framebuffer(struct NineDevice9 *device)
     struct nine_state *state = &device->state;
     struct pipe_framebuffer_state *fb = &device->state.fb;
     unsigned i;
-    unsigned w = 0, h = 0; /* no surface can have width or height 0 */
-
+    struct NineSurface9 *rt0 = state->rt[0];
+    unsigned w = rt0->desc.Width;
+    unsigned h = rt0->desc.Height;
+    D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType;
+    unsigned mask = state->ps ? state->ps->rt_mask : 1;
     const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
 
     DBG("\n");
@@ -53,19 +56,32 @@ update_framebuffer(struct NineDevice9 *device)
     state->rt_mask = 0x0;
     fb->nr_cbufs = 0;
 
+    /* all render targets must have the same size and the depth buffer must be
+     * bigger. Multisample has to match, according to spec. But some apps do
+     * things wrong there, and no error is returned. The behaviour they get
+     * apparently is that depth buffer is disabled if it doesn't match.
+     * Surely the same for render targets. */
+
+    /* Special case: D3DFMT_NULL is used to bind no real render target,
+     * but render to the depth buffer. In that case we must ignore the render
+     * target info. TODO: find out what should happen when there are several
+     * render targets and the first one is D3DFMT_NULL */
+    if (rt0->desc.Format == D3DFMT_NULL && state->ds) {
+        w = state->ds->desc.Width;
+        h = state->ds->desc.Height;
+        nr_samples = state->ds->desc.MultiSampleType;
+    }
+
     for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
-        if (state->rt[i] && state->rt[i]->desc.Format != D3DFMT_NULL) {
-            struct NineSurface9 *rt = state->rt[i];
+        struct NineSurface9 *rt = state->rt[i];
+
+        if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) &&
+            rt->desc.Width == w && rt->desc.Height == h &&
+            rt->desc.MultiSampleType == nr_samples) {
             fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB);
             state->rt_mask |= 1 << i;
             fb->nr_cbufs = i + 1;
-            if (w) {
-                w = MIN2(w, rt->desc.Width);
-                h = MIN2(h, rt->desc.Height);
-            } else {
-                w = rt->desc.Width;
-                h = rt->desc.Height;
-            }
+
             if (unlikely(rt->desc.Usage & D3DUSAGE_AUTOGENMIPMAP)) {
                 assert(rt->texture == D3DRTYPE_TEXTURE ||
                        rt->texture == D3DRTYPE_CUBETEXTURE);
@@ -79,15 +95,10 @@ update_framebuffer(struct NineDevice9 *device)
         }
     }
 
-    if (state->ds) {
+    if (state->ds && state->ds->desc.Width >= w &&
+        state->ds->desc.Height >= h &&
+        state->ds->desc.MultiSampleType == nr_samples) {
         fb->zsbuf = NineSurface9_GetSurface(state->ds, 0);
-        if (w) {
-            w = MIN2(w, state->ds->desc.Width);
-            h = MIN2(h, state->ds->desc.Height);
-        } else {
-            w = state->ds->desc.Width;
-            h = state->ds->desc.Height;
-        }
     } else {
         fb->zsbuf = NULL;
     }
@@ -117,12 +128,6 @@ update_framebuffer(struct NineDevice9 *device)
         }
     }
 
-#ifdef DEBUG
-    if (state->rt_mask & (state->ps ? ~state->ps->rt_mask : 0))
-        WARN_ONCE("FIXME: writing undefined values to cbufs 0x%x\n",
-                  state->rt_mask & ~state->ps->rt_mask);
-#endif
-
     return state->changed.group;
 }
 
@@ -133,10 +138,9 @@ update_viewport(struct NineDevice9 *device)
     const D3DVIEWPORT9 *vport = &device->state.viewport;
     struct pipe_viewport_state pvport;
 
-    /* XXX:
-     * I hope D3D clip coordinates are still
+    /* D3D coordinates are:
      * -1 .. +1 for X,Y and
-     *  0 .. +1 for Z (use pipe_rasterizer_state.clip_halfz)
+     *  0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz)
      */
     pvport.scale[0] = (float)vport->Width * 0.5f;
     pvport.scale[1] = (float)vport->Height * -0.5f;
@@ -145,10 +149,34 @@ update_viewport(struct NineDevice9 *device)
     pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y;
     pvport.translate[2] = vport->MinZ;
 
+    /* We found R600 and SI cards have some imprecision
+     * on the barycentric coordinates used for interpolation.
+     * Some shaders rely on having something precise.
+     * We found that the proprietary driver has the imprecision issue,
+     * except when the render target width and height are powers of two.
+     * It is using some sort of workaround for these cases
+     * which covers likely all the cases the applications rely
+     * on something precise.
+     * We haven't found the workaround, but it seems like it's better
+     * for applications if the imprecision is biased towards infinity
+     * instead of -infinity (which is what we measured). So shift the viewport
+     * slightly: not enough to change the rasterization result (in particular
+     * for multisampling), but enough to make the imprecision biased
+     * towards infinity. We do this shift only if render target width and
+     * height are powers of two.
+     * Solves 'red shadows' bug on UE3 games.
+     */
+    if (device->driver_bugs.buggy_barycentrics &&
+        ((vport->Width & (vport->Width-1)) == 0) &&
+        ((vport->Height & (vport->Height-1)) == 0)) {
+        pvport.translate[0] -= 1.0f / 128.0f;
+        pvport.translate[1] -= 1.0f / 128.0f;
+    }
+
     pipe->set_viewport_states(pipe, 0, 1, &pvport);
 }
 
-static INLINE void
+static inline void
 update_scissor(struct NineDevice9 *device)
 {
     struct pipe_context *pipe = device->pipe;
@@ -156,19 +184,19 @@ update_scissor(struct NineDevice9 *device)
     pipe->set_scissor_states(pipe, 0, 1, &device->state.scissor);
 }
 
-static INLINE void
+static inline void
 update_blend(struct NineDevice9 *device)
 {
     nine_convert_blend_state(device->cso, device->state.rs);
 }
 
-static INLINE void
+static inline void
 update_dsa(struct NineDevice9 *device)
 {
     nine_convert_dsa_state(device->cso, device->state.rs);
 }
 
-static INLINE void
+static inline void
 update_rasterizer(struct NineDevice9 *device)
 {
     nine_convert_rasterizer_state(device->cso, device->state.rs);
@@ -186,26 +214,52 @@ update_vertex_elements(struct NineDevice9 *device)
     const struct NineVertexShader9 *vs;
     unsigned n, b, i;
     int index;
+    char vdecl_index_map[16]; /* vs->num_inputs <= 16 */
+    char used_streams[device->caps.MaxStreams];
+    int dummy_vbo_stream = -1;
+    BOOL need_dummy_vbo = FALSE;
     struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
 
     state->stream_usage_mask = 0;
-
+    memset(vdecl_index_map, -1, 16);
+    memset(used_streams, 0, device->caps.MaxStreams);
     vs = device->state.vs ? device->state.vs : device->ff.vs;
 
-    if (!vdecl) /* no inputs */
-        return;
-    for (n = 0; n < vs->num_inputs; ++n) {
-        DBG("looking up input %u (usage %u) from vdecl(%p)\n",
-            n, vs->input_map[n].ndecl, vdecl);
+    if (vdecl) {
+        for (n = 0; n < vs->num_inputs; ++n) {
+            DBG("looking up input %u (usage %u) from vdecl(%p)\n",
+                n, vs->input_map[n].ndecl, vdecl);
+
+            for (i = 0; i < vdecl->nelems; i++) {
+                if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
+                    vdecl_index_map[n] = i;
+                    used_streams[vdecl->elems[i].vertex_buffer_index] = 1;
+                    break;
+                }
+            }
+            if (vdecl_index_map[n] < 0)
+                need_dummy_vbo = TRUE;
+        }
+    } else {
+        /* No vertex declaration. Likely will never happen in practice,
+         * but we must not crash on this */
+        need_dummy_vbo = TRUE;
+    }
 
-        index = -1;
-        for (i = 0; i < vdecl->nelems; i++) {
-            if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
-                index = i;
+    if (need_dummy_vbo) {
+        for (i = 0; i < device->caps.MaxStreams; i++ ) {
+            if (!used_streams[i]) {
+                dummy_vbo_stream = i;
                 break;
             }
         }
+    }
+    /* there are fewer vertex shader inputs than stream slots,
+     * so if we need a slot for the dummy vbo, we should have found one */
+    assert (!need_dummy_vbo || dummy_vbo_stream != -1);
 
+    for (n = 0; n < vs->num_inputs; ++n) {
+        index = vdecl_index_map[n];
         if (index >= 0) {
             ve[n] = vdecl->elems[index];
             b = ve[n].vertex_buffer_index;
@@ -214,23 +268,33 @@ update_vertex_elements(struct NineDevice9 *device)
             if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
                 ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF;
         } else {
-            /* TODO:
-             * If drivers don't want to handle this, insert a dummy buffer.
-             * But on which stream ?
-             */
-            /* no data, disable */
-            ve[n].src_format = PIPE_FORMAT_NONE;
+            /* if the vertex declaration is incomplete compared to what the
+             * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
+             * This is not specified by the spec, but is the behaviour
+             * observed in tests on Windows */
+            ve[n].vertex_buffer_index = dummy_vbo_stream;
+            ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
             ve[n].src_offset = 0;
             ve[n].instance_divisor = 0;
-            ve[n].vertex_buffer_index = 0;
         }
     }
+
+    if (state->dummy_vbo_bound_at != dummy_vbo_stream) {
+        if (state->dummy_vbo_bound_at >= 0)
+            state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at;
+        if (dummy_vbo_stream >= 0) {
+            state->changed.vtxbuf |= 1 << dummy_vbo_stream;
+            state->vbo_bound_done = FALSE;
+        }
+        state->dummy_vbo_bound_at = dummy_vbo_stream;
+    }
+
     cso_set_vertex_elements(device->cso, vs->num_inputs, ve);
 
     state->changed.stream_freq = 0;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 update_shader_variant_keys(struct NineDevice9 *device)
 {
     struct nine_state *state = &device->state;
@@ -268,11 +332,12 @@ update_shader_variant_keys(struct NineDevice9 *device)
     return mask;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 update_vs(struct NineDevice9 *device)
 {
     struct nine_state *state = &device->state;
     struct NineVertexShader9 *vs = state->vs;
+    uint32_t changed_group = 0;
 
     /* likely because we dislike FF */
     if (likely(vs)) {
@@ -285,24 +350,21 @@ update_vs(struct NineDevice9 *device)
 
     if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) {
         state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size;
-        return NINE_STATE_RASTERIZER;
+        changed_group |= NINE_STATE_RASTERIZER;
     }
-#ifdef DEBUG
-    {
-        unsigned s, mask = vs->sampler_mask;
-        for (s = 0; mask; ++s, mask >>= 1)
-            if ((mask & 1) && !(device->state.texture[NINE_SAMPLER_VS(s)]))
-                WARN_ONCE("FIXME: unbound sampler should return alpha=1\n");
-    }
-#endif
-    return 0;
+
+    if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask)
+        /* Bind dummy sampler. */
+        changed_group |= NINE_STATE_SAMPLER;
+    return changed_group;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 update_ps(struct NineDevice9 *device)
 {
     struct nine_state *state = &device->state;
     struct NinePixelShader9 *ps = state->ps;
+    uint32_t changed_group = 0;
 
     if (likely(ps)) {
         state->cso.ps = NinePixelShader9_GetVariant(ps, state->ps_key);
@@ -312,18 +374,10 @@ update_ps(struct NineDevice9 *device)
     }
     device->pipe->bind_fs_state(device->pipe, state->cso.ps);
 
-#ifdef DEBUG
-    {
-        unsigned s, mask = ps->sampler_mask;
-        for (s = 0; mask; ++s, mask >>= 1)
-            if ((mask & 1) && !(device->state.texture[NINE_SAMPLER_PS(s)]))
-                WARN_ONCE("FIXME: unbound sampler should return alpha=1\n");
-        if (device->state.rt_mask & ~ps->rt_mask)
-            WARN_ONCE("FIXME: writing undefined values to cbufs 0x%x\n",
-                device->state.rt_mask & ~ps->rt_mask);
-    }
-#endif
-    return 0;
+    if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask)
+        /* Bind dummy sampler. */
+        changed_group |= NINE_STATE_SAMPLER;
+    return changed_group;
 }
 
 #define DO_UPLOAD_CONST_F(buf,p,c,d) \
@@ -352,8 +406,8 @@ update_constants(struct NineDevice9 *device, unsigned shader_type)
     const unsigned usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE;
     unsigned x = 0; /* silence warning */
     unsigned i, c;
-    const struct nine_lconstf *lconstf;
-    struct nine_range *r, *p;
+    struct nine_range *r, *p, *lconstf_ranges;
+    float *lconstf_data;
 
     box.y = 0;
     box.z = 0;
@@ -381,7 +435,9 @@ update_constants(struct NineDevice9 *device, unsigned shader_type)
         device->state.changed.vs_const_b = 0;
         const_b = device->state.vs_const_b;
 
-        lconstf = &device->state.vs->lconstf;
+        lconstf_ranges = device->state.vs->lconstf.ranges;
+        lconstf_data = device->state.vs->lconstf.data;
+
         device->state.ff.clobber.vs_const = TRUE;
         device->state.changed.group &= ~NINE_STATE_VS_CONST;
     } else {
@@ -405,7 +461,9 @@ update_constants(struct NineDevice9 *device, unsigned shader_type)
         device->state.changed.ps_const_b = 0;
         const_b = device->state.ps_const_b;
 
-        lconstf = &device->state.ps->lconstf;
+        lconstf_ranges = NULL;
+        lconstf_data = NULL;
+
         device->state.ff.clobber.ps_const = TRUE;
         device->state.changed.group &= ~NINE_STATE_PS_CONST;
     }
@@ -451,14 +509,14 @@ update_constants(struct NineDevice9 *device, unsigned shader_type)
     }
 
     /* TODO: only upload these when shader itself changes */
-    if (lconstf->ranges) {
+    if (lconstf_ranges) {
         unsigned n = 0;
-        struct nine_range *r = lconstf->ranges;
+        struct nine_range *r = lconstf_ranges;
         while (r) {
             box.x = r->bgn * 4 * sizeof(float);
             n += r->end - r->bgn;
             box.width = (r->end - r->bgn) * 4 * sizeof(float);
-            data = &lconstf->data[4 * n];
+            data = &lconstf_data[4 * n];
             pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0);
             r = r->next;
         }
@@ -491,13 +549,12 @@ update_vs_constants_userbuf(struct NineDevice9 *device)
         state->changed.vs_const_b = 0;
     }
 
-#ifdef DEBUG
     if (device->state.vs->lconstf.ranges) {
         /* TODO: Can we make it so that we don't have to copy everything ? */
         const struct nine_lconstf *lconstf =  &device->state.vs->lconstf;
         const struct nine_range *r = lconstf->ranges;
         unsigned n = 0;
-        float *dst = (float *)MALLOC(cb.buffer_size);
+        float *dst = device->state.vs_lconstf_temp;
         float *src = (float *)cb.user_buffer;
         memcpy(dst, src, cb.buffer_size);
         while (r) {
@@ -509,15 +566,9 @@ update_vs_constants_userbuf(struct NineDevice9 *device)
         }
         cb.user_buffer = dst;
     }
-#endif
 
     pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb);
 
-#ifdef DEBUG
-    if (device->state.vs->lconstf.ranges)
-        FREE((void *)cb.user_buffer);
-#endif
-
     if (device->state.changed.vs_const_f) {
         struct nine_range *r = device->state.changed.vs_const_f;
         struct nine_range *p = r;
@@ -555,33 +606,8 @@ update_ps_constants_userbuf(struct NineDevice9 *device)
         state->changed.ps_const_b = 0;
     }
 
-#ifdef DEBUG
-    if (device->state.ps->lconstf.ranges) {
-        /* TODO: Can we make it so that we don't have to copy everything ? */
-        const struct nine_lconstf *lconstf =  &device->state.ps->lconstf;
-        const struct nine_range *r = lconstf->ranges;
-        unsigned n = 0;
-        float *dst = (float *)MALLOC(cb.buffer_size);
-        float *src = (float *)cb.user_buffer;
-        memcpy(dst, src, cb.buffer_size);
-        while (r) {
-            unsigned p = r->bgn;
-            unsigned c = r->end - r->bgn;
-            memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
-            n += c;
-            r = r->next;
-        }
-        cb.user_buffer = dst;
-    }
-#endif
-
     pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
 
-#ifdef DEBUG
-    if (device->state.ps->lconstf.ranges)
-        FREE((void *)cb.user_buffer);
-#endif
-
     if (device->state.changed.ps_const_f) {
         struct nine_range *r = device->state.changed.ps_const_f;
         struct nine_range *p = r;
@@ -598,32 +624,39 @@ update_vertex_buffers(struct NineDevice9 *device)
 {
     struct pipe_context *pipe = device->pipe;
     struct nine_state *state = &device->state;
+    struct pipe_vertex_buffer dummy_vtxbuf;
     uint32_t mask = state->changed.vtxbuf;
     unsigned i;
     unsigned start;
-    unsigned count = 0;
 
     DBG("mask=%x\n", mask);
 
+    if (state->dummy_vbo_bound_at >= 0) {
+        if (!state->vbo_bound_done) {
+            dummy_vtxbuf.buffer = device->dummy_vbo;
+            dummy_vtxbuf.stride = 0;
+            dummy_vtxbuf.user_buffer = NULL;
+            dummy_vtxbuf.buffer_offset = 0;
+            pipe->set_vertex_buffers(pipe, state->dummy_vbo_bound_at,
+                                     1, &dummy_vtxbuf);
+            state->vbo_bound_done = TRUE;
+        }
+        mask &= ~(1 << state->dummy_vbo_bound_at);
+    }
+
     for (i = 0; mask; mask >>= 1, ++i) {
         if (mask & 1) {
-            if (!count)
-                start = i;
-            ++count;
-        } else {
-            if (count)
-                pipe->set_vertex_buffers(pipe,
-                                         start, count, &state->vtxbuf[start]);
-            count = 0;
+            if (state->vtxbuf[i].buffer)
+                pipe->set_vertex_buffers(pipe, i, 1, &state->vtxbuf[i]);
+            else
+                pipe->set_vertex_buffers(pipe, i, 1, NULL);
         }
     }
-    if (count)
-        pipe->set_vertex_buffers(pipe, start, count, &state->vtxbuf[start]);
 
     state->changed.vtxbuf = 0;
 }
 
-static INLINE void
+static inline void
 update_index_buffer(struct NineDevice9 *device)
 {
     struct pipe_context *pipe = device->pipe;
@@ -644,7 +677,7 @@ validate_textures(struct NineDevice9 *device)
     }
 }
 
-static INLINE boolean
+static inline boolean
 update_sampler_derived(struct nine_state *state, unsigned s)
 {
     boolean changed = FALSE;
@@ -655,7 +688,7 @@ update_sampler_derived(struct nine_state *state, unsigned s)
     }
 
     if (state->samp[s][D3DSAMP_MIPFILTER] != D3DTEXF_NONE) {
-        int lod = state->samp[s][D3DSAMP_MAXMIPLEVEL] - state->texture[s]->lod;
+        int lod = state->samp[s][D3DSAMP_MAXMIPLEVEL] - state->texture[s]->managed.lod;
         if (lod < 0)
             lod = 0;
         if (state->samp[s][NINED3DSAMP_MINLOD] != lod) {
@@ -676,72 +709,149 @@ update_textures_and_samplers(struct NineDevice9 *device)
     struct pipe_context *pipe = device->pipe;
     struct nine_state *state = &device->state;
     struct pipe_sampler_view *view[NINE_MAX_SAMPLERS];
+    struct pipe_sampler_state samp;
     unsigned num_textures;
     unsigned i;
+    boolean commit_views;
     boolean commit_samplers;
+    uint16_t sampler_mask = state->ps ? state->ps->sampler_mask :
+                            device->ff.ps->sampler_mask;
 
     /* TODO: Can we reduce iterations here ? */
 
+    commit_views = FALSE;
     commit_samplers = FALSE;
+    state->bound_samplers_mask_ps = 0;
     for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_PS; ++i) {
         const unsigned s = NINE_SAMPLER_PS(i);
         int sRGB;
-        if (!state->texture[s]) {
+
+        if (!state->texture[s] && !(sampler_mask & (1 << i))) {
             view[i] = NULL;
-#ifdef DEBUG
-            if (state->ps && state->ps->sampler_mask & (1 << i))
-                WARN_ONCE("FIXME: unbound sampler should return alpha=1\n");
-#endif
             continue;
         }
-        sRGB = state->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0;
 
-        view[i] = NineBaseTexture9_GetSamplerView(state->texture[s], sRGB);
-        num_textures = i + 1;
+        if (state->texture[s]) {
+            sRGB = state->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0;
 
-        if (update_sampler_derived(state, s) || (state->changed.sampler[s] & 0x05fe)) {
-            state->changed.sampler[s] = 0;
+            view[i] = NineBaseTexture9_GetSamplerView(state->texture[s], sRGB);
+            num_textures = i + 1;
+
+            if (update_sampler_derived(state, s) || (state->changed.sampler[s] & 0x05fe)) {
+                state->changed.sampler[s] = 0;
+                commit_samplers = TRUE;
+                nine_convert_sampler_state(device->cso, s, state->samp[s]);
+            }
+        } else {
+            /* Bind dummy sampler. We do not bind the dummy sampler when
+             * it is not needed because it could add overhead. The
+             * dummy sampler should have r=g=b=0 and a=1. We do not
+             * unbind dummy samplers directly when they are not needed
+             * anymore, but they're going to be removed as soon as texture
+             * or sampler states are changed. */
+            view[i] = device->dummy_sampler;
+            num_textures = i + 1;
+
+            memset(&samp, 0, sizeof(samp));
+            samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+            samp.max_lod = 15.0f;
+            samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+            samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+            samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+            samp.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+            samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+            samp.compare_mode = PIPE_TEX_COMPARE_NONE;
+            samp.compare_func = PIPE_FUNC_LEQUAL;
+            samp.normalized_coords = 1;
+            samp.seamless_cube_map = 1;
+
+            cso_single_sampler(device->cso, PIPE_SHADER_FRAGMENT,
+                               s - NINE_SAMPLER_PS(0), &samp);
+
+            commit_views = TRUE;
             commit_samplers = TRUE;
-            nine_convert_sampler_state(device->cso, s, state->samp[s]);
+            state->changed.sampler[s] = ~0;
         }
+
+        state->bound_samplers_mask_ps |= (1 << s);
     }
-    if (state->changed.texture & NINE_PS_SAMPLERS_MASK)
+
+    commit_views |= (state->changed.texture & NINE_PS_SAMPLERS_MASK) != 0;
+    commit_views |= state->changed.srgb;
+    if (commit_views)
         pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0,
                                 num_textures, view);
 
     if (commit_samplers)
         cso_single_sampler_done(device->cso, PIPE_SHADER_FRAGMENT);
 
+    commit_views = FALSE;
     commit_samplers = FALSE;
+    sampler_mask = state->vs ? state->vs->sampler_mask : 0;
+    state->bound_samplers_mask_vs = 0;
     for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_VS; ++i) {
         const unsigned s = NINE_SAMPLER_VS(i);
         int sRGB;
-        if (!state->texture[s]) {
+
+        if (!state->texture[s] && !(sampler_mask & (1 << i))) {
             view[i] = NULL;
-#ifdef DEBUG
-            if (state->vs && state->vs->sampler_mask & (1 << i))
-                WARN_ONCE("FIXME: unbound sampler should return alpha=1\n");
-#endif
             continue;
         }
-        sRGB = state->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0;
 
-        view[i] = NineBaseTexture9_GetSamplerView(state->texture[s], sRGB);
-        num_textures = i + 1;
+        if (state->texture[s]) {
+            sRGB = state->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0;
 
-        if (update_sampler_derived(state, s) || (state->changed.sampler[s] & 0x05fe)) {
-            state->changed.sampler[s] = 0;
+            view[i] = NineBaseTexture9_GetSamplerView(state->texture[s], sRGB);
+            num_textures = i + 1;
+
+            if (update_sampler_derived(state, s) || (state->changed.sampler[s] & 0x05fe)) {
+                state->changed.sampler[s] = 0;
+                commit_samplers = TRUE;
+                nine_convert_sampler_state(device->cso, s, state->samp[s]);
+            }
+        } else {
+            /* Bind dummy sampler. We do not bind the dummy sampler when
+             * it is not needed because it could add overhead. The
+             * dummy sampler should have r=g=b=0 and a=1. We do not
+             * unbind dummy samplers directly when they are not needed
+             * anymore, but they're going to be removed as soon as texture
+             * or sampler states are changed. */
+            view[i] = device->dummy_sampler;
+            num_textures = i + 1;
+
+            memset(&samp, 0, sizeof(samp));
+            samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+            samp.max_lod = 15.0f;
+            samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+            samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+            samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+            samp.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+            samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+            samp.compare_mode = PIPE_TEX_COMPARE_NONE;
+            samp.compare_func = PIPE_FUNC_LEQUAL;
+            samp.normalized_coords = 1;
+            samp.seamless_cube_map = 1;
+
+            cso_single_sampler(device->cso, PIPE_SHADER_VERTEX,
+                               s - NINE_SAMPLER_VS(0), &samp);
+
+            commit_views = TRUE;
             commit_samplers = TRUE;
-            nine_convert_sampler_state(device->cso, s, state->samp[s]);
+            state->changed.sampler[s] = ~0;
         }
+
+        state->bound_samplers_mask_vs |= (1 << s);
     }
-    if (state->changed.texture & NINE_VS_SAMPLERS_MASK)
+    commit_views |= (state->changed.texture & NINE_VS_SAMPLERS_MASK) != 0;
+    commit_views |= state->changed.srgb;
+    if (commit_views)
         pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0,
                                 num_textures, view);
 
     if (commit_samplers)
         cso_single_sampler_done(device->cso, PIPE_SHADER_VERTEX);
 
+    state->changed.srgb = FALSE;
     state->changed.texture = 0;
 }
 
@@ -983,7 +1093,8 @@ static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] =
     [D3DRS_DESTBLENDALPHA] = D3DBLEND_ZERO,
     [D3DRS_BLENDOPALPHA] = D3DBLENDOP_ADD,
     [NINED3DRS_VSPOINTSIZE] = FALSE,
-    [NINED3DRS_RTMASK] = 0xf
+    [NINED3DRS_RTMASK] = 0xf,
+    [NINED3DRS_ALPHACOVERAGE] = FALSE
 };
 static const DWORD nine_tex_stage_state_defaults[NINED3DTSS_LAST + 1] =
 {
@@ -1024,9 +1135,10 @@ static const DWORD nine_samp_state_defaults[NINED3DSAMP_LAST + 1] =
     [NINED3DSAMP_SHADOW] = 0
 };
 void
-nine_state_set_defaults(struct nine_state *state, const D3DCAPS9 *caps,
+nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
                         boolean is_reset)
 {
+    struct nine_state *state = &device->state;
     unsigned s;
 
     /* Initialize defaults.
@@ -1047,9 +1159,9 @@ nine_state_set_defaults(struct nine_state *state, const D3DCAPS9 *caps,
     }
 
     if (state->vs_const_f)
-        memset(state->vs_const_f, 0, NINE_MAX_CONST_F * 4 * sizeof(float));
+        memset(state->vs_const_f, 0, device->vs_const_size);
     if (state->ps_const_f)
-        memset(state->ps_const_f, 0, NINE_MAX_CONST_F * 4 * sizeof(float));
+        memset(state->ps_const_f, 0, device->ps_const_size);
 
     /* Cap dependent initial state:
      */
@@ -1069,6 +1181,11 @@ nine_state_set_defaults(struct nine_state *state, const D3DCAPS9 *caps,
 
     for (s = 0; s < Elements(state->changed.sampler); ++s)
         state->changed.sampler[s] = ~0;
+
+    if (!is_reset) {
+        state->dummy_vbo_bound_at = -1;
+        state->vbo_bound_done = FALSE;
+    }
 }
 
 void
@@ -1277,7 +1394,7 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] =
     [D3DRS_VERTEXBLEND] = NINE_STATE_FF_OTHER,
     [D3DRS_CLIPPLANEENABLE] = NINE_STATE_RASTERIZER,
     [D3DRS_POINTSIZE] = NINE_STATE_RASTERIZER,
-    [D3DRS_POINTSIZE_MIN] = NINE_STATE_MISC_CONST,
+    [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER,
     [D3DRS_POINTSPRITEENABLE] = NINE_STATE_RASTERIZER,
     [D3DRS_POINTSCALEENABLE] = NINE_STATE_FF_OTHER,
     [D3DRS_POINTSCALE_A] = NINE_STATE_FF_OTHER,
@@ -1287,7 +1404,7 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] =
     [D3DRS_MULTISAMPLEMASK] = NINE_STATE_SAMPLE_MASK,
     [D3DRS_PATCHEDGESTYLE] = NINE_STATE_UNHANDLED,
     [D3DRS_DEBUGMONITORTOKEN] = NINE_STATE_UNHANDLED,
-    [D3DRS_POINTSIZE_MAX] = NINE_STATE_MISC_CONST,
+    [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER,
     [D3DRS_INDEXEDVERTEXBLENDENABLE] = NINE_STATE_FF_OTHER,
     [D3DRS_COLORWRITEENABLE] = NINE_STATE_BLEND,
     [D3DRS_TWEENFACTOR] = NINE_STATE_FF_OTHER,