X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fstate_trackers%2Fnine%2Fnine_state.c;h=6aa632501122cdd8b8b3d758f62309bfa1003a6f;hb=46dba701d81199e6450dc50ef3f0ff476dbfb17f;hp=034e666d84c61c91ec73ca272943b416c7a87d50;hpb=54f8e8a18da58c85a2f515d5fd0552fa4f5547bb;p=mesa.git diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 034e666d84c..6aa63250112 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -23,6 +23,7 @@ #include "device9.h" #include "basetexture9.h" +#include "buffer9.h" #include "indexbuffer9.h" #include "surface9.h" #include "vertexdeclaration9.h" @@ -33,352 +34,52 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "cso_cache/cso_context.h" +#include "util/u_upload_mgr.h" #include "util/u_math.h" #define DBG_CHANNEL DBG_DEVICE -static uint32_t -update_framebuffer(struct NineDevice9 *device) -{ - struct pipe_context *pipe = device->pipe; - struct nine_state *state = &device->state; - struct pipe_framebuffer_state *fb = &device->state.fb; - unsigned i; - struct NineSurface9 *rt0 = state->rt[0]; - unsigned w = rt0->desc.Width; - unsigned h = rt0->desc.Height; - D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType; - unsigned mask = state->ps ? state->ps->rt_mask : 1; - const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0; - - DBG("\n"); - - state->rt_mask = 0x0; - fb->nr_cbufs = 0; +/* Check if some states need to be set dirty */ - /* all render targets must have the same size and the depth buffer must be - * bigger. Multisample has to match, according to spec. But some apps do - * things wrong there, and no error is returned. The behaviour they get - * apparently is that depth buffer is disabled if it doesn't match. - * Surely the same for render targets. */ - - /* Special case: D3DFMT_NULL is used to bound no real render target, - * but render to depth buffer. We have to not take into account the render - * target info. TODO: know what should happen when there are several render targers - * and the first one is D3DFMT_NULL */ - if (rt0->desc.Format == D3DFMT_NULL && state->ds) { - w = state->ds->desc.Width; - h = state->ds->desc.Height; - nr_samples = state->ds->desc.MultiSampleType; - } - - for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) { - struct NineSurface9 *rt = state->rt[i]; - - if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) && - rt->desc.Width == w && rt->desc.Height == h && - rt->desc.MultiSampleType == nr_samples) { - fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB); - state->rt_mask |= 1 << i; - fb->nr_cbufs = i + 1; - - if (unlikely(rt->desc.Usage & D3DUSAGE_AUTOGENMIPMAP)) { - assert(rt->texture == D3DRTYPE_TEXTURE || - rt->texture == D3DRTYPE_CUBETEXTURE); - NineBaseTexture9(rt->base.base.container)->dirty_mip = TRUE; - } - } else { - /* Color outputs must match RT slot, - * drivers will have to handle NULL entries for GL, too. - */ - fb->cbufs[i] = NULL; - } - } - - if (state->ds && state->ds->desc.Width >= w && - state->ds->desc.Height >= h && - state->ds->desc.MultiSampleType == nr_samples) { - fb->zsbuf = NineSurface9_GetSurface(state->ds, 0); - } else { - fb->zsbuf = NULL; - } - - fb->width = w; - fb->height = h; - - pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */ - - if (fb->zsbuf) { - DWORD scale; - switch (fb->zsbuf->format) { - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - scale = fui(1.0f); - break; - case PIPE_FORMAT_Z16_UNORM: - scale = fui((float)(1 << 16)); - break; - default: - scale = fui((float)(1 << 24)); - break; - } - if (state->rs[NINED3DRS_ZBIASSCALE] != scale) { - state->rs[NINED3DRS_ZBIASSCALE] = scale; - state->changed.group |= NINE_STATE_RASTERIZER; - } - } - - return state->changed.group; -} - -static void -update_viewport(struct NineDevice9 *device) +static inline DWORD +check_multisample(struct NineDevice9 *device) { - struct pipe_context *pipe = device->pipe; - const D3DVIEWPORT9 *vport = &device->state.viewport; - struct pipe_viewport_state pvport; - - /* D3D coordinates are: - * -1 .. +1 for X,Y and - * 0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz) - */ - pvport.scale[0] = (float)vport->Width * 0.5f; - pvport.scale[1] = (float)vport->Height * -0.5f; - pvport.scale[2] = vport->MaxZ - vport->MinZ; - pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X; - pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y; - pvport.translate[2] = vport->MinZ; - - /* We found R600 and SI cards have some imprecision - * on the barycentric coordinates used for interpolation. - * Some shaders rely on having something precise. - * We found that the proprietary driver has the imprecision issue, - * except when the render target width and height are powers of two. - * It is using some sort of workaround for these cases - * which covers likely all the cases the applications rely - * on something precise. - * We haven't found the workaround, but it seems like it's better - * for applications if the imprecision is biased towards infinity - * instead of -infinity (which is what measured). So shift slightly - * the viewport: not enough to change rasterization result (in particular - * for multisampling), but enough to make the imprecision biased - * towards infinity. We do this shift only if render target width and - * height are powers of two. - * Solves 'red shadows' bug on UE3 games. - */ - if (device->driver_bugs.buggy_barycentrics && - ((vport->Width & (vport->Width-1)) == 0) && - ((vport->Height & (vport->Height-1)) == 0)) { - pvport.translate[0] -= 1.0f / 128.0f; - pvport.translate[1] -= 1.0f / 128.0f; + DWORD *rs = device->state.rs; + DWORD new_value = (rs[D3DRS_ZENABLE] || rs[D3DRS_STENCILENABLE]) && + device->state.rt[0]->desc.MultiSampleType >= 1 && + rs[D3DRS_MULTISAMPLEANTIALIAS]; + if (rs[NINED3DRS_MULTISAMPLE] != new_value) { + rs[NINED3DRS_MULTISAMPLE] = new_value; + return NINE_STATE_RASTERIZER; } - - pipe->set_viewport_states(pipe, 0, 1, &pvport); + return 0; } -static INLINE void -update_scissor(struct NineDevice9 *device) -{ - struct pipe_context *pipe = device->pipe; - - pipe->set_scissor_states(pipe, 0, 1, &device->state.scissor); -} +/* State preparation only */ -static INLINE void -update_blend(struct NineDevice9 *device) +static inline void +prepare_blend(struct NineDevice9 *device) { - nine_convert_blend_state(device->cso, device->state.rs); + nine_convert_blend_state(&device->state.pipe.blend, device->state.rs); + device->state.commit |= NINE_STATE_COMMIT_BLEND; } -static INLINE void -update_dsa(struct NineDevice9 *device) +static inline void +prepare_dsa(struct NineDevice9 *device) { - nine_convert_dsa_state(device->cso, device->state.rs); + nine_convert_dsa_state(&device->state.pipe.dsa, device->state.rs); + device->state.commit |= NINE_STATE_COMMIT_DSA; } -static INLINE void -update_rasterizer(struct NineDevice9 *device) +static inline void +prepare_rasterizer(struct NineDevice9 *device) { - nine_convert_rasterizer_state(device->cso, device->state.rs); + nine_convert_rasterizer_state(device, &device->state.pipe.rast, device->state.rs); + device->state.commit |= NINE_STATE_COMMIT_RASTERIZER; } -/* Loop through VS inputs and pick the vertex elements with the declared - * usage from the vertex declaration, then insert the instance divisor from - * the stream source frequency setting. - */ static void -update_vertex_elements(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - const struct NineVertexDeclaration9 *vdecl = device->state.vdecl; - const struct NineVertexShader9 *vs; - unsigned n, b, i; - int index; - char vdecl_index_map[16]; /* vs->num_inputs <= 16 */ - char used_streams[device->caps.MaxStreams]; - int dummy_vbo_stream = -1; - BOOL need_dummy_vbo = FALSE; - struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; - - state->stream_usage_mask = 0; - memset(vdecl_index_map, -1, 16); - memset(used_streams, 0, device->caps.MaxStreams); - vs = device->state.vs ? device->state.vs : device->ff.vs; - - if (vdecl) { - for (n = 0; n < vs->num_inputs; ++n) { - DBG("looking up input %u (usage %u) from vdecl(%p)\n", - n, vs->input_map[n].ndecl, vdecl); - - for (i = 0; i < vdecl->nelems; i++) { - if (vdecl->usage_map[i] == vs->input_map[n].ndecl) { - vdecl_index_map[n] = i; - used_streams[vdecl->elems[i].vertex_buffer_index] = 1; - break; - } - } - if (vdecl_index_map[n] < 0) - need_dummy_vbo = TRUE; - } - } else { - /* No vertex declaration. Likely will never happen in practice, - * but we need not crash on this */ - need_dummy_vbo = TRUE; - } - - if (need_dummy_vbo) { - for (i = 0; i < device->caps.MaxStreams; i++ ) { - if (!used_streams[i]) { - dummy_vbo_stream = i; - break; - } - } - } - /* there are less vertex shader inputs than stream slots, - * so if we need a slot for the dummy vbo, we should have found one */ - assert (!need_dummy_vbo || dummy_vbo_stream != -1); - - for (n = 0; n < vs->num_inputs; ++n) { - index = vdecl_index_map[n]; - if (index >= 0) { - ve[n] = vdecl->elems[index]; - b = ve[n].vertex_buffer_index; - state->stream_usage_mask |= 1 << b; - /* XXX wine just uses 1 here: */ - if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA) - ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF; - } else { - /* if the vertex declaration is incomplete compared to what the - * vertex shader needs, we bind a dummy vbo with 0 0 0 0. - * This is not precised by the spec, but is the behaviour - * tested on win */ - ve[n].vertex_buffer_index = dummy_vbo_stream; - ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - ve[n].src_offset = 0; - ve[n].instance_divisor = 0; - } - } - - if (state->dummy_vbo_bound_at != dummy_vbo_stream) { - if (state->dummy_vbo_bound_at >= 0) - state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at; - if (dummy_vbo_stream >= 0) { - state->changed.vtxbuf |= 1 << dummy_vbo_stream; - state->vbo_bound_done = FALSE; - } - state->dummy_vbo_bound_at = dummy_vbo_stream; - } - - cso_set_vertex_elements(device->cso, vs->num_inputs, ve); - - state->changed.stream_freq = 0; -} - -static INLINE uint32_t -update_shader_variant_keys(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - uint32_t mask = 0; - uint32_t vs_key = state->samplers_shadow; - uint32_t ps_key = state->samplers_shadow; - - vs_key = (vs_key & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0); - ps_key = (ps_key & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0); - - if (state->vs) vs_key &= state->vs->sampler_mask; - if (state->ps) { - if (unlikely(state->ps->byte_code.version < 0x20)) { - /* no depth textures, but variable targets */ - uint32_t m = state->ps->sampler_mask; - ps_key = 0; - while (m) { - int s = ffs(m) - 1; - m &= ~(1 << s); - ps_key |= (state->texture[s] ? state->texture[s]->pstype : 1) << (s * 2); - } - } else { - ps_key &= state->ps->sampler_mask; - } - } - - if (state->vs && state->vs_key != vs_key) { - state->vs_key = vs_key; - mask |= NINE_STATE_VS; - } - if (state->ps && state->ps_key != ps_key) { - state->ps_key = ps_key; - mask |= NINE_STATE_PS; - } - return mask; -} - -static INLINE uint32_t -update_vs(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - struct NineVertexShader9 *vs = state->vs; - uint32_t changed_group = 0; - - /* likely because we dislike FF */ - if (likely(vs)) { - state->cso.vs = NineVertexShader9_GetVariant(vs, state->vs_key); - } else { - vs = device->ff.vs; - state->cso.vs = vs->variant.cso; - } - device->pipe->bind_vs_state(device->pipe, state->cso.vs); - - if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) { - state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size; - changed_group |= NINE_STATE_RASTERIZER; - } - - if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask) - /* Bound dummy sampler. */ - changed_group |= NINE_STATE_SAMPLER; - return changed_group; -} - -static INLINE uint32_t -update_ps(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - struct NinePixelShader9 *ps = state->ps; - uint32_t changed_group = 0; - - if (likely(ps)) { - state->cso.ps = NinePixelShader9_GetVariant(ps, state->ps_key); - } else { - ps = device->ff.ps; - state->cso.ps = ps->variant.cso; - } - device->pipe->bind_fs_state(device->pipe, state->cso.ps); - - if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask) - /* Bound dummy sampler. */ - changed_group |= NINE_STATE_SAMPLER; - return changed_group; -} +prepare_ps_constants_userbuf(struct NineDevice9 *device); #define DO_UPLOAD_CONST_F(buf,p,c,d) \ do { \ @@ -391,7 +92,7 @@ update_ps(struct NineDevice9 *device) /* OK, this is a bit ugly ... */ static void -update_constants(struct NineDevice9 *device, unsigned shader_type) +upload_constants(struct NineDevice9 *device, unsigned shader_type) { struct pipe_context *pipe = device->pipe; struct pipe_resource *buf; @@ -438,10 +139,17 @@ update_constants(struct NineDevice9 *device, unsigned shader_type) lconstf_ranges = device->state.vs->lconstf.ranges; lconstf_data = device->state.vs->lconstf.data; - device->state.ff.clobber.vs_const = TRUE; device->state.changed.group &= ~NINE_STATE_VS_CONST; } else { DBG("PS\n"); + /* features only implemented on the userbuf path */ + if (device->state.ps->bumpenvmat_needed || ( + device->state.ps->byte_code.version < 0x30 && + device->state.rs[D3DRS_FOGENABLE])) { + device->prefer_user_constbuf = TRUE; + prepare_ps_constants_userbuf(device); + return; + } buf = device->constbuf_ps; const_f = device->state.ps_const_f; @@ -464,7 +172,6 @@ update_constants(struct NineDevice9 *device, unsigned shader_type) lconstf_ranges = NULL; lconstf_data = NULL; - device->state.ff.clobber.ps_const = TRUE; device->state.changed.group &= ~NINE_STATE_PS_CONST; } @@ -524,10 +231,9 @@ update_constants(struct NineDevice9 *device, unsigned shader_type) } static void -update_vs_constants_userbuf(struct NineDevice9 *device) +prepare_vs_constants_userbuf(struct NineDevice9 *device) { struct nine_state *state = &device->state; - struct pipe_context *pipe = device->pipe; struct pipe_constant_buffer cb; cb.buffer = NULL; cb.buffer_offset = 0; @@ -567,7 +273,19 @@ update_vs_constants_userbuf(struct NineDevice9 *device) cb.user_buffer = dst; } - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb); + if (!device->driver_caps.user_cbufs) { + u_upload_data(device->constbuf_uploader, + 0, + cb.buffer_size, + device->constbuf_alignment, + cb.user_buffer, + &cb.buffer_offset, + &cb.buffer); + u_upload_unmap(device->constbuf_uploader); + cb.user_buffer = NULL; + } + + state->pipe.cb_vs = cb; if (device->state.changed.vs_const_f) { struct nine_range *r = device->state.changed.vs_const_f; @@ -578,45 +296,364 @@ update_vs_constants_userbuf(struct NineDevice9 *device) device->state.changed.vs_const_f = NULL; } state->changed.group &= ~NINE_STATE_VS_CONST; + state->commit |= NINE_STATE_COMMIT_CONST_VS; +} + +static void +prepare_ps_constants_userbuf(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + struct pipe_constant_buffer cb; + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = device->state.ps->const_used_size; + cb.user_buffer = device->state.ps_const_f; + + if (state->changed.ps_const_i) { + int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f]; + memcpy(idst, state->ps_const_i, sizeof(state->ps_const_i)); + state->changed.ps_const_i = 0; + } + if (state->changed.ps_const_b) { + int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f]; + uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I]; + memcpy(bdst, state->ps_const_b, sizeof(state->ps_const_b)); + state->changed.ps_const_b = 0; + } + + /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */ + if (device->state.ps->bumpenvmat_needed) { + memcpy(device->state.ps_lconstf_temp, cb.user_buffer, cb.buffer_size); + memcpy(&device->state.ps_lconstf_temp[4 * 8], &device->state.bumpmap_vars, sizeof(device->state.bumpmap_vars)); + + cb.user_buffer = device->state.ps_lconstf_temp; + } + + if (state->ps->byte_code.version < 0x30 && + state->rs[D3DRS_FOGENABLE]) { + float *dst = &state->ps_lconstf_temp[4 * 32]; + if (cb.user_buffer != state->ps_lconstf_temp) { + memcpy(state->ps_lconstf_temp, cb.user_buffer, cb.buffer_size); + cb.user_buffer = state->ps_lconstf_temp; + } + + d3dcolor_to_rgba(dst, state->rs[D3DRS_FOGCOLOR]); + if (state->rs[D3DRS_FOGTABLEMODE] == D3DFOG_LINEAR) { + dst[4] = asfloat(state->rs[D3DRS_FOGEND]); + dst[5] = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART])); + } else if (state->rs[D3DRS_FOGTABLEMODE] != D3DFOG_NONE) { + dst[4] = asfloat(state->rs[D3DRS_FOGDENSITY]); + } + cb.buffer_size = 4 * 4 * 34; + } + + if (!cb.buffer_size) + return; + + if (!device->driver_caps.user_cbufs) { + u_upload_data(device->constbuf_uploader, + 0, + cb.buffer_size, + device->constbuf_alignment, + cb.user_buffer, + &cb.buffer_offset, + &cb.buffer); + u_upload_unmap(device->constbuf_uploader); + cb.user_buffer = NULL; + } + + state->pipe.cb_ps = cb; + + if (device->state.changed.ps_const_f) { + struct nine_range *r = device->state.changed.ps_const_f; + struct nine_range *p = r; + while (p->next) + p = p->next; + nine_range_pool_put_chain(&device->range_pool, r, p); + device->state.changed.ps_const_f = NULL; + } + state->changed.group &= ~NINE_STATE_PS_CONST; + state->commit |= NINE_STATE_COMMIT_CONST_PS; +} + +static inline uint32_t +prepare_vs(struct NineDevice9 *device, uint8_t shader_changed) +{ + struct nine_state *state = &device->state; + struct NineVertexShader9 *vs = state->vs; + uint32_t changed_group = 0; + int has_key_changed = 0; + + if (likely(state->programmable_vs)) + has_key_changed = NineVertexShader9_UpdateKey(vs, state); + + if (!shader_changed && !has_key_changed) + return 0; + + /* likely because we dislike FF */ + if (likely(state->programmable_vs)) { + state->cso.vs = NineVertexShader9_GetVariant(vs); + } else { + vs = device->ff.vs; + state->cso.vs = vs->ff_cso; + } + + if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) { + state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size; + changed_group |= NINE_STATE_RASTERIZER; + } + + if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask) + /* Bound dummy sampler. */ + changed_group |= NINE_STATE_SAMPLER; + + state->commit |= NINE_STATE_COMMIT_VS; + return changed_group; +} + +static inline uint32_t +prepare_ps(struct NineDevice9 *device, uint8_t shader_changed) +{ + struct nine_state *state = &device->state; + struct NinePixelShader9 *ps = state->ps; + uint32_t changed_group = 0; + int has_key_changed = 0; + + if (likely(ps)) + has_key_changed = NinePixelShader9_UpdateKey(ps, state); + + if (!shader_changed && !has_key_changed) + return 0; + + if (likely(ps)) { + state->cso.ps = NinePixelShader9_GetVariant(ps); + } else { + ps = device->ff.ps; + state->cso.ps = ps->ff_cso; + } + + if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask) + /* Bound dummy sampler. */ + changed_group |= NINE_STATE_SAMPLER; + + state->commit |= NINE_STATE_COMMIT_PS; + return changed_group; +} + +/* State preparation incremental */ + +/* State preparation + State commit */ + +static void +update_framebuffer(struct NineDevice9 *device, bool is_clear) +{ + struct pipe_context *pipe = device->pipe; + struct nine_state *state = &device->state; + struct pipe_framebuffer_state *fb = &device->state.fb; + unsigned i; + struct NineSurface9 *rt0 = state->rt[0]; + unsigned w = rt0->desc.Width; + unsigned h = rt0->desc.Height; + D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType; + unsigned ps_mask = state->ps ? state->ps->rt_mask : 1; + unsigned mask = is_clear ? 0xf : ps_mask; + const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0; + + DBG("\n"); + + state->rt_mask = 0x0; + fb->nr_cbufs = 0; + + /* all render targets must have the same size and the depth buffer must be + * bigger. Multisample has to match, according to spec. But some apps do + * things wrong there, and no error is returned. The behaviour they get + * apparently is that depth buffer is disabled if it doesn't match. + * Surely the same for render targets. */ + + /* Special case: D3DFMT_NULL is used to bound no real render target, + * but render to depth buffer. We have to not take into account the render + * target info. TODO: know what should happen when there are several render targers + * and the first one is D3DFMT_NULL */ + if (rt0->desc.Format == D3DFMT_NULL && state->ds) { + w = state->ds->desc.Width; + h = state->ds->desc.Height; + nr_samples = state->ds->desc.MultiSampleType; + } + + for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) { + struct NineSurface9 *rt = state->rt[i]; + + if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) && + rt->desc.Width == w && rt->desc.Height == h && + rt->desc.MultiSampleType == nr_samples) { + fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB); + state->rt_mask |= 1 << i; + fb->nr_cbufs = i + 1; + + if (unlikely(rt->desc.Usage & D3DUSAGE_AUTOGENMIPMAP)) { + assert(rt->texture == D3DRTYPE_TEXTURE || + rt->texture == D3DRTYPE_CUBETEXTURE); + NineBaseTexture9(rt->base.base.container)->dirty_mip = TRUE; + } + } else { + /* Color outputs must match RT slot, + * drivers will have to handle NULL entries for GL, too. + */ + fb->cbufs[i] = NULL; + } + } + + if (state->ds && state->ds->desc.Width >= w && + state->ds->desc.Height >= h && + state->ds->desc.MultiSampleType == nr_samples) { + fb->zsbuf = NineSurface9_GetSurface(state->ds, 0); + } else { + fb->zsbuf = NULL; + } + + fb->width = w; + fb->height = h; + + pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */ + + if (is_clear && state->rt_mask == ps_mask) + state->changed.group &= ~NINE_STATE_FB; +} + +static void +update_viewport(struct NineDevice9 *device) +{ + const D3DVIEWPORT9 *vport = &device->state.viewport; + struct pipe_viewport_state pvport; + + /* D3D coordinates are: + * -1 .. +1 for X,Y and + * 0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz) + */ + pvport.scale[0] = (float)vport->Width * 0.5f; + pvport.scale[1] = (float)vport->Height * -0.5f; + pvport.scale[2] = vport->MaxZ - vport->MinZ; + pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X; + pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y; + pvport.translate[2] = vport->MinZ; + + /* We found R600 and SI cards have some imprecision + * on the barycentric coordinates used for interpolation. + * Some shaders rely on having something precise. + * We found that the proprietary driver has the imprecision issue, + * except when the render target width and height are powers of two. + * It is using some sort of workaround for these cases + * which covers likely all the cases the applications rely + * on something precise. + * We haven't found the workaround, but it seems like it's better + * for applications if the imprecision is biased towards infinity + * instead of -infinity (which is what measured). So shift slightly + * the viewport: not enough to change rasterization result (in particular + * for multisampling), but enough to make the imprecision biased + * towards infinity. We do this shift only if render target width and + * height are powers of two. + * Solves 'red shadows' bug on UE3 games. + */ + if (device->driver_bugs.buggy_barycentrics && + ((vport->Width & (vport->Width-1)) == 0) && + ((vport->Height & (vport->Height-1)) == 0)) { + pvport.translate[0] -= 1.0f / 128.0f; + pvport.translate[1] -= 1.0f / 128.0f; + } + + cso_set_viewport(device->cso, &pvport); } +/* Loop through VS inputs and pick the vertex elements with the declared + * usage from the vertex declaration, then insert the instance divisor from + * the stream source frequency setting. + */ static void -update_ps_constants_userbuf(struct NineDevice9 *device) +update_vertex_elements(struct NineDevice9 *device) { struct nine_state *state = &device->state; - struct pipe_context *pipe = device->pipe; - struct pipe_constant_buffer cb; - cb.buffer = NULL; - cb.buffer_offset = 0; - cb.buffer_size = device->state.ps->const_used_size; - cb.user_buffer = device->state.ps_const_f; + const struct NineVertexDeclaration9 *vdecl = device->state.vdecl; + const struct NineVertexShader9 *vs; + unsigned n, b, i; + int index; + char vdecl_index_map[16]; /* vs->num_inputs <= 16 */ + char used_streams[device->caps.MaxStreams]; + int dummy_vbo_stream = -1; + BOOL need_dummy_vbo = FALSE; + struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; - if (!cb.buffer_size) - return; + state->stream_usage_mask = 0; + memset(vdecl_index_map, -1, 16); + memset(used_streams, 0, device->caps.MaxStreams); + vs = state->programmable_vs ? device->state.vs : device->ff.vs; - if (state->changed.ps_const_i) { - int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f]; - memcpy(idst, state->ps_const_i, sizeof(state->ps_const_i)); - state->changed.ps_const_i = 0; + if (vdecl) { + for (n = 0; n < vs->num_inputs; ++n) { + DBG("looking up input %u (usage %u) from vdecl(%p)\n", + n, vs->input_map[n].ndecl, vdecl); + + for (i = 0; i < vdecl->nelems; i++) { + if (vdecl->usage_map[i] == vs->input_map[n].ndecl) { + vdecl_index_map[n] = i; + used_streams[vdecl->elems[i].vertex_buffer_index] = 1; + break; + } + } + if (vdecl_index_map[n] < 0) + need_dummy_vbo = TRUE; + } + } else { + /* No vertex declaration. Likely will never happen in practice, + * but we need not crash on this */ + need_dummy_vbo = TRUE; } - if (state->changed.ps_const_b) { - int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f]; - uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I]; - memcpy(bdst, state->ps_const_b, sizeof(state->ps_const_b)); - state->changed.ps_const_b = 0; + + if (need_dummy_vbo) { + for (i = 0; i < device->caps.MaxStreams; i++ ) { + if (!used_streams[i]) { + dummy_vbo_stream = i; + break; + } + } } + /* there are less vertex shader inputs than stream slots, + * so if we need a slot for the dummy vbo, we should have found one */ + assert (!need_dummy_vbo || dummy_vbo_stream != -1); - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb); + for (n = 0; n < vs->num_inputs; ++n) { + index = vdecl_index_map[n]; + if (index >= 0) { + ve[n] = vdecl->elems[index]; + b = ve[n].vertex_buffer_index; + state->stream_usage_mask |= 1 << b; + /* XXX wine just uses 1 here: */ + if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA) + ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF; + } else { + /* if the vertex declaration is incomplete compared to what the + * vertex shader needs, we bind a dummy vbo with 0 0 0 0. + * This is not precised by the spec, but is the behaviour + * tested on win */ + ve[n].vertex_buffer_index = dummy_vbo_stream; + ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[n].src_offset = 0; + ve[n].instance_divisor = 0; + } + } - if (device->state.changed.ps_const_f) { - struct nine_range *r = device->state.changed.ps_const_f; - struct nine_range *p = r; - while (p->next) - p = p->next; - nine_range_pool_put_chain(&device->range_pool, r, p); - device->state.changed.ps_const_f = NULL; + if (state->dummy_vbo_bound_at != dummy_vbo_stream) { + if (state->dummy_vbo_bound_at >= 0) + state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at; + if (dummy_vbo_stream >= 0) { + state->changed.vtxbuf |= 1 << dummy_vbo_stream; + state->vbo_bound_done = FALSE; + } + state->dummy_vbo_bound_at = dummy_vbo_stream; } - state->changed.group &= ~NINE_STATE_PS_CONST; + + cso_set_vertex_elements(device->cso, vs->num_inputs, ve); + + state->changed.stream_freq = 0; } static void @@ -627,7 +664,6 @@ update_vertex_buffers(struct NineDevice9 *device) struct pipe_vertex_buffer dummy_vtxbuf; uint32_t mask = state->changed.vtxbuf; unsigned i; - unsigned start; DBG("mask=%x\n", mask); @@ -656,28 +692,7 @@ update_vertex_buffers(struct NineDevice9 *device) state->changed.vtxbuf = 0; } -static INLINE void -update_index_buffer(struct NineDevice9 *device) -{ - struct pipe_context *pipe = device->pipe; - if (device->state.idxbuf) - pipe->set_index_buffer(pipe, &device->state.idxbuf->buffer); - else - pipe->set_index_buffer(pipe, NULL); -} - -/* TODO: only go through dirty textures */ -static void -validate_textures(struct NineDevice9 *device) -{ - struct NineBaseTexture9 *tex, *ptr; - LIST_FOR_EACH_ENTRY_SAFE(tex, ptr, &device->update_textures, list) { - list_delinit(&tex->list); - NineBaseTexture9_Validate(tex); - } -} - -static INLINE boolean +static inline boolean update_sampler_derived(struct nine_state *state, unsigned s) { boolean changed = FALSE; @@ -688,7 +703,7 @@ update_sampler_derived(struct nine_state *state, unsigned s) } if (state->samp[s][D3DSAMP_MIPFILTER] != D3DTEXF_NONE) { - int lod = state->samp[s][D3DSAMP_MAXMIPLEVEL] - state->texture[s]->lod; + int lod = state->samp[s][D3DSAMP_MAXMIPLEVEL] - state->texture[s]->managed.lod; if (lod < 0) lod = 0; if (state->samp[s][NINED3DSAMP_MINLOD] != lod) { @@ -706,20 +721,16 @@ update_sampler_derived(struct nine_state *state, unsigned s) static void update_textures_and_samplers(struct NineDevice9 *device) { - struct pipe_context *pipe = device->pipe; struct nine_state *state = &device->state; struct pipe_sampler_view *view[NINE_MAX_SAMPLERS]; - struct pipe_sampler_state samp; unsigned num_textures; unsigned i; - boolean commit_views; boolean commit_samplers; uint16_t sampler_mask = state->ps ? state->ps->sampler_mask : device->ff.ps->sampler_mask; /* TODO: Can we reduce iterations here ? */ - commit_views = FALSE; commit_samplers = FALSE; state->bound_samplers_mask_ps = 0; for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_PS; ++i) { @@ -749,26 +760,12 @@ update_textures_and_samplers(struct NineDevice9 *device) * unbind dummy sampler directly when they are not needed * anymore, but they're going to be removed as long as texture * or sampler states are changed. */ - view[i] = device->dummy_sampler; + view[i] = device->dummy_sampler_view; num_textures = i + 1; - memset(&samp, 0, sizeof(samp)); - samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - samp.max_lod = 15.0f; - samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.min_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.compare_mode = PIPE_TEX_COMPARE_NONE; - samp.compare_func = PIPE_FUNC_LEQUAL; - samp.normalized_coords = 1; - samp.seamless_cube_map = 1; - cso_single_sampler(device->cso, PIPE_SHADER_FRAGMENT, - s - NINE_SAMPLER_PS(0), &samp); + s - NINE_SAMPLER_PS(0), &device->dummy_sampler_state); - commit_views = TRUE; commit_samplers = TRUE; state->changed.sampler[s] = ~0; } @@ -776,18 +773,13 @@ update_textures_and_samplers(struct NineDevice9 *device) state->bound_samplers_mask_ps |= (1 << s); } - commit_views |= (state->changed.texture & NINE_PS_SAMPLERS_MASK) != 0; - commit_views |= state->changed.srgb; - if (commit_views) - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, - num_textures, view); + cso_set_sampler_views(device->cso, PIPE_SHADER_FRAGMENT, num_textures, view); if (commit_samplers) cso_single_sampler_done(device->cso, PIPE_SHADER_FRAGMENT); - commit_views = FALSE; commit_samplers = FALSE; - sampler_mask = state->vs ? state->vs->sampler_mask : 0; + sampler_mask = state->programmable_vs ? state->vs->sampler_mask : 0; state->bound_samplers_mask_vs = 0; for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_VS; ++i) { const unsigned s = NINE_SAMPLER_VS(i); @@ -816,76 +808,181 @@ update_textures_and_samplers(struct NineDevice9 *device) * unbind dummy sampler directly when they are not needed * anymore, but they're going to be removed as long as texture * or sampler states are changed. */ - view[i] = device->dummy_sampler; + view[i] = device->dummy_sampler_view; num_textures = i + 1; - memset(&samp, 0, sizeof(samp)); - samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - samp.max_lod = 15.0f; - samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.min_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.compare_mode = PIPE_TEX_COMPARE_NONE; - samp.compare_func = PIPE_FUNC_LEQUAL; - samp.normalized_coords = 1; - samp.seamless_cube_map = 1; - cso_single_sampler(device->cso, PIPE_SHADER_VERTEX, - s - NINE_SAMPLER_VS(0), &samp); + s - NINE_SAMPLER_VS(0), &device->dummy_sampler_state); - commit_views = TRUE; commit_samplers = TRUE; state->changed.sampler[s] = ~0; } state->bound_samplers_mask_vs |= (1 << s); } - commit_views |= (state->changed.texture & NINE_VS_SAMPLERS_MASK) != 0; - commit_views |= state->changed.srgb; - if (commit_views) - pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, - num_textures, view); + + cso_set_sampler_views(device->cso, PIPE_SHADER_VERTEX, num_textures, view); if (commit_samplers) cso_single_sampler_done(device->cso, PIPE_SHADER_VERTEX); - state->changed.srgb = FALSE; state->changed.texture = 0; } +/* State commit only */ + +static inline void +commit_blend(struct NineDevice9 *device) +{ + cso_set_blend(device->cso, &device->state.pipe.blend); +} + +static inline void +commit_dsa(struct NineDevice9 *device) +{ + cso_set_depth_stencil_alpha(device->cso, &device->state.pipe.dsa); +} + +static inline void +commit_scissor(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + + pipe->set_scissor_states(pipe, 0, 1, &device->state.scissor); +} + +static inline void +commit_rasterizer(struct NineDevice9 *device) +{ + cso_set_rasterizer(device->cso, &device->state.pipe.rast); +} + +static inline void +commit_index_buffer(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + if (device->state.idxbuf) + pipe->set_index_buffer(pipe, &device->state.idxbuf->buffer); + else + pipe->set_index_buffer(pipe, NULL); +} + +static inline void +commit_vs_constants(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + + if (unlikely(!device->state.programmable_vs)) + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff); + else + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs); +} + +static inline void +commit_ps_constants(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + + if (unlikely(!device->state.ps)) + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &device->state.pipe.cb_ps_ff); + else + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &device->state.pipe.cb_ps); +} + +static inline void +commit_vs(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + + device->pipe->bind_vs_state(device->pipe, state->cso.vs); +} + + +static inline void +commit_ps(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; -#define NINE_STATE_FREQ_GROUP_0 \ - (NINE_STATE_FB | \ - NINE_STATE_VIEWPORT | \ - NINE_STATE_SCISSOR | \ - NINE_STATE_BLEND | \ - NINE_STATE_DSA | \ - NINE_STATE_RASTERIZER | \ - NINE_STATE_VS | \ - NINE_STATE_PS | \ - NINE_STATE_BLEND_COLOR | \ - NINE_STATE_STENCIL_REF | \ + device->pipe->bind_fs_state(device->pipe, state->cso.ps); +} +/* State Update */ + +#define NINE_STATE_SHADER_CHANGE_VS \ + (NINE_STATE_VS | \ + NINE_STATE_TEXTURE | \ + NINE_STATE_FOG_SHADER | \ + NINE_STATE_POINTSIZE_SHADER) + +#define NINE_STATE_SHADER_CHANGE_PS \ + (NINE_STATE_PS | \ + NINE_STATE_TEXTURE | \ + NINE_STATE_FOG_SHADER | \ + NINE_STATE_PS1X_SHADER) + +#define NINE_STATE_FREQUENT \ + (NINE_STATE_RASTERIZER | \ + NINE_STATE_TEXTURE | \ + NINE_STATE_SAMPLER | \ + NINE_STATE_VS_CONST | \ + NINE_STATE_PS_CONST | \ + NINE_STATE_MULTISAMPLE) + +#define NINE_STATE_COMMON \ + (NINE_STATE_FB | \ + NINE_STATE_BLEND | \ + NINE_STATE_DSA | \ + NINE_STATE_VIEWPORT | \ + NINE_STATE_VDECL | \ + NINE_STATE_IDXBUF | \ + NINE_STATE_STREAMFREQ) + +#define NINE_STATE_RARE \ + (NINE_STATE_SCISSOR | \ + NINE_STATE_BLEND_COLOR | \ + NINE_STATE_STENCIL_REF | \ NINE_STATE_SAMPLE_MASK) -#define NINE_STATE_FREQ_GROUP_1 ~NINE_STATE_FREQ_GROUP_0 -#define NINE_STATE_SHADER_VARIANT_GROUP \ - (NINE_STATE_TEXTURE | \ - NINE_STATE_VS | \ - NINE_STATE_PS) +/* TODO: only go through dirty textures */ +static void +validate_textures(struct NineDevice9 *device) +{ + struct NineBaseTexture9 *tex, *ptr; + LIST_FOR_EACH_ENTRY_SAFE(tex, ptr, &device->update_textures, list) { + list_delinit(&tex->list); + NineBaseTexture9_Validate(tex); + } +} + +static void +update_managed_buffers(struct NineDevice9 *device) +{ + struct NineBuffer9 *buf, *ptr; + LIST_FOR_EACH_ENTRY_SAFE(buf, ptr, &device->update_buffers, managed.list) { + list_delinit(&buf->managed.list); + NineBuffer9_Upload(buf); + } +} + +void +nine_update_state_framebuffer_clear(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + + validate_textures(device); + + if (state->changed.group & NINE_STATE_FB) + update_framebuffer(device, TRUE); +} boolean -nine_update_state(struct NineDevice9 *device, uint32_t mask) +nine_update_state(struct NineDevice9 *device) { struct pipe_context *pipe = device->pipe; struct nine_state *state = &device->state; uint32_t group; - DBG("changed state groups: %x | %x\n", - state->changed.group & NINE_STATE_FREQ_GROUP_0, - state->changed.group & NINE_STATE_FREQ_GROUP_1); + DBG("changed state groups: %x\n", state->changed.group); /* NOTE: We may want to use the cso cache for everything, or let * NineDevice9.RestoreNonCSOState actually set the states, then we wouldn't @@ -894,37 +991,83 @@ nine_update_state(struct NineDevice9 *device, uint32_t mask) * may be dirty anyway, even if no texture bindings changed. */ validate_textures(device); /* may clobber state */ + update_managed_buffers(device); /* ff_update may change VS/PS dirty bits */ - if ((mask & NINE_STATE_FF) && unlikely(!state->vs || !state->ps)) + if (unlikely(!state->programmable_vs || !state->ps)) nine_ff_update(device); - group = state->changed.group & mask; + group = state->changed.group; - if (group & NINE_STATE_SHADER_VARIANT_GROUP) - group |= update_shader_variant_keys(device); + if (group & (NINE_STATE_SHADER_CHANGE_VS | NINE_STATE_SHADER_CHANGE_PS)) { + if (group & NINE_STATE_SHADER_CHANGE_VS) + group |= prepare_vs(device, (group & NINE_STATE_VS) != 0); /* may set NINE_STATE_RASTERIZER and NINE_STATE_SAMPLER*/ + if (group & NINE_STATE_SHADER_CHANGE_PS) + group |= prepare_ps(device, (group & NINE_STATE_PS) != 0); + } - if (group & NINE_STATE_FREQ_GROUP_0) { + if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) { if (group & NINE_STATE_FB) - group = update_framebuffer(device) & mask; + update_framebuffer(device, FALSE); + if (group & NINE_STATE_BLEND) + prepare_blend(device); + if (group & NINE_STATE_DSA) + prepare_dsa(device); if (group & NINE_STATE_VIEWPORT) update_viewport(device); - if (group & NINE_STATE_SCISSOR) - update_scissor(device); - - if (group & NINE_STATE_DSA) - update_dsa(device); - if (group & NINE_STATE_BLEND) - update_blend(device); - - if (group & NINE_STATE_VS) - group |= update_vs(device); + if (group & (NINE_STATE_VDECL | NINE_STATE_VS | NINE_STATE_STREAMFREQ)) + update_vertex_elements(device); + if (group & NINE_STATE_IDXBUF) + commit_index_buffer(device); + } + if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS))) { + if (group & NINE_STATE_MULTISAMPLE) + group |= check_multisample(device); if (group & NINE_STATE_RASTERIZER) - update_rasterizer(device); + prepare_rasterizer(device); + if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER)) + update_textures_and_samplers(device); + if (device->prefer_user_constbuf) { + if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->programmable_vs) + prepare_vs_constants_userbuf(device); + if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps) + prepare_ps_constants_userbuf(device); + } else { + if ((group & NINE_STATE_VS_CONST) && state->programmable_vs) + upload_constants(device, PIPE_SHADER_VERTEX); + if ((group & NINE_STATE_PS_CONST) && state->ps) + upload_constants(device, PIPE_SHADER_FRAGMENT); + } + } + + if (state->changed.vtxbuf) + update_vertex_buffers(device); - if (group & NINE_STATE_PS) - group |= update_ps(device); + if (state->commit & NINE_STATE_COMMIT_BLEND) + commit_blend(device); + if (state->commit & NINE_STATE_COMMIT_DSA) + commit_dsa(device); + if (state->commit & NINE_STATE_COMMIT_RASTERIZER) + commit_rasterizer(device); + if (state->commit & NINE_STATE_COMMIT_CONST_VS) + commit_vs_constants(device); + if (state->commit & NINE_STATE_COMMIT_CONST_PS) + commit_ps_constants(device); + if (state->commit & NINE_STATE_COMMIT_VS) + commit_vs(device); + if (state->commit & NINE_STATE_COMMIT_PS) + commit_ps(device); + + state->commit = 0; + + if (unlikely(state->changed.ucp)) { + pipe->set_clip_state(pipe, &state->clip); + state->changed.ucp = 0; + } + if (unlikely(group & NINE_STATE_RARE)) { + if (group & NINE_STATE_SCISSOR) + commit_scissor(device); if (group & NINE_STATE_BLEND_COLOR) { struct pipe_blend_color color; d3dcolor_to_rgba(&color.color[0], state->rs[D3DRS_BLENDFACTOR]); @@ -941,38 +1084,7 @@ nine_update_state(struct NineDevice9 *device, uint32_t mask) } } - if (state->changed.ucp) { - pipe->set_clip_state(pipe, &state->clip); - state->changed.ucp = 0; - } - - if (group & (NINE_STATE_FREQ_GROUP_1 | NINE_STATE_VS)) { - if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER)) - update_textures_and_samplers(device); - - if (group & NINE_STATE_IDXBUF) - update_index_buffer(device); - - if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) || - state->changed.stream_freq & ~1) - update_vertex_elements(device); - - if (device->prefer_user_constbuf) { - if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs) - update_vs_constants_userbuf(device); - if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps) - update_ps_constants_userbuf(device); - } else { - if ((group & NINE_STATE_VS_CONST) && state->vs) - update_constants(device, PIPE_SHADER_VERTEX); - if ((group & NINE_STATE_PS_CONST) && state->ps) - update_constants(device, PIPE_SHADER_FRAGMENT); - } - } - if (state->changed.vtxbuf) - update_vertex_buffers(device); - - device->state.changed.group &= ~mask | + device->state.changed.group &= (NINE_STATE_FF | NINE_STATE_VS_CONST | NINE_STATE_PS_CONST); DBG("finished\n"); @@ -980,6 +1092,7 @@ nine_update_state(struct NineDevice9 *device, uint32_t mask) return TRUE; } +/* State defaults */ static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] = { @@ -1094,7 +1207,8 @@ static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] = [D3DRS_BLENDOPALPHA] = D3DBLENDOP_ADD, [NINED3DRS_VSPOINTSIZE] = FALSE, [NINED3DRS_RTMASK] = 0xf, - [NINED3DRS_ALPHACOVERAGE] = FALSE + [NINED3DRS_ALPHACOVERAGE] = FALSE, + [NINED3DRS_MULTISAMPLE] = FALSE }; static const DWORD nine_tex_stage_state_defaults[NINED3DTSS_LAST + 1] = { @@ -1134,6 +1248,18 @@ static const DWORD nine_samp_state_defaults[NINED3DSAMP_LAST + 1] = [NINED3DSAMP_MINLOD] = 0, [NINED3DSAMP_SHADOW] = 0 }; + +void nine_state_restore_non_cso(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + + state->changed.group = NINE_STATE_ALL; + state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1; + state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1; + state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK; + state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS; +} + void nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, boolean is_reset) @@ -1145,15 +1271,16 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, */ memcpy(state->rs, nine_render_state_defaults, sizeof(state->rs)); - for (s = 0; s < Elements(state->ff.tex_stage); ++s) { + for (s = 0; s < ARRAY_SIZE(state->ff.tex_stage); ++s) { memcpy(&state->ff.tex_stage[s], nine_tex_stage_state_defaults, sizeof(state->ff.tex_stage[s])); state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] = s; } state->ff.tex_stage[0][D3DTSS_COLOROP] = D3DTOP_MODULATE; state->ff.tex_stage[0][D3DTSS_ALPHAOP] = D3DTOP_SELECTARG1; + memset(&state->bumpmap_vars, 0, sizeof(state->bumpmap_vars)); - for (s = 0; s < Elements(state->samp); ++s) { + for (s = 0; s < ARRAY_SIZE(state->samp); ++s) { memcpy(&state->samp[s], nine_samp_state_defaults, sizeof(state->samp[s])); } @@ -1167,9 +1294,14 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, */ state->rs[D3DRS_POINTSIZE_MAX] = fui(caps->MaxPointSize); + memcpy(state->rs_advertised, state->rs, sizeof(state->rs)); + /* Set changed flags to initialize driver. */ state->changed.group = NINE_STATE_ALL; + state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1; + state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1; + state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK; state->ff.changed.transform[0] = ~0; state->ff.changed.transform[D3DTS_WORLD / 32] |= 1 << (D3DTS_WORLD % 32); @@ -1179,13 +1311,30 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, state->viewport.MaxZ = 1.0f; } - for (s = 0; s < Elements(state->changed.sampler); ++s) + for (s = 0; s < ARRAY_SIZE(state->changed.sampler); ++s) state->changed.sampler[s] = ~0; if (!is_reset) { state->dummy_vbo_bound_at = -1; state->vbo_bound_done = FALSE; } + + if (!device->prefer_user_constbuf) { + /* fill cb_vs and cb_ps for the non user constbuf path */ + struct pipe_constant_buffer cb; + + cb.buffer_offset = 0; + cb.buffer_size = device->vs_const_size; + cb.buffer = device->constbuf_vs; + cb.user_buffer = NULL; + state->pipe.cb_vs = cb; + + cb.buffer_size = device->ps_const_size; + cb.buffer = device->constbuf_ps; + state->pipe.cb_ps = cb; + + state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS; + } } void @@ -1193,14 +1342,16 @@ nine_state_clear(struct nine_state *state, const boolean device) { unsigned i; - for (i = 0; i < Elements(state->rt); ++i) + for (i = 0; i < ARRAY_SIZE(state->rt); ++i) nine_bind(&state->rt[i], NULL); nine_bind(&state->ds, NULL); nine_bind(&state->vs, NULL); nine_bind(&state->ps, NULL); nine_bind(&state->vdecl, NULL); - for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) + for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) { nine_bind(&state->stream[i], NULL); + pipe_resource_reference(&state->vtxbuf[i].buffer, NULL); + } nine_bind(&state->idxbuf, NULL); for (i = 0; i < NINE_MAX_SAMPLERS; ++i) { if (device && @@ -1341,7 +1492,7 @@ const uint32_t nine_render_states_vertex[(NINED3DRS_LAST + 31) / 32] = /* TODO: put in the right values */ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = { - [D3DRS_ZENABLE] = NINE_STATE_DSA, + [D3DRS_ZENABLE] = NINE_STATE_DSA | NINE_STATE_MULTISAMPLE, [D3DRS_FILLMODE] = NINE_STATE_RASTERIZER, [D3DRS_SHADEMODE] = NINE_STATE_RASTERIZER, [D3DRS_ZWRITEENABLE] = NINE_STATE_DSA, @@ -1353,17 +1504,17 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = [D3DRS_ZFUNC] = NINE_STATE_DSA, [D3DRS_ALPHAREF] = NINE_STATE_DSA, [D3DRS_ALPHAFUNC] = NINE_STATE_DSA, - [D3DRS_DITHERENABLE] = NINE_STATE_RASTERIZER, + [D3DRS_DITHERENABLE] = NINE_STATE_BLEND, [D3DRS_ALPHABLENDENABLE] = NINE_STATE_BLEND, - [D3DRS_FOGENABLE] = NINE_STATE_FF_OTHER, + [D3DRS_FOGENABLE] = NINE_STATE_FF_OTHER | NINE_STATE_FOG_SHADER | NINE_STATE_PS_CONST, [D3DRS_SPECULARENABLE] = NINE_STATE_FF_LIGHTING, - [D3DRS_FOGCOLOR] = NINE_STATE_FF_OTHER, - [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_OTHER, - [D3DRS_FOGSTART] = NINE_STATE_FF_OTHER, - [D3DRS_FOGEND] = NINE_STATE_FF_OTHER, - [D3DRS_FOGDENSITY] = NINE_STATE_FF_OTHER, + [D3DRS_FOGCOLOR] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, + [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_OTHER | NINE_STATE_FOG_SHADER | NINE_STATE_PS_CONST, + [D3DRS_FOGSTART] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, + [D3DRS_FOGEND] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, + [D3DRS_FOGDENSITY] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, [D3DRS_RANGEFOGENABLE] = NINE_STATE_FF_OTHER, - [D3DRS_STENCILENABLE] = NINE_STATE_DSA, + [D3DRS_STENCILENABLE] = NINE_STATE_DSA | NINE_STATE_MULTISAMPLE, [D3DRS_STENCILFAIL] = NINE_STATE_DSA, [D3DRS_STENCILZFAIL] = NINE_STATE_DSA, [D3DRS_STENCILPASS] = NINE_STATE_DSA, @@ -1394,17 +1545,17 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = [D3DRS_VERTEXBLEND] = NINE_STATE_FF_OTHER, [D3DRS_CLIPPLANEENABLE] = NINE_STATE_RASTERIZER, [D3DRS_POINTSIZE] = NINE_STATE_RASTERIZER, - [D3DRS_POINTSIZE_MIN] = NINE_STATE_MISC_CONST, + [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER | NINE_STATE_POINTSIZE_SHADER, [D3DRS_POINTSPRITEENABLE] = NINE_STATE_RASTERIZER, [D3DRS_POINTSCALEENABLE] = NINE_STATE_FF_OTHER, [D3DRS_POINTSCALE_A] = NINE_STATE_FF_OTHER, [D3DRS_POINTSCALE_B] = NINE_STATE_FF_OTHER, [D3DRS_POINTSCALE_C] = NINE_STATE_FF_OTHER, - [D3DRS_MULTISAMPLEANTIALIAS] = NINE_STATE_RASTERIZER, + [D3DRS_MULTISAMPLEANTIALIAS] = NINE_STATE_MULTISAMPLE, [D3DRS_MULTISAMPLEMASK] = NINE_STATE_SAMPLE_MASK, [D3DRS_PATCHEDGESTYLE] = NINE_STATE_UNHANDLED, [D3DRS_DEBUGMONITORTOKEN] = NINE_STATE_UNHANDLED, - [D3DRS_POINTSIZE_MAX] = NINE_STATE_MISC_CONST, + [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER | NINE_STATE_POINTSIZE_SHADER, [D3DRS_INDEXEDVERTEXBLENDENABLE] = NINE_STATE_FF_OTHER, [D3DRS_COLORWRITEENABLE] = NINE_STATE_BLEND, [D3DRS_TWEENFACTOR] = NINE_STATE_FF_OTHER, @@ -1446,6 +1597,8 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = [D3DRS_BLENDOPALPHA] = NINE_STATE_BLEND }; +/* Misc */ + D3DMATRIX * nine_state_access_transform(struct nine_state *state, D3DTRANSFORMSTATETYPE t, boolean alloc) @@ -1601,4 +1754,3 @@ const char *nine_d3drs_to_string(DWORD State) return "(invalid)"; } } -