X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fstate_trackers%2Fnine%2Fnine_state.c;h=a832a13a32a42a133e447ff4ded2505c046784c8;hb=eda1e6ece722b1f9f7083e419884ca4f7b025dd2;hp=e185b025134db4708e65e05597cd9a719a2fc62c;hpb=fcca7ff38a6ba923abd910f50a8e2bcf6560e6ae;p=mesa.git diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index e185b025134..a832a13a32a 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -23,8 +23,10 @@ #include "device9.h" #include "basetexture9.h" +#include "buffer9.h" #include "indexbuffer9.h" #include "surface9.h" +#include "vertexbuffer9.h" #include "vertexdeclaration9.h" #include "vertexshader9.h" #include "pixelshader9.h" @@ -35,9 +37,27 @@ #include "cso_cache/cso_context.h" #include "util/u_upload_mgr.h" #include "util/u_math.h" +#include "util/u_box.h" +#include "util/u_simple_shaders.h" #define DBG_CHANNEL DBG_DEVICE +/* Check if some states need to be set dirty */ + +static inline DWORD +check_multisample(struct NineDevice9 *device) +{ + DWORD *rs = device->state.rs; + DWORD new_value = (rs[D3DRS_ZENABLE] || rs[D3DRS_STENCILENABLE]) && + device->state.rt[0]->desc.MultiSampleType >= 1 && + rs[D3DRS_MULTISAMPLEANTIALIAS]; + if (rs[NINED3DRS_MULTISAMPLE] != new_value) { + rs[NINED3DRS_MULTISAMPLE] = new_value; + return NINE_STATE_RASTERIZER; + } + return 0; +} + /* State preparation only */ static inline void @@ -57,160 +77,145 @@ prepare_dsa(struct NineDevice9 *device) static inline void prepare_rasterizer(struct NineDevice9 *device) { - nine_convert_rasterizer_state(&device->state.pipe.rast, device->state.rs); + nine_convert_rasterizer_state(device, &device->state.pipe.rast, device->state.rs); device->state.commit |= NINE_STATE_COMMIT_RASTERIZER; } static void -prepare_ps_constants_userbuf(struct NineDevice9 *device); - -#define DO_UPLOAD_CONST_F(buf,p,c,d) \ - do { \ - DBG("upload ConstantF [%u .. %u]\n", x, (x) + (c) - 1); \ - box.x = (p) * 4 * sizeof(float); \ - box.width = (c) * 4 * sizeof(float); \ - pipe->transfer_inline_write(pipe, buf, 0, usage, &box, &((d)[p * 4]), \ - 0, 0); \ - } while(0) - -/* OK, this is a bit ugly ... */ -static void -upload_constants(struct NineDevice9 *device, unsigned shader_type) +prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device) { - struct pipe_context *pipe = device->pipe; - struct pipe_resource *buf; - struct pipe_box box; - const void *data; - const float *const_f; - const int *const_i; - const BOOL *const_b; - uint32_t data_b[NINE_MAX_CONST_B]; - uint16_t dirty_i; - uint16_t dirty_b; - const unsigned usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE; - unsigned x = 0; /* silence warning */ - unsigned i, c; - struct nine_range *r, *p, *lconstf_ranges; - float *lconstf_data; - - box.y = 0; - box.z = 0; - box.height = 1; - box.depth = 1; - - if (shader_type == PIPE_SHADER_VERTEX) { - DBG("VS\n"); - buf = device->constbuf_vs; - - const_f = device->state.vs_const_f; - for (p = r = device->state.changed.vs_const_f; r; p = r, r = r->next) - DO_UPLOAD_CONST_F(buf, r->bgn, r->end - r->bgn, const_f); - if (p) { - nine_range_pool_put_chain(&device->range_pool, - device->state.changed.vs_const_f, p); - device->state.changed.vs_const_f = NULL; + struct nine_state *state = &device->state; + + if (state->changed.vs_const_f || state->changed.group & NINE_STATE_SWVP) { + struct pipe_constant_buffer cb; + + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 4096 * sizeof(float[4]); + cb.user_buffer = state->vs_const_f_swvp; + + if (state->vs->lconstf.ranges) { + const struct nine_lconstf *lconstf = &device->state.vs->lconstf; + const struct nine_range *r = lconstf->ranges; + unsigned n = 0; + float *dst = device->state.vs_lconstf_temp; + float *src = (float *)cb.user_buffer; + memcpy(dst, src, cb.buffer_size); + while (r) { + unsigned p = r->bgn; + unsigned c = r->end - r->bgn; + memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float)); + n += c; + r = r->next; + } + cb.user_buffer = dst; } - dirty_i = device->state.changed.vs_const_i; - device->state.changed.vs_const_i = 0; - const_i = &device->state.vs_const_i[0][0]; + state->pipe.cb0_swvp = cb; - dirty_b = device->state.changed.vs_const_b; - device->state.changed.vs_const_b = 0; - const_b = device->state.vs_const_b; + cb.user_buffer = (char *)cb.user_buffer + 4096 * sizeof(float[4]); + state->pipe.cb1_swvp = cb; + } - lconstf_ranges = device->state.vs->lconstf.ranges; - lconstf_data = device->state.vs->lconstf.data; + if (state->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) { + struct pipe_constant_buffer cb; - device->state.changed.group &= ~NINE_STATE_VS_CONST; - } else { - DBG("PS\n"); - /* features only implemented on the userbuf path */ - if (device->state.ps->bumpenvmat_needed || ( - device->state.ps->byte_code.version < 0x30 && - device->state.rs[D3DRS_FOGENABLE])) { - device->prefer_user_constbuf = TRUE; - prepare_ps_constants_userbuf(device); - return; - } - buf = device->constbuf_ps; - - const_f = device->state.ps_const_f; - for (p = r = device->state.changed.ps_const_f; r; p = r, r = r->next) - DO_UPLOAD_CONST_F(buf, r->bgn, r->end - r->bgn, const_f); - if (p) { - nine_range_pool_put_chain(&device->range_pool, - device->state.changed.ps_const_f, p); - device->state.changed.ps_const_f = NULL; - } + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 2048 * sizeof(float[4]); + cb.user_buffer = state->vs_const_i; - dirty_i = device->state.changed.ps_const_i; - device->state.changed.ps_const_i = 0; - const_i = &device->state.ps_const_i[0][0]; - - dirty_b = device->state.changed.ps_const_b; - device->state.changed.ps_const_b = 0; - const_b = device->state.ps_const_b; - - lconstf_ranges = NULL; - lconstf_data = NULL; - - device->state.changed.group &= ~NINE_STATE_PS_CONST; - } - - /* write range from min to max changed, it's not much data */ - /* bool1 */ - if (dirty_b) { - c = util_last_bit(dirty_b); - i = ffs(dirty_b) - 1; - x = buf->width0 - (NINE_MAX_CONST_B - i) * 4; - c -= i; - memcpy(data_b, &(const_b[i]), c * sizeof(uint32_t)); - box.x = x; - box.width = c * 4; - DBG("upload ConstantB [%u .. %u]\n", x, x + c - 1); - pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data_b, 0, 0); - } - - /* int4 */ - for (c = 0, i = 0; dirty_i; i++, dirty_i >>= 1) { - if (dirty_i & 1) { - if (!c) - x = i; - ++c; - } else - if (c) { - DBG("upload ConstantI [%u .. %u]\n", x, x + c - 1); - data = &const_i[x * 4]; - box.x = buf->width0 - (NINE_MAX_CONST_I * 4 + NINE_MAX_CONST_B) * 4; - box.x += x * 4 * sizeof(int); - box.width = c * 4 * sizeof(int); - c = 0; - pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0); - } + state->pipe.cb2_swvp = cb; + state->changed.vs_const_i = 0; } - if (c) { - DBG("upload ConstantI [%u .. %u]\n", x, x + c - 1); - data = &const_i[x * 4]; - box.x = buf->width0 - (NINE_MAX_CONST_I * 4 + NINE_MAX_CONST_B) * 4; - box.x += x * 4 * sizeof(int); - box.width = c * 4 * sizeof(int); - pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0); + + if (state->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) { + struct pipe_constant_buffer cb; + + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 512 * sizeof(float[4]); + cb.user_buffer = state->vs_const_b; + + state->pipe.cb3_swvp = cb; + state->changed.vs_const_b = 0; } - /* TODO: only upload these when shader itself changes */ - if (lconstf_ranges) { - unsigned n = 0; - struct nine_range *r = lconstf_ranges; - while (r) { - box.x = r->bgn * 4 * sizeof(float); - n += r->end - r->bgn; - box.width = (r->end - r->bgn) * 4 * sizeof(float); - data = &lconstf_data[4 * n]; - pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0); - r = r->next; - } + if (!device->driver_caps.user_cbufs) { + struct pipe_constant_buffer *cb = &(state->pipe.cb0_swvp); + u_upload_data(device->constbuf_uploader, + 0, + cb->buffer_size, + device->constbuf_alignment, + cb->user_buffer, + &(cb->buffer_offset), + &(cb->buffer)); + u_upload_unmap(device->constbuf_uploader); + cb->user_buffer = NULL; + + cb = &(state->pipe.cb1_swvp); + u_upload_data(device->constbuf_uploader, + 0, + cb->buffer_size, + device->constbuf_alignment, + cb->user_buffer, + &(cb->buffer_offset), + &(cb->buffer)); + u_upload_unmap(device->constbuf_uploader); + cb->user_buffer = NULL; + + cb = &(state->pipe.cb2_swvp); + u_upload_data(device->constbuf_uploader, + 0, + cb->buffer_size, + device->constbuf_alignment, + cb->user_buffer, + &(cb->buffer_offset), + &(cb->buffer)); + u_upload_unmap(device->constbuf_uploader); + cb->user_buffer = NULL; + + cb = &(state->pipe.cb3_swvp); + u_upload_data(device->constbuf_uploader, + 0, + cb->buffer_size, + device->constbuf_alignment, + cb->user_buffer, + &(cb->buffer_offset), + &(cb->buffer)); + u_upload_unmap(device->constbuf_uploader); + cb->user_buffer = NULL; + } + + if (device->state.changed.vs_const_f) { + struct nine_range *r = device->state.changed.vs_const_f; + struct nine_range *p = r; + while (p->next) + p = p->next; + nine_range_pool_put_chain(&device->range_pool, r, p); + device->state.changed.vs_const_f = NULL; } + + if (device->state.changed.vs_const_i) { + struct nine_range *r = device->state.changed.vs_const_i; + struct nine_range *p = r; + while (p->next) + p = p->next; + nine_range_pool_put_chain(&device->range_pool, r, p); + device->state.changed.vs_const_i = NULL; + } + + if (device->state.changed.vs_const_b) { + struct nine_range *r = device->state.changed.vs_const_b; + struct nine_range *p = r; + while (p->next) + p = p->next; + nine_range_pool_put_chain(&device->range_pool, r, p); + device->state.changed.vs_const_b = NULL; + } + + state->changed.group &= ~NINE_STATE_VS_CONST; + state->commit |= NINE_STATE_COMMIT_CONST_VS; } static void @@ -223,21 +228,27 @@ prepare_vs_constants_userbuf(struct NineDevice9 *device) cb.buffer_size = device->state.vs->const_used_size; cb.user_buffer = device->state.vs_const_f; - if (!cb.buffer_size) + if (device->swvp) { + prepare_vs_constants_userbuf_swvp(device); return; + } - if (state->changed.vs_const_i) { + if (state->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) { int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f]; - memcpy(idst, state->vs_const_i, sizeof(state->vs_const_i)); + memcpy(idst, state->vs_const_i, NINE_MAX_CONST_I * sizeof(int[4])); state->changed.vs_const_i = 0; } - if (state->changed.vs_const_b) { + + if (state->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) { int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f]; uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I]; - memcpy(bdst, state->vs_const_b, sizeof(state->vs_const_b)); + memcpy(bdst, state->vs_const_b, NINE_MAX_CONST_B * sizeof(BOOL)); state->changed.vs_const_b = 0; } + if (!cb.buffer_size) + return; + if (device->state.vs->lconstf.ranges) { /* TODO: Can we make it so that we don't have to copy everything ? */ const struct nine_lconstf *lconstf = &device->state.vs->lconstf; @@ -260,6 +271,7 @@ prepare_vs_constants_userbuf(struct NineDevice9 *device) u_upload_data(device->constbuf_uploader, 0, cb.buffer_size, + device->constbuf_alignment, cb.user_buffer, &cb.buffer_offset, &cb.buffer); @@ -277,6 +289,24 @@ prepare_vs_constants_userbuf(struct NineDevice9 *device) nine_range_pool_put_chain(&device->range_pool, r, p); device->state.changed.vs_const_f = NULL; } + + if (device->state.changed.vs_const_i) { + struct nine_range *r = device->state.changed.vs_const_i; + struct nine_range *p = r; + while (p->next) + p = p->next; + nine_range_pool_put_chain(&device->range_pool, r, p); + device->state.changed.vs_const_i = NULL; + } + + if (device->state.changed.vs_const_b) { + struct nine_range *r = device->state.changed.vs_const_b; + struct nine_range *p = r; + while (p->next) + p = p->next; + nine_range_pool_put_chain(&device->range_pool, r, p); + device->state.changed.vs_const_b = NULL; + } state->changed.group &= ~NINE_STATE_VS_CONST; state->commit |= NINE_STATE_COMMIT_CONST_VS; } @@ -336,6 +366,7 @@ prepare_ps_constants_userbuf(struct NineDevice9 *device) u_upload_data(device->constbuf_uploader, 0, cb.buffer_size, + device->constbuf_alignment, cb.user_buffer, &cb.buffer_offset, &cb.buffer); @@ -365,14 +396,14 @@ prepare_vs(struct NineDevice9 *device, uint8_t shader_changed) uint32_t changed_group = 0; int has_key_changed = 0; - if (likely(vs)) - has_key_changed = NineVertexShader9_UpdateKey(vs, state); + if (likely(state->programmable_vs)) + has_key_changed = NineVertexShader9_UpdateKey(vs, device); if (!shader_changed && !has_key_changed) return 0; /* likely because we dislike FF */ - if (likely(vs)) { + if (likely(state->programmable_vs)) { state->cso.vs = NineVertexShader9_GetVariant(vs); } else { vs = device->ff.vs; @@ -425,8 +456,8 @@ prepare_ps(struct NineDevice9 *device, uint8_t shader_changed) /* State preparation + State commit */ -static uint32_t -update_framebuffer(struct NineDevice9 *device) +static void +update_framebuffer(struct NineDevice9 *device, bool is_clear) { struct pipe_context *pipe = device->pipe; struct nine_state *state = &device->state; @@ -436,7 +467,8 @@ update_framebuffer(struct NineDevice9 *device) unsigned w = rt0->desc.Width; unsigned h = rt0->desc.Height; D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType; - unsigned mask = state->ps ? state->ps->rt_mask : 1; + unsigned ps_mask = state->ps ? state->ps->rt_mask : 1; + unsigned mask = is_clear ? 0xf : ps_mask; const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0; DBG("\n"); @@ -496,33 +528,13 @@ update_framebuffer(struct NineDevice9 *device) pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */ - if (fb->zsbuf) { - DWORD scale; - switch (fb->zsbuf->format) { - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - scale = fui(1.0f); - break; - case PIPE_FORMAT_Z16_UNORM: - scale = fui((float)(1 << 16)); - break; - default: - scale = fui((float)(1 << 24)); - break; - } - if (state->rs[NINED3DRS_ZBIASSCALE] != scale) { - state->rs[NINED3DRS_ZBIASSCALE] = scale; - state->changed.group |= NINE_STATE_RASTERIZER; - } - } - - return state->changed.group; + if (is_clear && state->rt_mask == ps_mask) + state->changed.group &= ~NINE_STATE_FB; } static void update_viewport(struct NineDevice9 *device) { - struct pipe_context *pipe = device->pipe; const D3DVIEWPORT9 *vport = &device->state.viewport; struct pipe_viewport_state pvport; @@ -561,7 +573,7 @@ update_viewport(struct NineDevice9 *device) pvport.translate[1] -= 1.0f / 128.0f; } - pipe->set_viewport_states(pipe, 0, 1, &pvport); + cso_set_viewport(device->cso, &pvport); } /* Loop through VS inputs and pick the vertex elements with the declared @@ -585,7 +597,7 @@ update_vertex_elements(struct NineDevice9 *device) state->stream_usage_mask = 0; memset(vdecl_index_map, -1, 16); memset(used_streams, 0, device->caps.MaxStreams); - vs = device->state.vs ? device->state.vs : device->ff.vs; + vs = state->programmable_vs ? device->state.vs : device->ff.vs; if (vdecl) { for (n = 0; n < vs->num_inputs; ++n) { @@ -664,7 +676,6 @@ update_vertex_buffers(struct NineDevice9 *device) struct pipe_vertex_buffer dummy_vtxbuf; uint32_t mask = state->changed.vtxbuf; unsigned i; - unsigned start; DBG("mask=%x\n", mask); @@ -722,20 +733,16 @@ update_sampler_derived(struct nine_state *state, unsigned s) static void update_textures_and_samplers(struct NineDevice9 *device) { - struct pipe_context *pipe = device->pipe; struct nine_state *state = &device->state; struct pipe_sampler_view *view[NINE_MAX_SAMPLERS]; - struct pipe_sampler_state samp; unsigned num_textures; unsigned i; - boolean commit_views; boolean commit_samplers; uint16_t sampler_mask = state->ps ? state->ps->sampler_mask : device->ff.ps->sampler_mask; /* TODO: Can we reduce iterations here ? */ - commit_views = FALSE; commit_samplers = FALSE; state->bound_samplers_mask_ps = 0; for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_PS; ++i) { @@ -765,26 +772,12 @@ update_textures_and_samplers(struct NineDevice9 *device) * unbind dummy sampler directly when they are not needed * anymore, but they're going to be removed as long as texture * or sampler states are changed. */ - view[i] = device->dummy_sampler; + view[i] = device->dummy_sampler_view; num_textures = i + 1; - memset(&samp, 0, sizeof(samp)); - samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - samp.max_lod = 15.0f; - samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.min_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.compare_mode = PIPE_TEX_COMPARE_NONE; - samp.compare_func = PIPE_FUNC_LEQUAL; - samp.normalized_coords = 1; - samp.seamless_cube_map = 1; - cso_single_sampler(device->cso, PIPE_SHADER_FRAGMENT, - s - NINE_SAMPLER_PS(0), &samp); + s - NINE_SAMPLER_PS(0), &device->dummy_sampler_state); - commit_views = TRUE; commit_samplers = TRUE; state->changed.sampler[s] = ~0; } @@ -792,18 +785,13 @@ update_textures_and_samplers(struct NineDevice9 *device) state->bound_samplers_mask_ps |= (1 << s); } - commit_views |= (state->changed.texture & NINE_PS_SAMPLERS_MASK) != 0; - commit_views |= state->changed.srgb; - if (commit_views) - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, - num_textures, view); + cso_set_sampler_views(device->cso, PIPE_SHADER_FRAGMENT, num_textures, view); if (commit_samplers) cso_single_sampler_done(device->cso, PIPE_SHADER_FRAGMENT); - commit_views = FALSE; commit_samplers = FALSE; - sampler_mask = state->vs ? state->vs->sampler_mask : 0; + sampler_mask = state->programmable_vs ? state->vs->sampler_mask : 0; state->bound_samplers_mask_vs = 0; for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_VS; ++i) { const unsigned s = NINE_SAMPLER_VS(i); @@ -832,42 +820,24 @@ update_textures_and_samplers(struct NineDevice9 *device) * unbind dummy sampler directly when they are not needed * anymore, but they're going to be removed as long as texture * or sampler states are changed. */ - view[i] = device->dummy_sampler; + view[i] = device->dummy_sampler_view; num_textures = i + 1; - memset(&samp, 0, sizeof(samp)); - samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - samp.max_lod = 15.0f; - samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.min_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.compare_mode = PIPE_TEX_COMPARE_NONE; - samp.compare_func = PIPE_FUNC_LEQUAL; - samp.normalized_coords = 1; - samp.seamless_cube_map = 1; - cso_single_sampler(device->cso, PIPE_SHADER_VERTEX, - s - NINE_SAMPLER_VS(0), &samp); + s - NINE_SAMPLER_VS(0), &device->dummy_sampler_state); - commit_views = TRUE; commit_samplers = TRUE; state->changed.sampler[s] = ~0; } state->bound_samplers_mask_vs |= (1 << s); } - commit_views |= (state->changed.texture & NINE_VS_SAMPLERS_MASK) != 0; - commit_views |= state->changed.srgb; - if (commit_views) - pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, - num_textures, view); + + cso_set_sampler_views(device->cso, PIPE_SHADER_VERTEX, num_textures, view); if (commit_samplers) cso_single_sampler_done(device->cso, PIPE_SHADER_VERTEX); - state->changed.srgb = FALSE; state->changed.texture = 0; } @@ -914,10 +884,18 @@ commit_vs_constants(struct NineDevice9 *device) { struct pipe_context *pipe = device->pipe; - if (unlikely(!device->state.vs)) + if (unlikely(!device->state.programmable_vs)) pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff); - else - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs); + else { + if (device->swvp) { + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb0_swvp); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 1, &device->state.pipe.cb1_swvp); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 2, &device->state.pipe.cb2_swvp); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 3, &device->state.pipe.cb3_swvp); + } else { + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs); + } + } } static inline void @@ -949,21 +927,42 @@ commit_ps(struct NineDevice9 *device) } /* State Update */ -#define NINE_STATE_FREQ_GROUP_0 \ - (NINE_STATE_FB | \ - NINE_STATE_VIEWPORT | \ - NINE_STATE_SCISSOR | \ - NINE_STATE_BLEND | \ - NINE_STATE_DSA | \ - NINE_STATE_RASTERIZER | \ - NINE_STATE_VS | \ - NINE_STATE_PS | \ - NINE_STATE_BLEND_COLOR | \ - NINE_STATE_STENCIL_REF | \ - NINE_STATE_SAMPLE_MASK | \ - NINE_STATE_FOG_SHADER) - -#define NINE_STATE_FREQ_GROUP_1 ~NINE_STATE_FREQ_GROUP_0 +#define NINE_STATE_SHADER_CHANGE_VS \ + (NINE_STATE_VS | \ + NINE_STATE_TEXTURE | \ + NINE_STATE_FOG_SHADER | \ + NINE_STATE_POINTSIZE_SHADER | \ + NINE_STATE_SWVP) + +#define NINE_STATE_SHADER_CHANGE_PS \ + (NINE_STATE_PS | \ + NINE_STATE_TEXTURE | \ + NINE_STATE_FOG_SHADER | \ + NINE_STATE_PS1X_SHADER) + +#define NINE_STATE_FREQUENT \ + (NINE_STATE_RASTERIZER | \ + NINE_STATE_TEXTURE | \ + NINE_STATE_SAMPLER | \ + NINE_STATE_VS_CONST | \ + NINE_STATE_PS_CONST | \ + NINE_STATE_MULTISAMPLE) + +#define NINE_STATE_COMMON \ + (NINE_STATE_FB | \ + NINE_STATE_BLEND | \ + NINE_STATE_DSA | \ + NINE_STATE_VIEWPORT | \ + NINE_STATE_VDECL | \ + NINE_STATE_IDXBUF | \ + NINE_STATE_STREAMFREQ) + +#define NINE_STATE_RARE \ + (NINE_STATE_SCISSOR | \ + NINE_STATE_BLEND_COLOR | \ + NINE_STATE_STENCIL_REF | \ + NINE_STATE_SAMPLE_MASK) + /* TODO: only go through dirty textures */ static void @@ -976,17 +975,25 @@ validate_textures(struct NineDevice9 *device) } } +static void +update_managed_buffers(struct NineDevice9 *device) +{ + struct NineBuffer9 *buf, *ptr; + LIST_FOR_EACH_ENTRY_SAFE(buf, ptr, &device->update_buffers, managed.list) { + list_delinit(&buf->managed.list); + NineBuffer9_Upload(buf); + } +} + void -nine_update_state_framebuffer(struct NineDevice9 *device) +nine_update_state_framebuffer_clear(struct NineDevice9 *device) { struct nine_state *state = &device->state; validate_textures(device); if (state->changed.group & NINE_STATE_FB) - update_framebuffer(device); - - state->changed.group &= ~NINE_STATE_FB; + update_framebuffer(device, TRUE); } boolean @@ -996,9 +1003,7 @@ nine_update_state(struct NineDevice9 *device) struct nine_state *state = &device->state; uint32_t group; - DBG("changed state groups: %x | %x\n", - state->changed.group & NINE_STATE_FREQ_GROUP_0, - state->changed.group & NINE_STATE_FREQ_GROUP_1); + DBG("changed state groups: %x\n", state->changed.group); /* NOTE: We may want to use the cso cache for everything, or let * NineDevice9.RestoreNonCSOState actually set the states, then we wouldn't @@ -1007,78 +1012,48 @@ nine_update_state(struct NineDevice9 *device) * may be dirty anyway, even if no texture bindings changed. */ validate_textures(device); /* may clobber state */ + update_managed_buffers(device); /* ff_update may change VS/PS dirty bits */ - if (unlikely(!state->vs || !state->ps)) + if (unlikely(!state->programmable_vs || !state->ps)) nine_ff_update(device); group = state->changed.group; - if (group & (NINE_STATE_FREQ_GROUP_0 | NINE_STATE_TEXTURE)) { - if (group & NINE_STATE_FB) - group = update_framebuffer(device); - if (group & NINE_STATE_VIEWPORT) - update_viewport(device); - if (group & NINE_STATE_SCISSOR) - commit_scissor(device); + if (group & (NINE_STATE_SHADER_CHANGE_VS | NINE_STATE_SHADER_CHANGE_PS)) { + if (group & NINE_STATE_SHADER_CHANGE_VS) + group |= prepare_vs(device, (group & NINE_STATE_VS) != 0); /* may set NINE_STATE_RASTERIZER and NINE_STATE_SAMPLER*/ + if (group & NINE_STATE_SHADER_CHANGE_PS) + group |= prepare_ps(device, (group & NINE_STATE_PS) != 0); + } - if (group & NINE_STATE_DSA) - prepare_dsa(device); + if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) { + if (group & NINE_STATE_FB) + update_framebuffer(device, FALSE); if (group & NINE_STATE_BLEND) prepare_blend(device); + if (group & NINE_STATE_DSA) + prepare_dsa(device); + if (group & NINE_STATE_VIEWPORT) + update_viewport(device); + if (group & (NINE_STATE_VDECL | NINE_STATE_VS | NINE_STATE_STREAMFREQ)) + update_vertex_elements(device); + if (group & NINE_STATE_IDXBUF) + commit_index_buffer(device); + } - if (group & (NINE_STATE_VS | NINE_STATE_TEXTURE | NINE_STATE_FOG_SHADER)) - group |= prepare_vs(device, (group & NINE_STATE_VS) != 0); - + if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS | NINE_STATE_SWVP))) { + if (group & NINE_STATE_MULTISAMPLE) + group |= check_multisample(device); if (group & NINE_STATE_RASTERIZER) prepare_rasterizer(device); - - if (group & (NINE_STATE_PS | NINE_STATE_TEXTURE | NINE_STATE_FOG_SHADER)) - group |= prepare_ps(device, (group & NINE_STATE_PS) != 0); - - if (group & NINE_STATE_BLEND_COLOR) { - struct pipe_blend_color color; - d3dcolor_to_rgba(&color.color[0], state->rs[D3DRS_BLENDFACTOR]); - pipe->set_blend_color(pipe, &color); - } - if (group & NINE_STATE_SAMPLE_MASK) { - pipe->set_sample_mask(pipe, state->rs[D3DRS_MULTISAMPLEMASK]); - } - if (group & NINE_STATE_STENCIL_REF) { - struct pipe_stencil_ref ref; - ref.ref_value[0] = state->rs[D3DRS_STENCILREF]; - ref.ref_value[1] = ref.ref_value[0]; - pipe->set_stencil_ref(pipe, &ref); - } - } - - if (state->changed.ucp) { - pipe->set_clip_state(pipe, &state->clip); - state->changed.ucp = 0; - } - - if (group & (NINE_STATE_FREQ_GROUP_1 | NINE_STATE_VS)) { if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER)) update_textures_and_samplers(device); - - if (group & NINE_STATE_IDXBUF) - commit_index_buffer(device); - - if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) || - state->changed.stream_freq & ~1) - update_vertex_elements(device); - - if (device->prefer_user_constbuf) { - if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs) - prepare_vs_constants_userbuf(device); - if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps) - prepare_ps_constants_userbuf(device); - } else { - if ((group & NINE_STATE_VS_CONST) && state->vs) - upload_constants(device, PIPE_SHADER_VERTEX); - if ((group & NINE_STATE_PS_CONST) && state->ps) - upload_constants(device, PIPE_SHADER_FRAGMENT); - } + if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS | NINE_STATE_SWVP)) && state->programmable_vs) + prepare_vs_constants_userbuf(device); + if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps) + prepare_ps_constants_userbuf(device); } + if (state->changed.vtxbuf) update_vertex_buffers(device); @@ -1099,6 +1074,30 @@ nine_update_state(struct NineDevice9 *device) state->commit = 0; + if (unlikely(state->changed.ucp)) { + pipe->set_clip_state(pipe, &state->clip); + state->changed.ucp = 0; + } + + if (unlikely(group & NINE_STATE_RARE)) { + if (group & NINE_STATE_SCISSOR) + commit_scissor(device); + if (group & NINE_STATE_BLEND_COLOR) { + struct pipe_blend_color color; + d3dcolor_to_rgba(&color.color[0], state->rs[D3DRS_BLENDFACTOR]); + pipe->set_blend_color(pipe, &color); + } + if (group & NINE_STATE_SAMPLE_MASK) { + pipe->set_sample_mask(pipe, state->rs[D3DRS_MULTISAMPLEMASK]); + } + if (group & NINE_STATE_STENCIL_REF) { + struct pipe_stencil_ref ref; + ref.ref_value[0] = state->rs[D3DRS_STENCILREF]; + ref.ref_value[1] = ref.ref_value[0]; + pipe->set_stencil_ref(pipe, &ref); + } + } + device->state.changed.group &= (NINE_STATE_FF | NINE_STATE_VS_CONST | NINE_STATE_PS_CONST); @@ -1222,7 +1221,8 @@ static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] = [D3DRS_BLENDOPALPHA] = D3DBLENDOP_ADD, [NINED3DRS_VSPOINTSIZE] = FALSE, [NINED3DRS_RTMASK] = 0xf, - [NINED3DRS_ALPHACOVERAGE] = FALSE + [NINED3DRS_ALPHACOVERAGE] = FALSE, + [NINED3DRS_MULTISAMPLE] = FALSE }; static const DWORD nine_tex_stage_state_defaults[NINED3DTSS_LAST + 1] = { @@ -1285,7 +1285,7 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, */ memcpy(state->rs, nine_render_state_defaults, sizeof(state->rs)); - for (s = 0; s < Elements(state->ff.tex_stage); ++s) { + for (s = 0; s < ARRAY_SIZE(state->ff.tex_stage); ++s) { memcpy(&state->ff.tex_stage[s], nine_tex_stage_state_defaults, sizeof(state->ff.tex_stage[s])); state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] = s; @@ -1294,7 +1294,7 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, state->ff.tex_stage[0][D3DTSS_ALPHAOP] = D3DTOP_SELECTARG1; memset(&state->bumpmap_vars, 0, sizeof(state->bumpmap_vars)); - for (s = 0; s < Elements(state->samp); ++s) { + for (s = 0; s < ARRAY_SIZE(state->samp); ++s) { memcpy(&state->samp[s], nine_samp_state_defaults, sizeof(state->samp[s])); } @@ -1308,6 +1308,8 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, */ state->rs[D3DRS_POINTSIZE_MAX] = fui(caps->MaxPointSize); + memcpy(state->rs_advertised, state->rs, sizeof(state->rs)); + /* Set changed flags to initialize driver. */ state->changed.group = NINE_STATE_ALL; @@ -1323,30 +1325,13 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, state->viewport.MaxZ = 1.0f; } - for (s = 0; s < Elements(state->changed.sampler); ++s) + for (s = 0; s < ARRAY_SIZE(state->changed.sampler); ++s) state->changed.sampler[s] = ~0; if (!is_reset) { state->dummy_vbo_bound_at = -1; state->vbo_bound_done = FALSE; } - - if (!device->prefer_user_constbuf) { - /* fill cb_vs and cb_ps for the non user constbuf path */ - struct pipe_constant_buffer cb; - - cb.buffer_offset = 0; - cb.buffer_size = device->vs_const_size; - cb.buffer = device->constbuf_vs; - cb.user_buffer = NULL; - state->pipe.cb_vs = cb; - - cb.buffer_size = device->ps_const_size; - cb.buffer = device->constbuf_ps; - state->pipe.cb_ps = cb; - - state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS; - } } void @@ -1354,14 +1339,16 @@ nine_state_clear(struct nine_state *state, const boolean device) { unsigned i; - for (i = 0; i < Elements(state->rt); ++i) + for (i = 0; i < ARRAY_SIZE(state->rt); ++i) nine_bind(&state->rt[i], NULL); nine_bind(&state->ds, NULL); nine_bind(&state->vs, NULL); nine_bind(&state->ps, NULL); nine_bind(&state->vdecl, NULL); - for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) + for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) { nine_bind(&state->stream[i], NULL); + pipe_resource_reference(&state->vtxbuf[i].buffer, NULL); + } nine_bind(&state->idxbuf, NULL); for (i = 0; i < NINE_MAX_SAMPLERS; ++i) { if (device && @@ -1372,6 +1359,367 @@ nine_state_clear(struct nine_state *state, const boolean device) } } +void +nine_state_init_sw(struct NineDevice9 *device) +{ + struct pipe_context *pipe_sw = device->pipe_sw; + struct pipe_rasterizer_state rast; + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_framebuffer_state fb; + + /* Only used with Streamout */ + memset(&rast, 0, sizeof(rast)); + rast.rasterizer_discard = true; + rast.point_quad_rasterization = 1; /* to make llvmpipe happy */ + cso_set_rasterizer(device->cso_sw, &rast); + + /* dummy settings */ + memset(&blend, 0, sizeof(blend)); + memset(&dsa, 0, sizeof(dsa)); + memset(&fb, 0, sizeof(fb)); + cso_set_blend(device->cso_sw, &blend); + cso_set_depth_stencil_alpha(device->cso_sw, &dsa); + cso_set_framebuffer(device->cso_sw, &fb); + cso_set_viewport_dims(device->cso_sw, 1.0, 1.0, false); + cso_set_fragment_shader_handle(device->cso_sw, util_make_empty_fragment_shader(pipe_sw)); +} + +/* There is duplication with update_vertex_elements. + * TODO: Share the code */ + +static void +update_vertex_elements_sw(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + const struct NineVertexDeclaration9 *vdecl = device->state.vdecl; + const struct NineVertexShader9 *vs; + unsigned n, b, i; + int index; + char vdecl_index_map[16]; /* vs->num_inputs <= 16 */ + char used_streams[device->caps.MaxStreams]; + int dummy_vbo_stream = -1; + BOOL need_dummy_vbo = FALSE; + struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; + + state->stream_usage_mask = 0; + memset(vdecl_index_map, -1, 16); + memset(used_streams, 0, device->caps.MaxStreams); + vs = state->programmable_vs ? device->state.vs : device->ff.vs; + + if (vdecl) { + for (n = 0; n < vs->num_inputs; ++n) { + DBG("looking up input %u (usage %u) from vdecl(%p)\n", + n, vs->input_map[n].ndecl, vdecl); + + for (i = 0; i < vdecl->nelems; i++) { + if (vdecl->usage_map[i] == vs->input_map[n].ndecl) { + vdecl_index_map[n] = i; + used_streams[vdecl->elems[i].vertex_buffer_index] = 1; + break; + } + } + if (vdecl_index_map[n] < 0) + need_dummy_vbo = TRUE; + } + } else { + /* No vertex declaration. Likely will never happen in practice, + * but we need not crash on this */ + need_dummy_vbo = TRUE; + } + + if (need_dummy_vbo) { + for (i = 0; i < device->caps.MaxStreams; i++ ) { + if (!used_streams[i]) { + dummy_vbo_stream = i; + break; + } + } + } + /* there are less vertex shader inputs than stream slots, + * so if we need a slot for the dummy vbo, we should have found one */ + assert (!need_dummy_vbo || dummy_vbo_stream != -1); + + for (n = 0; n < vs->num_inputs; ++n) { + index = vdecl_index_map[n]; + if (index >= 0) { + ve[n] = vdecl->elems[index]; + b = ve[n].vertex_buffer_index; + state->stream_usage_mask |= 1 << b; + /* XXX wine just uses 1 here: */ + if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA) + ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF; + } else { + /* if the vertex declaration is incomplete compared to what the + * vertex shader needs, we bind a dummy vbo with 0 0 0 0. + * This is not precised by the spec, but is the behaviour + * tested on win */ + ve[n].vertex_buffer_index = dummy_vbo_stream; + ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[n].src_offset = 0; + ve[n].instance_divisor = 0; + } + } + + if (state->dummy_vbo_bound_at != dummy_vbo_stream) { + if (state->dummy_vbo_bound_at >= 0) + state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at; + if (dummy_vbo_stream >= 0) { + state->changed.vtxbuf |= 1 << dummy_vbo_stream; + state->vbo_bound_done = FALSE; + } + state->dummy_vbo_bound_at = dummy_vbo_stream; + } + + cso_set_vertex_elements(device->cso_sw, vs->num_inputs, ve); +} + +static void +update_vertex_buffers_sw(struct NineDevice9 *device, int start_vertice, int num_vertices) +{ + struct pipe_context *pipe = device->pipe; + struct pipe_context *pipe_sw = device->pipe_sw; + struct nine_state *state = &device->state; + struct pipe_vertex_buffer vtxbuf; + uint32_t mask = 0xf; + unsigned i; + + DBG("mask=%x\n", mask); + + assert (state->dummy_vbo_bound_at < 0); + /* TODO: handle dummy_vbo_bound_at */ + + for (i = 0; mask; mask >>= 1, ++i) { + if (mask & 1) { + if (state->vtxbuf[i].buffer) { + struct pipe_resource *buf; + struct pipe_box box; + + vtxbuf = state->vtxbuf[i]; + + DBG("Locking %p (offset %d, length %d)\n", vtxbuf.buffer, + vtxbuf.buffer_offset, num_vertices * vtxbuf.stride); + + u_box_1d(vtxbuf.buffer_offset + start_vertice * vtxbuf.stride, + num_vertices * vtxbuf.stride, &box); + buf = vtxbuf.buffer; + vtxbuf.user_buffer = pipe->transfer_map(pipe, buf, 0, PIPE_TRANSFER_READ, &box, + &(state->transfers_so[i])); + vtxbuf.buffer = NULL; + if (!device->driver_caps.user_sw_vbufs) { + u_upload_data(device->vertex_sw_uploader, + 0, + box.width, + 16, + vtxbuf.user_buffer, + &(vtxbuf.buffer_offset), + &(vtxbuf.buffer)); + u_upload_unmap(device->vertex_sw_uploader); + vtxbuf.user_buffer = NULL; + } + pipe_sw->set_vertex_buffers(pipe_sw, i, 1, &vtxbuf); + if (vtxbuf.buffer) + pipe_resource_reference(&vtxbuf.buffer, NULL); + } else + pipe_sw->set_vertex_buffers(pipe_sw, i, 1, NULL); + } + } +} + +static void +update_vs_constants_sw(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + struct pipe_context *pipe_sw = device->pipe_sw; + + DBG("updating\n"); + + { + struct pipe_constant_buffer cb; + const void *buf; + + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 4096 * sizeof(float[4]); + cb.user_buffer = state->vs_const_f_swvp; + + if (state->vs->lconstf.ranges) { + const struct nine_lconstf *lconstf = &device->state.vs->lconstf; + const struct nine_range *r = lconstf->ranges; + unsigned n = 0; + float *dst = device->state.vs_lconstf_temp; + float *src = (float *)cb.user_buffer; + memcpy(dst, src, 8192 * sizeof(float[4])); + while (r) { + unsigned p = r->bgn; + unsigned c = r->end - r->bgn; + memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float)); + n += c; + r = r->next; + } + cb.user_buffer = dst; + } + + buf = cb.user_buffer; + if (!device->driver_caps.user_sw_cbufs) { + u_upload_data(device->constbuf_sw_uploader, + 0, + cb.buffer_size, + 16, + cb.user_buffer, + &(cb.buffer_offset), + &(cb.buffer)); + u_upload_unmap(device->constbuf_sw_uploader); + cb.user_buffer = NULL; + } + + pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 0, &cb); + if (cb.buffer) + pipe_resource_reference(&cb.buffer, NULL); + + cb.user_buffer = (char *)buf + 4096 * sizeof(float[4]); + if (!device->driver_caps.user_sw_cbufs) { + u_upload_data(device->constbuf_sw_uploader, + 0, + cb.buffer_size, + 16, + cb.user_buffer, + &(cb.buffer_offset), + &(cb.buffer)); + u_upload_unmap(device->constbuf_sw_uploader); + cb.user_buffer = NULL; + } + + pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 1, &cb); + if (cb.buffer) + pipe_resource_reference(&cb.buffer, NULL); + } + + { + struct pipe_constant_buffer cb; + + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 2048 * sizeof(float[4]); + cb.user_buffer = state->vs_const_i; + + if (!device->driver_caps.user_sw_cbufs) { + u_upload_data(device->constbuf_sw_uploader, + 0, + cb.buffer_size, + 16, + cb.user_buffer, + &(cb.buffer_offset), + &(cb.buffer)); + u_upload_unmap(device->constbuf_sw_uploader); + cb.user_buffer = NULL; + } + + pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 2, &cb); + if (cb.buffer) + pipe_resource_reference(&cb.buffer, NULL); + } + + { + struct pipe_constant_buffer cb; + + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 512 * sizeof(float[4]); + cb.user_buffer = state->vs_const_b; + + if (!device->driver_caps.user_sw_cbufs) { + u_upload_data(device->constbuf_sw_uploader, + 0, + cb.buffer_size, + 16, + cb.user_buffer, + &(cb.buffer_offset), + &(cb.buffer)); + u_upload_unmap(device->constbuf_sw_uploader); + cb.user_buffer = NULL; + } + + pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 3, &cb); + if (cb.buffer) + pipe_resource_reference(&cb.buffer, NULL); + } + + { + struct pipe_constant_buffer cb; + const D3DVIEWPORT9 *vport = &device->state.viewport; + float viewport_data[8] = {(float)vport->Width * 0.5f, + (float)vport->Height * -0.5f, vport->MaxZ - vport->MinZ, 0.f, + (float)vport->Width * 0.5f + (float)vport->X, + (float)vport->Height * 0.5f + (float)vport->Y, + vport->MinZ, 0.f}; + + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 2 * sizeof(float[4]); + cb.user_buffer = viewport_data; + + { + u_upload_data(device->constbuf_sw_uploader, + 0, + cb.buffer_size, + 16, + cb.user_buffer, + &(cb.buffer_offset), + &(cb.buffer)); + u_upload_unmap(device->constbuf_sw_uploader); + cb.user_buffer = NULL; + } + + pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 4, &cb); + if (cb.buffer) + pipe_resource_reference(&cb.buffer, NULL); + } + +} + +void +nine_state_prepare_draw_sw(struct NineDevice9 *device, struct NineVertexDeclaration9 *vdecl_out, + int start_vertice, int num_vertices, struct pipe_stream_output_info *so) +{ + struct nine_state *state = &device->state; + + struct NineVertexShader9 *vs = state->programmable_vs ? device->state.vs : device->ff.vs; + + assert(state->programmable_vs); + + DBG("Preparing draw\n"); + cso_set_vertex_shader_handle(device->cso_sw, + NineVertexShader9_GetVariantProcessVertices(vs, vdecl_out, so)); + update_vertex_elements_sw(device); + update_vertex_buffers_sw(device, start_vertice, num_vertices); + update_vs_constants_sw(device); + DBG("Preparation succeeded\n"); +} + +void +nine_state_after_draw_sw(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + struct pipe_context *pipe = device->pipe; + struct pipe_context *pipe_sw = device->pipe_sw; + int i; + + for (i = 0; i < 4; i++) { + pipe_sw->set_vertex_buffers(pipe_sw, i, 1, NULL); + if (state->transfers_so[i]) + pipe->transfer_unmap(pipe, state->transfers_so[i]); + state->transfers_so[i] = NULL; + } +} + +void +nine_state_destroy_sw(struct NineDevice9 *device) +{ + (void) device; + /* Everything destroyed with cso */ +} + /* static const DWORD nine_render_states_pixel[] = { @@ -1502,7 +1850,7 @@ const uint32_t nine_render_states_vertex[(NINED3DRS_LAST + 31) / 32] = /* TODO: put in the right values */ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = { - [D3DRS_ZENABLE] = NINE_STATE_DSA, + [D3DRS_ZENABLE] = NINE_STATE_DSA | NINE_STATE_MULTISAMPLE, [D3DRS_FILLMODE] = NINE_STATE_RASTERIZER, [D3DRS_SHADEMODE] = NINE_STATE_RASTERIZER, [D3DRS_ZWRITEENABLE] = NINE_STATE_DSA, @@ -1524,7 +1872,7 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = [D3DRS_FOGEND] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, [D3DRS_FOGDENSITY] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, [D3DRS_RANGEFOGENABLE] = NINE_STATE_FF_OTHER, - [D3DRS_STENCILENABLE] = NINE_STATE_DSA, + [D3DRS_STENCILENABLE] = NINE_STATE_DSA | NINE_STATE_MULTISAMPLE, [D3DRS_STENCILFAIL] = NINE_STATE_DSA, [D3DRS_STENCILZFAIL] = NINE_STATE_DSA, [D3DRS_STENCILPASS] = NINE_STATE_DSA, @@ -1555,17 +1903,17 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = [D3DRS_VERTEXBLEND] = NINE_STATE_FF_OTHER, [D3DRS_CLIPPLANEENABLE] = NINE_STATE_RASTERIZER, [D3DRS_POINTSIZE] = NINE_STATE_RASTERIZER, - [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER, + [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER | NINE_STATE_POINTSIZE_SHADER, [D3DRS_POINTSPRITEENABLE] = NINE_STATE_RASTERIZER, [D3DRS_POINTSCALEENABLE] = NINE_STATE_FF_OTHER, [D3DRS_POINTSCALE_A] = NINE_STATE_FF_OTHER, [D3DRS_POINTSCALE_B] = NINE_STATE_FF_OTHER, [D3DRS_POINTSCALE_C] = NINE_STATE_FF_OTHER, - [D3DRS_MULTISAMPLEANTIALIAS] = NINE_STATE_RASTERIZER, + [D3DRS_MULTISAMPLEANTIALIAS] = NINE_STATE_MULTISAMPLE, [D3DRS_MULTISAMPLEMASK] = NINE_STATE_SAMPLE_MASK, [D3DRS_PATCHEDGESTYLE] = NINE_STATE_UNHANDLED, [D3DRS_DEBUGMONITORTOKEN] = NINE_STATE_UNHANDLED, - [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER, + [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER | NINE_STATE_POINTSIZE_SHADER, [D3DRS_INDEXEDVERTEXBLENDENABLE] = NINE_STATE_FF_OTHER, [D3DRS_COLORWRITEENABLE] = NINE_STATE_BLEND, [D3DRS_TWEENFACTOR] = NINE_STATE_FF_OTHER,