From 993e68fa6a431a7c7c451c738e07d02a84ea40e4 Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Tue, 24 Mar 2015 10:47:35 +0100 Subject: [PATCH] st/nine: Rework constant buffer state handling We have two paths: . One that uses a fixed constant buffer, and updates it when needed . One that uses a user constant buffer, and uploads it when needed. This patch separates the preparation of the constant buffer and the commit. It also removes NineDevice9_RestoreNonCSOState, which was used to restore all states. Instead the commit of the constant buffer is moved to nine_state, and the other field settings moved to other functions where more appropriate. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/device9.c | 69 +- src/gallium/state_trackers/nine/device9.h | 4 - src/gallium/state_trackers/nine/nine_state.c | 795 ++++++++++--------- src/gallium/state_trackers/nine/nine_state.h | 5 + src/gallium/state_trackers/nine/swapchain9.c | 2 +- 5 files changed, 435 insertions(+), 440 deletions(-) diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 1416a388315..c5687615be8 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -119,68 +119,6 @@ NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset ) This, (IDirect3DSurface9 *)This->swapchains[0]->zsbuf); } -void -NineDevice9_RestoreNonCSOState( struct NineDevice9 *This, unsigned mask ) -{ - struct pipe_context *pipe = This->pipe; - - DBG("This=%p mask=%u\n", This, mask); - - if (mask & 0x1) { - struct pipe_constant_buffer cb; - cb.buffer_offset = 0; - cb.buffer_size = This->vs_const_size; - - if (This->prefer_user_constbuf) { - cb.buffer = NULL; - cb.user_buffer = This->state.vs_const_f; - if (!This->driver_caps.user_cbufs) { - u_upload_data(This->constbuf_uploader, - 0, - cb.buffer_size, - cb.user_buffer, - &cb.buffer_offset, - &cb.buffer); - u_upload_unmap(This->constbuf_uploader); - cb.user_buffer = NULL; - } - } else { - cb.buffer = This->constbuf_vs; - cb.user_buffer = NULL; - } - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb); - - cb.buffer_size = This->ps_const_size; - if (This->prefer_user_constbuf) { - cb.user_buffer = This->state.ps_const_f; - if (!This->driver_caps.user_cbufs) { - u_upload_data(This->constbuf_uploader, - 0, - cb.buffer_size, - cb.user_buffer, - &cb.buffer_offset, - &cb.buffer); - u_upload_unmap(This->constbuf_uploader); - cb.user_buffer = NULL; - } - } else { - cb.buffer = This->constbuf_ps; - } - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb); - } - - if (mask & 0x2) { - struct pipe_poly_stipple stipple; - memset(&stipple, ~0, sizeof(stipple)); - pipe->set_polygon_stipple(pipe, &stipple); - } - - This->state.changed.group = NINE_STATE_ALL; - This->state.changed.vtxbuf = (1ULL << This->caps.MaxStreams) - 1; - This->state.changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1; - This->state.changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK; -} - #define GET_PCAP(n) pScreen->get_param(pScreen, PIPE_CAP_##n) HRESULT NineDevice9_ctor( struct NineDevice9 *This, @@ -455,7 +393,12 @@ NineDevice9_ctor( struct NineDevice9 *This, nine_ff_init(This); /* initialize fixed function code */ NineDevice9_SetDefaultState(This, FALSE); - NineDevice9_RestoreNonCSOState(This, ~0); + + { + struct pipe_poly_stipple stipple; + memset(&stipple, ~0, sizeof(stipple)); + This->pipe->set_polygon_stipple(This->pipe, &stipple); + } This->update = &This->state; nine_update_state(This); diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h index 8b955a7a797..f109f3cefdb 100644 --- a/src/gallium/state_trackers/nine/device9.h +++ b/src/gallium/state_trackers/nine/device9.h @@ -184,10 +184,6 @@ NineDevice9_GetCSO( struct NineDevice9 *This ); const D3DCAPS9 * NineDevice9_GetCaps( struct NineDevice9 *This ); -/* Mask: 0x1 = constant buffers, 0x2 = stipple */ -void -NineDevice9_RestoreNonCSOState( struct NineDevice9 *This, unsigned mask ); - /*** Direct3D public ***/ HRESULT WINAPI diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 8c2b6eb4bc0..1b9622b00f8 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -61,6 +61,275 @@ prepare_rasterizer(struct NineDevice9 *device) device->state.commit |= NINE_STATE_COMMIT_RASTERIZER; } +#define DO_UPLOAD_CONST_F(buf,p,c,d) \ + do { \ + DBG("upload ConstantF [%u .. %u]\n", x, (x) + (c) - 1); \ + box.x = (p) * 4 * sizeof(float); \ + box.width = (c) * 4 * sizeof(float); \ + pipe->transfer_inline_write(pipe, buf, 0, usage, &box, &((d)[p * 4]), \ + 0, 0); \ + } while(0) + +/* OK, this is a bit ugly ... */ +static void +upload_constants(struct NineDevice9 *device, unsigned shader_type) +{ + struct pipe_context *pipe = device->pipe; + struct pipe_resource *buf; + struct pipe_box box; + const void *data; + const float *const_f; + const int *const_i; + const BOOL *const_b; + uint32_t data_b[NINE_MAX_CONST_B]; + uint16_t dirty_i; + uint16_t dirty_b; + const unsigned usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE; + unsigned x = 0; /* silence warning */ + unsigned i, c; + struct nine_range *r, *p, *lconstf_ranges; + float *lconstf_data; + + box.y = 0; + box.z = 0; + box.height = 1; + box.depth = 1; + + if (shader_type == PIPE_SHADER_VERTEX) { + DBG("VS\n"); + buf = device->constbuf_vs; + + const_f = device->state.vs_const_f; + for (p = r = device->state.changed.vs_const_f; r; p = r, r = r->next) + DO_UPLOAD_CONST_F(buf, r->bgn, r->end - r->bgn, const_f); + if (p) { + nine_range_pool_put_chain(&device->range_pool, + device->state.changed.vs_const_f, p); + device->state.changed.vs_const_f = NULL; + } + + dirty_i = device->state.changed.vs_const_i; + device->state.changed.vs_const_i = 0; + const_i = &device->state.vs_const_i[0][0]; + + dirty_b = device->state.changed.vs_const_b; + device->state.changed.vs_const_b = 0; + const_b = device->state.vs_const_b; + + lconstf_ranges = device->state.vs->lconstf.ranges; + lconstf_data = device->state.vs->lconstf.data; + + device->state.ff.clobber.vs_const = TRUE; + device->state.changed.group &= ~NINE_STATE_VS_CONST; + } else { + DBG("PS\n"); + buf = device->constbuf_ps; + + const_f = device->state.ps_const_f; + for (p = r = device->state.changed.ps_const_f; r; p = r, r = r->next) + DO_UPLOAD_CONST_F(buf, r->bgn, r->end - r->bgn, const_f); + if (p) { + nine_range_pool_put_chain(&device->range_pool, + device->state.changed.ps_const_f, p); + device->state.changed.ps_const_f = NULL; + } + + dirty_i = device->state.changed.ps_const_i; + device->state.changed.ps_const_i = 0; + const_i = &device->state.ps_const_i[0][0]; + + dirty_b = device->state.changed.ps_const_b; + device->state.changed.ps_const_b = 0; + const_b = device->state.ps_const_b; + + lconstf_ranges = NULL; + lconstf_data = NULL; + + device->state.ff.clobber.ps_const = TRUE; + device->state.changed.group &= ~NINE_STATE_PS_CONST; + } + + /* write range from min to max changed, it's not much data */ + /* bool1 */ + if (dirty_b) { + c = util_last_bit(dirty_b); + i = ffs(dirty_b) - 1; + x = buf->width0 - (NINE_MAX_CONST_B - i) * 4; + c -= i; + memcpy(data_b, &(const_b[i]), c * sizeof(uint32_t)); + box.x = x; + box.width = c * 4; + DBG("upload ConstantB [%u .. %u]\n", x, x + c - 1); + pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data_b, 0, 0); + } + + /* int4 */ + for (c = 0, i = 0; dirty_i; i++, dirty_i >>= 1) { + if (dirty_i & 1) { + if (!c) + x = i; + ++c; + } else + if (c) { + DBG("upload ConstantI [%u .. %u]\n", x, x + c - 1); + data = &const_i[x * 4]; + box.x = buf->width0 - (NINE_MAX_CONST_I * 4 + NINE_MAX_CONST_B) * 4; + box.x += x * 4 * sizeof(int); + box.width = c * 4 * sizeof(int); + c = 0; + pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0); + } + } + if (c) { + DBG("upload ConstantI [%u .. %u]\n", x, x + c - 1); + data = &const_i[x * 4]; + box.x = buf->width0 - (NINE_MAX_CONST_I * 4 + NINE_MAX_CONST_B) * 4; + box.x += x * 4 * sizeof(int); + box.width = c * 4 * sizeof(int); + pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0); + } + + /* TODO: only upload these when shader itself changes */ + if (lconstf_ranges) { + unsigned n = 0; + struct nine_range *r = lconstf_ranges; + while (r) { + box.x = r->bgn * 4 * sizeof(float); + n += r->end - r->bgn; + box.width = (r->end - r->bgn) * 4 * sizeof(float); + data = &lconstf_data[4 * n]; + pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0); + r = r->next; + } + } +} + +static void +prepare_vs_constants_userbuf(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + struct pipe_constant_buffer cb; + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = device->state.vs->const_used_size; + cb.user_buffer = device->state.vs_const_f; + + if (!cb.buffer_size) + return; + + if (state->changed.vs_const_i) { + int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f]; + memcpy(idst, state->vs_const_i, sizeof(state->vs_const_i)); + state->changed.vs_const_i = 0; + } + if (state->changed.vs_const_b) { + int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f]; + uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I]; + memcpy(bdst, state->vs_const_b, sizeof(state->vs_const_b)); + state->changed.vs_const_b = 0; + } + + if (device->state.vs->lconstf.ranges) { + /* TODO: Can we make it so that we don't have to copy everything ? */ + const struct nine_lconstf *lconstf = &device->state.vs->lconstf; + const struct nine_range *r = lconstf->ranges; + unsigned n = 0; + float *dst = device->state.vs_lconstf_temp; + float *src = (float *)cb.user_buffer; + memcpy(dst, src, cb.buffer_size); + while (r) { + unsigned p = r->bgn; + unsigned c = r->end - r->bgn; + memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float)); + n += c; + r = r->next; + } + cb.user_buffer = dst; + } + + if (!device->driver_caps.user_cbufs) { + u_upload_data(device->constbuf_uploader, + 0, + cb.buffer_size, + cb.user_buffer, + &cb.buffer_offset, + &cb.buffer); + u_upload_unmap(device->constbuf_uploader); + cb.user_buffer = NULL; + } + + state->pipe.cb_vs = cb; + + if (device->state.changed.vs_const_f) { + struct nine_range *r = device->state.changed.vs_const_f; + struct nine_range *p = r; + while (p->next) + p = p->next; + nine_range_pool_put_chain(&device->range_pool, r, p); + device->state.changed.vs_const_f = NULL; + } + state->changed.group &= ~NINE_STATE_VS_CONST; + state->commit |= NINE_STATE_COMMIT_CONST_VS; +} + +static void +prepare_ps_constants_userbuf(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + struct pipe_constant_buffer cb; + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = device->state.ps->const_used_size; + cb.user_buffer = device->state.ps_const_f; + + if (!cb.buffer_size) + return; + + if (state->changed.ps_const_i) { + int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f]; + memcpy(idst, state->ps_const_i, sizeof(state->ps_const_i)); + state->changed.ps_const_i = 0; + } + if (state->changed.ps_const_b) { + int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f]; + uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I]; + memcpy(bdst, state->ps_const_b, sizeof(state->ps_const_b)); + state->changed.ps_const_b = 0; + } + + /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */ + if (device->state.ps->bumpenvmat_needed) { + memcpy(device->state.ps_lconstf_temp, cb.user_buffer, cb.buffer_size); + memcpy(&device->state.ps_lconstf_temp[4 * 8], &device->state.bumpmap_vars, sizeof(device->state.bumpmap_vars)); + + cb.user_buffer = device->state.ps_lconstf_temp; + } + + if (!device->driver_caps.user_cbufs) { + u_upload_data(device->constbuf_uploader, + 0, + cb.buffer_size, + cb.user_buffer, + &cb.buffer_offset, + &cb.buffer); + u_upload_unmap(device->constbuf_uploader); + cb.user_buffer = NULL; + } + + state->pipe.cb_ps = cb; + + if (device->state.changed.ps_const_f) { + struct nine_range *r = device->state.changed.ps_const_f; + struct nine_range *p = r; + while (p->next) + p = p->next; + nine_range_pool_put_chain(&device->range_pool, r, p); + device->state.changed.ps_const_f = NULL; + } + state->changed.group &= ~NINE_STATE_PS_CONST; + state->commit |= NINE_STATE_COMMIT_CONST_PS; +} + /* State preparation incremental */ /* State preparation + State commit */ @@ -252,404 +521,134 @@ update_vertex_elements(struct NineDevice9 *device) for (i = 0; i < device->caps.MaxStreams; i++ ) { if (!used_streams[i]) { dummy_vbo_stream = i; - break; - } - } - } - /* there are less vertex shader inputs than stream slots, - * so if we need a slot for the dummy vbo, we should have found one */ - assert (!need_dummy_vbo || dummy_vbo_stream != -1); - - for (n = 0; n < vs->num_inputs; ++n) { - index = vdecl_index_map[n]; - if (index >= 0) { - ve[n] = vdecl->elems[index]; - b = ve[n].vertex_buffer_index; - state->stream_usage_mask |= 1 << b; - /* XXX wine just uses 1 here: */ - if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA) - ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF; - } else { - /* if the vertex declaration is incomplete compared to what the - * vertex shader needs, we bind a dummy vbo with 0 0 0 0. - * This is not precised by the spec, but is the behaviour - * tested on win */ - ve[n].vertex_buffer_index = dummy_vbo_stream; - ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - ve[n].src_offset = 0; - ve[n].instance_divisor = 0; - } - } - - if (state->dummy_vbo_bound_at != dummy_vbo_stream) { - if (state->dummy_vbo_bound_at >= 0) - state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at; - if (dummy_vbo_stream >= 0) { - state->changed.vtxbuf |= 1 << dummy_vbo_stream; - state->vbo_bound_done = FALSE; - } - state->dummy_vbo_bound_at = dummy_vbo_stream; - } - - cso_set_vertex_elements(device->cso, vs->num_inputs, ve); - - state->changed.stream_freq = 0; -} - -static inline uint32_t -update_shader_variant_keys(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - uint32_t mask = 0; - uint32_t vs_key = state->samplers_shadow; - uint32_t ps_key = state->samplers_shadow; - - vs_key = (vs_key & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0); - ps_key = (ps_key & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0); - - if (state->vs) vs_key &= state->vs->sampler_mask; - if (state->ps) { - if (unlikely(state->ps->byte_code.version < 0x20)) { - /* no depth textures, but variable targets */ - uint32_t m = state->ps->sampler_mask; - ps_key = 0; - while (m) { - int s = ffs(m) - 1; - m &= ~(1 << s); - ps_key |= (state->texture[s] ? state->texture[s]->pstype : 1) << (s * 2); - } - } else { - ps_key &= state->ps->sampler_mask; - } - } - - if (state->vs && state->vs_key != vs_key) { - state->vs_key = vs_key; - mask |= NINE_STATE_VS; - } - if (state->ps && state->ps_key != ps_key) { - state->ps_key = ps_key; - mask |= NINE_STATE_PS; - } - return mask; -} - -static inline uint32_t -update_vs(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - struct NineVertexShader9 *vs = state->vs; - uint32_t changed_group = 0; - - /* likely because we dislike FF */ - if (likely(vs)) { - state->cso.vs = NineVertexShader9_GetVariant(vs, state->vs_key); - } else { - vs = device->ff.vs; - state->cso.vs = vs->variant.cso; - } - device->pipe->bind_vs_state(device->pipe, state->cso.vs); - - if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) { - state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size; - changed_group |= NINE_STATE_RASTERIZER; - } - - if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask) - /* Bound dummy sampler. */ - changed_group |= NINE_STATE_SAMPLER; - return changed_group; -} - -static inline uint32_t -update_ps(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - struct NinePixelShader9 *ps = state->ps; - uint32_t changed_group = 0; - - if (likely(ps)) { - state->cso.ps = NinePixelShader9_GetVariant(ps, state->ps_key); - } else { - ps = device->ff.ps; - state->cso.ps = ps->variant.cso; - } - device->pipe->bind_fs_state(device->pipe, state->cso.ps); - - if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask) - /* Bound dummy sampler. */ - changed_group |= NINE_STATE_SAMPLER; - return changed_group; -} - -#define DO_UPLOAD_CONST_F(buf,p,c,d) \ - do { \ - DBG("upload ConstantF [%u .. %u]\n", x, (x) + (c) - 1); \ - box.x = (p) * 4 * sizeof(float); \ - box.width = (c) * 4 * sizeof(float); \ - pipe->transfer_inline_write(pipe, buf, 0, usage, &box, &((d)[p * 4]), \ - 0, 0); \ - } while(0) - -/* OK, this is a bit ugly ... */ -static void -update_constants(struct NineDevice9 *device, unsigned shader_type) -{ - struct pipe_context *pipe = device->pipe; - struct pipe_resource *buf; - struct pipe_box box; - const void *data; - const float *const_f; - const int *const_i; - const BOOL *const_b; - uint32_t data_b[NINE_MAX_CONST_B]; - uint16_t dirty_i; - uint16_t dirty_b; - const unsigned usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE; - unsigned x = 0; /* silence warning */ - unsigned i, c; - struct nine_range *r, *p, *lconstf_ranges; - float *lconstf_data; - - box.y = 0; - box.z = 0; - box.height = 1; - box.depth = 1; - - if (shader_type == PIPE_SHADER_VERTEX) { - DBG("VS\n"); - buf = device->constbuf_vs; - - const_f = device->state.vs_const_f; - for (p = r = device->state.changed.vs_const_f; r; p = r, r = r->next) - DO_UPLOAD_CONST_F(buf, r->bgn, r->end - r->bgn, const_f); - if (p) { - nine_range_pool_put_chain(&device->range_pool, - device->state.changed.vs_const_f, p); - device->state.changed.vs_const_f = NULL; - } - - dirty_i = device->state.changed.vs_const_i; - device->state.changed.vs_const_i = 0; - const_i = &device->state.vs_const_i[0][0]; - - dirty_b = device->state.changed.vs_const_b; - device->state.changed.vs_const_b = 0; - const_b = device->state.vs_const_b; - - lconstf_ranges = device->state.vs->lconstf.ranges; - lconstf_data = device->state.vs->lconstf.data; - - device->state.ff.clobber.vs_const = TRUE; - device->state.changed.group &= ~NINE_STATE_VS_CONST; - } else { - DBG("PS\n"); - buf = device->constbuf_ps; - - const_f = device->state.ps_const_f; - for (p = r = device->state.changed.ps_const_f; r; p = r, r = r->next) - DO_UPLOAD_CONST_F(buf, r->bgn, r->end - r->bgn, const_f); - if (p) { - nine_range_pool_put_chain(&device->range_pool, - device->state.changed.ps_const_f, p); - device->state.changed.ps_const_f = NULL; - } - - dirty_i = device->state.changed.ps_const_i; - device->state.changed.ps_const_i = 0; - const_i = &device->state.ps_const_i[0][0]; - - dirty_b = device->state.changed.ps_const_b; - device->state.changed.ps_const_b = 0; - const_b = device->state.ps_const_b; - - lconstf_ranges = NULL; - lconstf_data = NULL; - - device->state.ff.clobber.ps_const = TRUE; - device->state.changed.group &= ~NINE_STATE_PS_CONST; - } - - /* write range from min to max changed, it's not much data */ - /* bool1 */ - if (dirty_b) { - c = util_last_bit(dirty_b); - i = ffs(dirty_b) - 1; - x = buf->width0 - (NINE_MAX_CONST_B - i) * 4; - c -= i; - memcpy(data_b, &(const_b[i]), c * sizeof(uint32_t)); - box.x = x; - box.width = c * 4; - DBG("upload ConstantB [%u .. %u]\n", x, x + c - 1); - pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data_b, 0, 0); + break; + } + } } + /* there are less vertex shader inputs than stream slots, + * so if we need a slot for the dummy vbo, we should have found one */ + assert (!need_dummy_vbo || dummy_vbo_stream != -1); - /* int4 */ - for (c = 0, i = 0; dirty_i; i++, dirty_i >>= 1) { - if (dirty_i & 1) { - if (!c) - x = i; - ++c; - } else - if (c) { - DBG("upload ConstantI [%u .. %u]\n", x, x + c - 1); - data = &const_i[x * 4]; - box.x = buf->width0 - (NINE_MAX_CONST_I * 4 + NINE_MAX_CONST_B) * 4; - box.x += x * 4 * sizeof(int); - box.width = c * 4 * sizeof(int); - c = 0; - pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0); + for (n = 0; n < vs->num_inputs; ++n) { + index = vdecl_index_map[n]; + if (index >= 0) { + ve[n] = vdecl->elems[index]; + b = ve[n].vertex_buffer_index; + state->stream_usage_mask |= 1 << b; + /* XXX wine just uses 1 here: */ + if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA) + ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF; + } else { + /* if the vertex declaration is incomplete compared to what the + * vertex shader needs, we bind a dummy vbo with 0 0 0 0. + * This is not precised by the spec, but is the behaviour + * tested on win */ + ve[n].vertex_buffer_index = dummy_vbo_stream; + ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[n].src_offset = 0; + ve[n].instance_divisor = 0; } } - if (c) { - DBG("upload ConstantI [%u .. %u]\n", x, x + c - 1); - data = &const_i[x * 4]; - box.x = buf->width0 - (NINE_MAX_CONST_I * 4 + NINE_MAX_CONST_B) * 4; - box.x += x * 4 * sizeof(int); - box.width = c * 4 * sizeof(int); - pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0); - } - /* TODO: only upload these when shader itself changes */ - if (lconstf_ranges) { - unsigned n = 0; - struct nine_range *r = lconstf_ranges; - while (r) { - box.x = r->bgn * 4 * sizeof(float); - n += r->end - r->bgn; - box.width = (r->end - r->bgn) * 4 * sizeof(float); - data = &lconstf_data[4 * n]; - pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0); - r = r->next; + if (state->dummy_vbo_bound_at != dummy_vbo_stream) { + if (state->dummy_vbo_bound_at >= 0) + state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at; + if (dummy_vbo_stream >= 0) { + state->changed.vtxbuf |= 1 << dummy_vbo_stream; + state->vbo_bound_done = FALSE; } + state->dummy_vbo_bound_at = dummy_vbo_stream; } + + cso_set_vertex_elements(device->cso, vs->num_inputs, ve); + + state->changed.stream_freq = 0; } -static void -update_vs_constants_userbuf(struct NineDevice9 *device) +static inline uint32_t +update_shader_variant_keys(struct NineDevice9 *device) { struct nine_state *state = &device->state; - struct pipe_context *pipe = device->pipe; - struct pipe_constant_buffer cb; - cb.buffer = NULL; - cb.buffer_offset = 0; - cb.buffer_size = device->state.vs->const_used_size; - cb.user_buffer = device->state.vs_const_f; - - if (!cb.buffer_size) - return; + uint32_t mask = 0; + uint32_t vs_key = state->samplers_shadow; + uint32_t ps_key = state->samplers_shadow; - if (state->changed.vs_const_i) { - int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f]; - memcpy(idst, state->vs_const_i, sizeof(state->vs_const_i)); - state->changed.vs_const_i = 0; - } - if (state->changed.vs_const_b) { - int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f]; - uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I]; - memcpy(bdst, state->vs_const_b, sizeof(state->vs_const_b)); - state->changed.vs_const_b = 0; - } + vs_key = (vs_key & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0); + ps_key = (ps_key & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0); - if (device->state.vs->lconstf.ranges) { - /* TODO: Can we make it so that we don't have to copy everything ? */ - const struct nine_lconstf *lconstf = &device->state.vs->lconstf; - const struct nine_range *r = lconstf->ranges; - unsigned n = 0; - float *dst = device->state.vs_lconstf_temp; - float *src = (float *)cb.user_buffer; - memcpy(dst, src, cb.buffer_size); - while (r) { - unsigned p = r->bgn; - unsigned c = r->end - r->bgn; - memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float)); - n += c; - r = r->next; + if (state->vs) vs_key &= state->vs->sampler_mask; + if (state->ps) { + if (unlikely(state->ps->byte_code.version < 0x20)) { + /* no depth textures, but variable targets */ + uint32_t m = state->ps->sampler_mask; + ps_key = 0; + while (m) { + int s = ffs(m) - 1; + m &= ~(1 << s); + ps_key |= (state->texture[s] ? state->texture[s]->pstype : 1) << (s * 2); + } + } else { + ps_key &= state->ps->sampler_mask; } - cb.user_buffer = dst; } - if (!device->driver_caps.user_cbufs) { - u_upload_data(device->constbuf_uploader, - 0, - cb.buffer_size, - cb.user_buffer, - &cb.buffer_offset, - &cb.buffer); - u_upload_unmap(device->constbuf_uploader); - cb.user_buffer = NULL; + if (state->vs && state->vs_key != vs_key) { + state->vs_key = vs_key; + mask |= NINE_STATE_VS; } - - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb); - - if (device->state.changed.vs_const_f) { - struct nine_range *r = device->state.changed.vs_const_f; - struct nine_range *p = r; - while (p->next) - p = p->next; - nine_range_pool_put_chain(&device->range_pool, r, p); - device->state.changed.vs_const_f = NULL; + if (state->ps && state->ps_key != ps_key) { + state->ps_key = ps_key; + mask |= NINE_STATE_PS; } - state->changed.group &= ~NINE_STATE_VS_CONST; + return mask; } -static void -update_ps_constants_userbuf(struct NineDevice9 *device) +static inline uint32_t +update_vs(struct NineDevice9 *device) { struct nine_state *state = &device->state; - struct pipe_context *pipe = device->pipe; - struct pipe_constant_buffer cb; - int i; - cb.buffer = NULL; - cb.buffer_offset = 0; - cb.buffer_size = device->state.ps->const_used_size; - cb.user_buffer = device->state.ps_const_f; - - if (!cb.buffer_size) - return; + struct NineVertexShader9 *vs = state->vs; + uint32_t changed_group = 0; - if (state->changed.ps_const_i) { - int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f]; - memcpy(idst, state->ps_const_i, sizeof(state->ps_const_i)); - state->changed.ps_const_i = 0; - } - if (state->changed.ps_const_b) { - int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f]; - uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I]; - memcpy(bdst, state->ps_const_b, sizeof(state->ps_const_b)); - state->changed.ps_const_b = 0; + /* likely because we dislike FF */ + if (likely(vs)) { + state->cso.vs = NineVertexShader9_GetVariant(vs, state->vs_key); + } else { + vs = device->ff.vs; + state->cso.vs = vs->variant.cso; } + device->pipe->bind_vs_state(device->pipe, state->cso.vs); - /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */ - if (device->state.ps->bumpenvmat_needed) { - memcpy(device->state.ps_lconstf_temp, cb.user_buffer, cb.buffer_size); - memcpy(&device->state.ps_lconstf_temp[4 * 8], &device->state.bumpmap_vars, sizeof(device->state.bumpmap_vars)); - - cb.user_buffer = device->state.ps_lconstf_temp; + if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) { + state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size; + changed_group |= NINE_STATE_RASTERIZER; } - if (!device->driver_caps.user_cbufs) { - u_upload_data(device->constbuf_uploader, - 0, - cb.buffer_size, - cb.user_buffer, - &cb.buffer_offset, - &cb.buffer); - u_upload_unmap(device->constbuf_uploader); - cb.user_buffer = NULL; - } + if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask) + /* Bound dummy sampler. */ + changed_group |= NINE_STATE_SAMPLER; + return changed_group; +} - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb); +static inline uint32_t +update_ps(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + struct NinePixelShader9 *ps = state->ps; + uint32_t changed_group = 0; - if (device->state.changed.ps_const_f) { - struct nine_range *r = device->state.changed.ps_const_f; - struct nine_range *p = r; - while (p->next) - p = p->next; - nine_range_pool_put_chain(&device->range_pool, r, p); - device->state.changed.ps_const_f = NULL; + if (likely(ps)) { + state->cso.ps = NinePixelShader9_GetVariant(ps, state->ps_key); + } else { + ps = device->ff.ps; + state->cso.ps = ps->variant.cso; } - state->changed.group &= ~NINE_STATE_PS_CONST; + device->pipe->bind_fs_state(device->pipe, state->cso.ps); + + if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask) + /* Bound dummy sampler. */ + changed_group |= NINE_STATE_SAMPLER; + return changed_group; } static void @@ -905,6 +904,22 @@ commit_index_buffer(struct NineDevice9 *device) pipe->set_index_buffer(pipe, NULL); } +static inline void +commit_vs_constants(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs); +} + +static inline void +commit_ps_constants(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &device->state.pipe.cb_ps); +} + /* State Update */ #define NINE_STATE_FREQ_GROUP_0 \ @@ -1034,14 +1049,14 @@ nine_update_state(struct NineDevice9 *device) if (device->prefer_user_constbuf) { if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs) - update_vs_constants_userbuf(device); + prepare_vs_constants_userbuf(device); if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps) - update_ps_constants_userbuf(device); + prepare_ps_constants_userbuf(device); } else { if ((group & NINE_STATE_VS_CONST) && state->vs) - update_constants(device, PIPE_SHADER_VERTEX); + upload_constants(device, PIPE_SHADER_VERTEX); if ((group & NINE_STATE_PS_CONST) && state->ps) - update_constants(device, PIPE_SHADER_FRAGMENT); + upload_constants(device, PIPE_SHADER_FRAGMENT); } } if (state->changed.vtxbuf) @@ -1053,6 +1068,10 @@ nine_update_state(struct NineDevice9 *device) commit_dsa(device); if (state->commit & NINE_STATE_COMMIT_RASTERIZER) commit_rasterizer(device); + if (state->commit & NINE_STATE_COMMIT_CONST_VS) + commit_vs_constants(device); + if (state->commit & NINE_STATE_COMMIT_CONST_PS) + commit_ps_constants(device); state->commit = 0; @@ -1219,6 +1238,18 @@ static const DWORD nine_samp_state_defaults[NINED3DSAMP_LAST + 1] = [NINED3DSAMP_MINLOD] = 0, [NINED3DSAMP_SHADOW] = 0 }; + +void nine_state_restore_non_cso(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + + state->changed.group = NINE_STATE_ALL; + state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1; + state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1; + state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK; + state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS; +} + void nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, boolean is_reset) @@ -1256,6 +1287,9 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, /* Set changed flags to initialize driver. */ state->changed.group = NINE_STATE_ALL; + state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1; + state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1; + state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK; state->ff.changed.transform[0] = ~0; state->ff.changed.transform[D3DTS_WORLD / 32] |= 1 << (D3DTS_WORLD % 32); @@ -1272,6 +1306,23 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, state->dummy_vbo_bound_at = -1; state->vbo_bound_done = FALSE; } + + if (!device->prefer_user_constbuf) { + /* fill cb_vs and cb_ps for the non user constbuf path */ + struct pipe_constant_buffer cb; + + cb.buffer_offset = 0; + cb.buffer_size = device->vs_const_size; + cb.buffer = device->constbuf_vs; + cb.user_buffer = NULL; + state->pipe.cb_vs = cb; + + cb.buffer_size = device->ps_const_size; + cb.buffer = device->constbuf_ps; + state->pipe.cb_ps = cb; + + state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS; + } } void diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h index 60e5d8fe291..ff6c1800ca1 100644 --- a/src/gallium/state_trackers/nine/nine_state.h +++ b/src/gallium/state_trackers/nine/nine_state.h @@ -81,6 +81,8 @@ #define NINE_STATE_COMMIT_DSA (1 << 0) #define NINE_STATE_COMMIT_RASTERIZER (1 << 1) #define NINE_STATE_COMMIT_BLEND (1 << 2) +#define NINE_STATE_COMMIT_CONST_VS (1 << 3) +#define NINE_STATE_COMMIT_CONST_PS (1 << 4) #define NINE_MAX_SIMULTANEOUS_RENDERTARGETS 4 @@ -218,6 +220,8 @@ struct nine_state struct pipe_depth_stencil_alpha_state dsa; struct pipe_rasterizer_state rast; struct pipe_blend_state blend; + struct pipe_constant_buffer cb_vs; + struct pipe_constant_buffer cb_ps; } pipe; }; @@ -235,6 +239,7 @@ struct NineDevice9; void nine_update_state_framebuffer(struct NineDevice9 *); boolean nine_update_state(struct NineDevice9 *); +void nine_state_restore_non_cso(struct NineDevice9 *device); void nine_state_set_defaults(struct NineDevice9 *, const D3DCAPS9 *, boolean is_reset); void nine_state_clear(struct nine_state *, const boolean device); diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c index 6f8e066965a..3f5be26fed7 100644 --- a/src/gallium/state_trackers/nine/swapchain9.c +++ b/src/gallium/state_trackers/nine/swapchain9.c @@ -597,7 +597,7 @@ handle_draw_cursor_and_hud( struct NineSwapChain9 *This, struct pipe_resource *r if (device->hud && resource) { hud_draw(device->hud, resource); /* XXX: no offset */ /* HUD doesn't clobber stipple */ - NineDevice9_RestoreNonCSOState(device, ~0x2); + nine_state_restore_non_cso(device); } } -- 2.30.2