From b13b217243f1700e9c88bfc91a4d85cc2c17ab60 Mon Sep 17 00:00:00 2001 From: Axel Davy Date: Tue, 18 Oct 2016 22:43:53 +0200 Subject: [PATCH] st/nine: Back all shader constants to nine_context For device vs shader float constants and may_swvp, the same trick as for the other constant types is used. Also memset the constants properly. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/device9.c | 138 ++++--- src/gallium/state_trackers/nine/nine_state.c | 349 +++++++++++++----- src/gallium/state_trackers/nine/nine_state.h | 68 +++- src/gallium/state_trackers/nine/stateblock9.c | 82 +--- 4 files changed, 411 insertions(+), 226 deletions(-) diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 3bfff86f56b..fb87f2c9070 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -345,24 +345,36 @@ NineDevice9_ctor( struct NineDevice9 *This, This->ps_const_size = max_const_ps * sizeof(float[4]); /* Include space for I,B constants for user constbuf. 
*/ if (This->may_swvp) { - This->state.vs_const_f_swvp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1); - if (!This->state.vs_const_f_swvp) + This->state.vs_const_f = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1); + This->context.vs_const_f_swvp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1); + if (!This->context.vs_const_f_swvp) return E_OUTOFMEMORY; This->state.vs_lconstf_temp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1); + This->context.vs_lconstf_temp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1); This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I_SWVP * sizeof(int[4]), 1); + This->context.vs_const_i = CALLOC(NINE_MAX_CONST_I_SWVP * sizeof(int[4]), 1); This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B_SWVP * sizeof(BOOL), 1); + This->context.vs_const_b = CALLOC(NINE_MAX_CONST_B_SWVP * sizeof(BOOL), 1); } else { - This->state.vs_const_f_swvp = NULL; + This->state.vs_const_f = CALLOC(NINE_MAX_CONST_F * sizeof(float[4]), 1); + This->context.vs_const_f_swvp = NULL; This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1); + This->context.vs_lconstf_temp = CALLOC(This->vs_const_size,1); This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1); + This->context.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1); This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1); + This->context.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1); } - This->state.vs_const_f = CALLOC(This->vs_const_size, 1); + This->context.vs_const_f = CALLOC(This->vs_const_size, 1); This->state.ps_const_f = CALLOC(This->ps_const_size, 1); - This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1); - if (!This->state.vs_const_f || !This->state.ps_const_f || - !This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp || - !This->state.vs_const_i || !This->state.vs_const_b) + This->context.ps_const_f = CALLOC(This->ps_const_size, 1); + This->context.ps_lconstf_temp = CALLOC(This->ps_const_size,1); + if (!This->state.vs_const_f 
|| !This->context.vs_const_f || + !This->state.ps_const_f || !This->context.ps_const_f || + !This->state.vs_lconstf_temp || !This->context.vs_lconstf_temp || + !This->context.ps_lconstf_temp || + !This->state.vs_const_i || !This->context.vs_const_i || + !This->state.vs_const_b || !This->context.vs_const_b) return E_OUTOFMEMORY; if (strstr(pScreen->get_name(pScreen), "AMD") || @@ -505,12 +517,17 @@ NineDevice9_dtor( struct NineDevice9 *This ) pipe_resource_reference(&This->dummy_texture, NULL); pipe_resource_reference(&This->dummy_vbo, NULL); FREE(This->state.vs_const_f); + FREE(This->context.vs_const_f); FREE(This->state.ps_const_f); + FREE(This->context.ps_const_f); FREE(This->state.vs_lconstf_temp); - FREE(This->state.ps_lconstf_temp); + FREE(This->context.vs_lconstf_temp); + FREE(This->context.ps_lconstf_temp); FREE(This->state.vs_const_i); + FREE(This->context.vs_const_i); FREE(This->state.vs_const_b); - FREE(This->state.vs_const_f_swvp); + FREE(This->context.vs_const_b); + FREE(This->context.vs_const_f_swvp); pipe_resource_reference(&This->cursor.image, NULL); FREE(This->cursor.hw_upload_temp); @@ -3195,7 +3212,7 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This, UINT Vector4fCount ) { struct nine_state *state = This->update; - float *vs_const_f = This->may_swvp ? 
state->vs_const_f_swvp : state->vs_const_f; + float *vs_const_f = state->vs_const_f; DBG("This=%p StartRegister=%u pConstantData=%p Vector4fCount=%u\n", This, StartRegister, pConstantData, Vector4fCount); @@ -3207,29 +3224,29 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This, return D3D_OK; user_assert(pConstantData, D3DERR_INVALIDCALL); - if (!This->is_recording) { - if (!memcmp(&vs_const_f[StartRegister * 4], pConstantData, - Vector4fCount * 4 * sizeof(state->vs_const_f[0]))) - return D3D_OK; + if (unlikely(This->is_recording)) { + memcpy(&vs_const_f[StartRegister * 4], + pConstantData, + Vector4fCount * 4 * sizeof(state->vs_const_f[0])); + + nine_ranges_insert(&state->changed.vs_const_f, + StartRegister, StartRegister + Vector4fCount, + &This->range_pool); + + state->changed.group |= NINE_STATE_VS_CONST; + + return D3D_OK; } + if (!memcmp(&vs_const_f[StartRegister * 4], pConstantData, + Vector4fCount * 4 * sizeof(state->vs_const_f[0]))) + return D3D_OK; + memcpy(&vs_const_f[StartRegister * 4], pConstantData, Vector4fCount * 4 * sizeof(state->vs_const_f[0])); - nine_ranges_insert(&state->changed.vs_const_f, - StartRegister, StartRegister + Vector4fCount, - &This->range_pool); - - if (This->may_swvp) { - Vector4fCount = MIN2(StartRegister + Vector4fCount, NINE_MAX_CONST_F) - StartRegister; - if (StartRegister < NINE_MAX_CONST_F) - memcpy(&state->vs_const_f[StartRegister * 4], - pConstantData, - Vector4fCount * 4 * sizeof(state->vs_const_f[0])); - } - - state->changed.group |= NINE_STATE_VS_CONST; + nine_context_set_vertex_shader_constant_f(This, StartRegister, pConstantData, Vector4fCount); return D3D_OK; } @@ -3241,14 +3258,13 @@ NineDevice9_GetVertexShaderConstantF( struct NineDevice9 *This, UINT Vector4fCount ) { const struct nine_state *state = &This->state; - float *vs_const_f = This->may_swvp ? 
state->vs_const_f_swvp : state->vs_const_f; user_assert(StartRegister < This->caps.MaxVertexShaderConst, D3DERR_INVALIDCALL); user_assert(StartRegister + Vector4fCount <= This->caps.MaxVertexShaderConst, D3DERR_INVALIDCALL); user_assert(pConstantData, D3DERR_INVALIDCALL); memcpy(pConstantData, - &vs_const_f[StartRegister * 4], + &state->vs_const_f[StartRegister * 4], Vector4fCount * 4 * sizeof(state->vs_const_f[0])); return D3D_OK; @@ -3290,10 +3306,13 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This, } } - nine_ranges_insert(&state->changed.vs_const_i, - StartRegister, StartRegister + Vector4iCount, - &This->range_pool); - state->changed.group |= NINE_STATE_VS_CONST; + if (unlikely(This->is_recording)) { + nine_ranges_insert(&state->changed.vs_const_i, + StartRegister, StartRegister + Vector4iCount, + &This->range_pool); + state->changed.group |= NINE_STATE_VS_CONST; + } else + nine_context_set_vertex_shader_constant_i(This, StartRegister, pConstantData, Vector4iCount); return D3D_OK; } @@ -3361,10 +3380,13 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This, for (i = 0; i < BoolCount; i++) state->vs_const_b[StartRegister + i] = pConstantData[i] ? 
bool_true : 0; - nine_ranges_insert(&state->changed.vs_const_b, - StartRegister, StartRegister + BoolCount, - &This->range_pool); - state->changed.group |= NINE_STATE_VS_CONST; + if (unlikely(This->is_recording)) { + nine_ranges_insert(&state->changed.vs_const_b, + StartRegister, StartRegister + BoolCount, + &This->range_pool); + state->changed.group |= NINE_STATE_VS_CONST; + } else + nine_context_set_vertex_shader_constant_b(This, StartRegister, pConstantData, BoolCount); return D3D_OK; } @@ -3604,21 +3626,28 @@ NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This, return D3D_OK; user_assert(pConstantData, D3DERR_INVALIDCALL); - if (!This->is_recording) { - if (!memcmp(&state->ps_const_f[StartRegister * 4], pConstantData, - Vector4fCount * 4 * sizeof(state->ps_const_f[0]))) - return D3D_OK; + if (unlikely(This->is_recording)) { + memcpy(&state->ps_const_f[StartRegister * 4], + pConstantData, + Vector4fCount * 4 * sizeof(state->ps_const_f[0])); + + nine_ranges_insert(&state->changed.ps_const_f, + StartRegister, StartRegister + Vector4fCount, + &This->range_pool); + + state->changed.group |= NINE_STATE_PS_CONST; + return D3D_OK; } + if (!memcmp(&state->ps_const_f[StartRegister * 4], pConstantData, + Vector4fCount * 4 * sizeof(state->ps_const_f[0]))) + return D3D_OK; + memcpy(&state->ps_const_f[StartRegister * 4], pConstantData, Vector4fCount * 4 * sizeof(state->ps_const_f[0])); - nine_ranges_insert(&state->changed.ps_const_f, - StartRegister, StartRegister + Vector4fCount, - &This->range_pool); - - state->changed.group |= NINE_STATE_PS_CONST; + nine_context_set_pixel_shader_constant_f(This, StartRegister, pConstantData, Vector4fCount); return D3D_OK; } @@ -3675,8 +3704,12 @@ NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This, state->ps_const_i[StartRegister+i][3] = fui((float)(pConstantData[4*i+3])); } } - state->changed.ps_const_i |= ((1 << Vector4iCount) - 1) << StartRegister; - state->changed.group |= NINE_STATE_PS_CONST; + + if 
(unlikely(This->is_recording)) { + state->changed.ps_const_i |= ((1 << Vector4iCount) - 1) << StartRegister; + state->changed.group |= NINE_STATE_PS_CONST; + } else + nine_context_set_pixel_shader_constant_i(This, StartRegister, pConstantData, Vector4iCount); return D3D_OK; } @@ -3740,8 +3773,11 @@ NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This, for (i = 0; i < BoolCount; i++) state->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0; - state->changed.ps_const_b |= ((1 << BoolCount) - 1) << StartRegister; - state->changed.group |= NINE_STATE_PS_CONST; + if (unlikely(This->is_recording)) { + state->changed.ps_const_b |= ((1 << BoolCount) - 1) << StartRegister; + state->changed.group |= NINE_STATE_PS_CONST; + } else + nine_context_set_pixel_shader_constant_b(This, StartRegister, pConstantData, BoolCount); return D3D_OK; } diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 17a6934740e..ef72cdc7ba3 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -88,18 +88,18 @@ prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device) struct nine_state *state = &device->state; struct nine_context *context = &device->context; - if (state->changed.vs_const_f || state->changed.group & NINE_STATE_SWVP) { + if (context->changed.vs_const_f || state->changed.group & NINE_STATE_SWVP) { struct pipe_constant_buffer cb; cb.buffer_offset = 0; cb.buffer_size = 4096 * sizeof(float[4]); - cb.user_buffer = state->vs_const_f_swvp; + cb.user_buffer = context->vs_const_f_swvp; if (context->vs->lconstf.ranges) { const struct nine_lconstf *lconstf = &(context->vs->lconstf); const struct nine_range *r = lconstf->ranges; unsigned n = 0; - float *dst = device->state.vs_lconstf_temp; + float *dst = context->vs_lconstf_temp; float *src = (float *)cb.user_buffer; memcpy(dst, src, cb.buffer_size); while (r) { @@ -123,30 +123,34 @@ 
prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device) context->pipe.cb1_swvp.buffer_offset = cb.buffer_offset; context->pipe.cb1_swvp.buffer_size = cb.buffer_size; context->pipe.cb1_swvp.user_buffer = cb.user_buffer; + + context->changed.vs_const_f = 0; } - if (state->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) { + if (context->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) { struct pipe_constant_buffer cb; cb.buffer_offset = 0; cb.buffer_size = 2048 * sizeof(float[4]); - cb.user_buffer = state->vs_const_i; + cb.user_buffer = context->vs_const_i; context->pipe.cb2_swvp.buffer_offset = cb.buffer_offset; context->pipe.cb2_swvp.buffer_size = cb.buffer_size; context->pipe.cb2_swvp.user_buffer = cb.user_buffer; + context->changed.vs_const_i = 0; } - if (state->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) { + if (context->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) { struct pipe_constant_buffer cb; cb.buffer_offset = 0; cb.buffer_size = 512 * sizeof(float[4]); - cb.user_buffer = state->vs_const_b; + cb.user_buffer = context->vs_const_b; context->pipe.cb3_swvp.buffer_offset = cb.buffer_offset; context->pipe.cb3_swvp.buffer_size = cb.buffer_size; context->pipe.cb3_swvp.user_buffer = cb.user_buffer; + context->changed.vs_const_b = 0; } if (!device->driver_caps.user_cbufs) { @@ -195,33 +199,6 @@ prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device) cb->user_buffer = NULL; } - if (device->state.changed.vs_const_f) { - struct nine_range *r = device->state.changed.vs_const_f; - struct nine_range *p = r; - while (p->next) - p = p->next; - nine_range_pool_put_chain(&device->range_pool, r, p); - device->state.changed.vs_const_f = NULL; - } - - if (device->state.changed.vs_const_i) { - struct nine_range *r = device->state.changed.vs_const_i; - struct nine_range *p = r; - while (p->next) - p = p->next; - nine_range_pool_put_chain(&device->range_pool, r, p); - device->state.changed.vs_const_i = NULL; 
- } - - if (device->state.changed.vs_const_b) { - struct nine_range *r = device->state.changed.vs_const_b; - struct nine_range *p = r; - while (p->next) - p = p->next; - nine_range_pool_put_chain(&device->range_pool, r, p); - device->state.changed.vs_const_b = NULL; - } - state->changed.group &= ~NINE_STATE_VS_CONST; context->commit |= NINE_STATE_COMMIT_CONST_VS; } @@ -235,40 +212,24 @@ prepare_vs_constants_userbuf(struct NineDevice9 *device) cb.buffer = NULL; cb.buffer_offset = 0; cb.buffer_size = context->vs->const_used_size; - cb.user_buffer = device->state.vs_const_f; + cb.user_buffer = context->vs_const_f; if (device->swvp) { prepare_vs_constants_userbuf_swvp(device); return; } - if (state->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) { - int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f]; - memcpy(idst, state->vs_const_i, NINE_MAX_CONST_I * sizeof(int[4])); + if (context->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) { + int *idst = (int *)&context->vs_const_f[4 * device->max_vs_const_f]; + memcpy(idst, context->vs_const_i, NINE_MAX_CONST_I * sizeof(int[4])); + context->changed.vs_const_i = 0; } - if (state->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) { - int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f]; + if (context->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) { + int *idst = (int *)&context->vs_const_f[4 * device->max_vs_const_f]; uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I]; - memcpy(bdst, state->vs_const_b, NINE_MAX_CONST_B * sizeof(BOOL)); - } - - if (device->state.changed.vs_const_i) { - struct nine_range *r = device->state.changed.vs_const_i; - struct nine_range *p = r; - while (p->next) - p = p->next; - nine_range_pool_put_chain(&device->range_pool, r, p); - device->state.changed.vs_const_i = NULL; - } - - if (device->state.changed.vs_const_b) { - struct nine_range *r = device->state.changed.vs_const_b; - struct nine_range *p = r; - while 
(p->next) - p = p->next; - nine_range_pool_put_chain(&device->range_pool, r, p); - device->state.changed.vs_const_b = NULL; + memcpy(bdst, context->vs_const_b, NINE_MAX_CONST_B * sizeof(BOOL)); + context->changed.vs_const_b = 0; } if (!cb.buffer_size) @@ -279,7 +240,7 @@ prepare_vs_constants_userbuf(struct NineDevice9 *device) const struct nine_lconstf *lconstf = &(context->vs->lconstf); const struct nine_range *r = lconstf->ranges; unsigned n = 0; - float *dst = device->state.vs_lconstf_temp; + float *dst = context->vs_lconstf_temp; float *src = (float *)cb.user_buffer; memcpy(dst, src, cb.buffer_size); while (r) { @@ -306,14 +267,7 @@ prepare_vs_constants_userbuf(struct NineDevice9 *device) } else context->pipe.cb_vs = cb; - if (device->state.changed.vs_const_f) { - struct nine_range *r = device->state.changed.vs_const_f; - struct nine_range *p = r; - while (p->next) - p = p->next; - nine_range_pool_put_chain(&device->range_pool, r, p); - device->state.changed.vs_const_f = NULL; - } + context->changed.vs_const_f = 0; state->changed.group &= ~NINE_STATE_VS_CONST; context->commit |= NINE_STATE_COMMIT_CONST_VS; @@ -328,34 +282,34 @@ prepare_ps_constants_userbuf(struct NineDevice9 *device) cb.buffer = NULL; cb.buffer_offset = 0; cb.buffer_size = device->state.ps->const_used_size; - cb.user_buffer = device->state.ps_const_f; + cb.user_buffer = context->ps_const_f; - if (state->changed.ps_const_i) { - int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f]; - memcpy(idst, state->ps_const_i, sizeof(state->ps_const_i)); - state->changed.ps_const_i = 0; + if (context->changed.ps_const_i) { + int *idst = (int *)&context->ps_const_f[4 * device->max_ps_const_f]; + memcpy(idst, context->ps_const_i, sizeof(context->ps_const_i)); + context->changed.ps_const_i = 0; } - if (state->changed.ps_const_b) { - int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f]; + if (context->changed.ps_const_b) { + int *idst = (int *)&context->ps_const_f[4 * 
device->max_ps_const_f]; uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I]; - memcpy(bdst, state->ps_const_b, sizeof(state->ps_const_b)); - state->changed.ps_const_b = 0; + memcpy(bdst, context->ps_const_b, sizeof(context->ps_const_b)); + context->changed.ps_const_b = 0; } /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */ if (device->state.ps->bumpenvmat_needed) { - memcpy(device->state.ps_lconstf_temp, cb.user_buffer, cb.buffer_size); - memcpy(&device->state.ps_lconstf_temp[4 * 8], &device->context.bumpmap_vars, sizeof(device->context.bumpmap_vars)); + memcpy(context->ps_lconstf_temp, cb.user_buffer, cb.buffer_size); + memcpy(&context->ps_lconstf_temp[4 * 8], &device->context.bumpmap_vars, sizeof(device->context.bumpmap_vars)); - cb.user_buffer = device->state.ps_lconstf_temp; + cb.user_buffer = context->ps_lconstf_temp; } if (state->ps->byte_code.version < 0x30 && context->rs[D3DRS_FOGENABLE]) { - float *dst = &state->ps_lconstf_temp[4 * 32]; - if (cb.user_buffer != state->ps_lconstf_temp) { - memcpy(state->ps_lconstf_temp, cb.user_buffer, cb.buffer_size); - cb.user_buffer = state->ps_lconstf_temp; + float *dst = &context->ps_lconstf_temp[4 * 32]; + if (cb.user_buffer != context->ps_lconstf_temp) { + memcpy(context->ps_lconstf_temp, cb.user_buffer, cb.buffer_size); + cb.user_buffer = context->ps_lconstf_temp; } d3dcolor_to_rgba(dst, context->rs[D3DRS_FOGCOLOR]); @@ -385,14 +339,8 @@ prepare_ps_constants_userbuf(struct NineDevice9 *device) } else context->pipe.cb_ps = cb; - if (device->state.changed.ps_const_f) { - struct nine_range *r = device->state.changed.ps_const_f; - struct nine_range *p = r; - while (p->next) - p = p->next; - nine_range_pool_put_chain(&device->range_pool, r, p); - device->state.changed.ps_const_f = NULL; - } + context->changed.ps_const_f = 0; + state->changed.group &= ~NINE_STATE_PS_CONST; context->commit |= NINE_STATE_COMMIT_CONST_PS; } @@ -1330,6 +1278,139 @@ 
nine_context_set_vertex_shader(struct NineDevice9 *device, state->changed.group |= NINE_STATE_VS; } +void +nine_context_set_vertex_shader_constant_f(struct NineDevice9 *device, + UINT StartRegister, + const float *pConstantData, + UINT Vector4fCount) +{ + struct nine_state *state = &device->state; + struct nine_context *context = &device->context; + float *vs_const_f = device->may_swvp ? context->vs_const_f_swvp : context->vs_const_f; + + memcpy(&vs_const_f[StartRegister * 4], + pConstantData, + Vector4fCount * 4 * sizeof(context->vs_const_f[0])); + + if (device->may_swvp) { + Vector4fCount = MIN2(StartRegister + Vector4fCount, NINE_MAX_CONST_F) - StartRegister; + if (StartRegister < NINE_MAX_CONST_F) + memcpy(&context->vs_const_f[StartRegister * 4], + pConstantData, + Vector4fCount * 4 * sizeof(context->vs_const_f[0])); + } + + context->changed.vs_const_f = TRUE; + state->changed.group |= NINE_STATE_VS_CONST; +} + + +void +nine_context_set_vertex_shader_constant_i(struct NineDevice9 *device, + UINT StartRegister, + const int *pConstantData, + UINT Vector4iCount) +{ + struct nine_state *state = &device->state; + struct nine_context *context = &device->context; + int i; + + if (device->driver_caps.vs_integer) { + memcpy(&context->vs_const_i[4 * StartRegister], + pConstantData, + Vector4iCount * sizeof(int[4])); + } else { + for (i = 0; i < Vector4iCount; i++) { + context->vs_const_i[4 * (StartRegister + i)] = fui((float)(pConstantData[4 * i])); + context->vs_const_i[4 * (StartRegister + i) + 1] = fui((float)(pConstantData[4 * i + 1])); + context->vs_const_i[4 * (StartRegister + i) + 2] = fui((float)(pConstantData[4 * i + 2])); + context->vs_const_i[4 * (StartRegister + i) + 3] = fui((float)(pConstantData[4 * i + 3])); + } + } + + context->changed.vs_const_i = TRUE; + state->changed.group |= NINE_STATE_VS_CONST; +} + +void +nine_context_set_vertex_shader_constant_b(struct NineDevice9 *device, + UINT StartRegister, + const BOOL *pConstantData, + UINT BoolCount) +{ + 
struct nine_state *state = &device->state; + struct nine_context *context = &device->context; + int i; + uint32_t bool_true = device->driver_caps.vs_integer ? 0xFFFFFFFF : fui(1.0f); + + for (i = 0; i < BoolCount; i++) + context->vs_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0; + + context->changed.vs_const_b = TRUE; + state->changed.group |= NINE_STATE_VS_CONST; +} + +void +nine_context_set_pixel_shader_constant_f(struct NineDevice9 *device, + UINT StartRegister, + const float *pConstantData, + UINT Vector4fCount) +{ + struct nine_state *state = &device->state; + struct nine_context *context = &device->context; + + memcpy(&context->ps_const_f[StartRegister * 4], + pConstantData, + Vector4fCount * 4 * sizeof(context->ps_const_f[0])); + + context->changed.ps_const_f = TRUE; + state->changed.group |= NINE_STATE_PS_CONST; +} + +void +nine_context_set_pixel_shader_constant_i(struct NineDevice9 *device, + UINT StartRegister, + const int *pConstantData, + UINT Vector4iCount) +{ + struct nine_state *state = &device->state; + struct nine_context *context = &device->context; + int i; + + if (device->driver_caps.ps_integer) { + memcpy(&context->ps_const_i[StartRegister][0], + pConstantData, + Vector4iCount * sizeof(context->ps_const_i[0])); + } else { + for (i = 0; i < Vector4iCount; i++) { + context->ps_const_i[StartRegister+i][0] = fui((float)(pConstantData[4*i])); + context->ps_const_i[StartRegister+i][1] = fui((float)(pConstantData[4*i+1])); + context->ps_const_i[StartRegister+i][2] = fui((float)(pConstantData[4*i+2])); + context->ps_const_i[StartRegister+i][3] = fui((float)(pConstantData[4*i+3])); + } + } + context->changed.ps_const_i = TRUE; + state->changed.group |= NINE_STATE_PS_CONST; +} + +void +nine_context_set_pixel_shader_constant_b(struct NineDevice9 *device, + UINT StartRegister, + const BOOL *pConstantData, + UINT BoolCount) +{ + struct nine_state *state = &device->state; + struct nine_context *context = &device->context; + int i; + uint32_t 
bool_true = device->driver_caps.ps_integer ? 0xFFFFFFFF : fui(1.0f); + + for (i = 0; i < BoolCount; i++) + context->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0; + + context->changed.ps_const_b = TRUE; + state->changed.group |= NINE_STATE_PS_CONST; +} + void nine_context_apply_stateblock(struct NineDevice9 *device, const struct nine_state *src) @@ -1411,6 +1492,70 @@ nine_context_apply_stateblock(struct NineDevice9 *device, nine_bind(&context->vs, src->vs); context->programmable_vs = context->vs && !(context->vdecl && context->vdecl->position_t); + + /* Vertex constants */ + if (src->changed.group & NINE_STATE_VS_CONST) { + struct nine_range *r; + if (device->may_swvp) { + for (r = src->changed.vs_const_f; r; r = r->next) { + int bgn = r->bgn; + int end = r->end; + memcpy(&context->vs_const_f_swvp[bgn * 4], + &src->vs_const_f[bgn * 4], + (end - bgn) * 4 * sizeof(float)); + if (bgn < device->max_vs_const_f) { + end = MIN2(end, device->max_vs_const_f); + memcpy(&context->vs_const_f[bgn * 4], + &src->vs_const_f[bgn * 4], + (end - bgn) * 4 * sizeof(float)); + } + } + } else { + for (r = src->changed.vs_const_f; r; r = r->next) { + memcpy(&context->vs_const_f[r->bgn * 4], + &src->vs_const_f[r->bgn * 4], + (r->end - r->bgn) * 4 * sizeof(float)); + } + } + for (r = src->changed.vs_const_i; r; r = r->next) { + memcpy(&context->vs_const_i[r->bgn * 4], + &src->vs_const_i[r->bgn * 4], + (r->end - r->bgn) * 4 * sizeof(int)); + } + for (r = src->changed.vs_const_b; r; r = r->next) { + memcpy(&context->vs_const_b[r->bgn], + &src->vs_const_b[r->bgn], + (r->end - r->bgn) * sizeof(int)); + } + context->changed.vs_const_f = !!src->changed.vs_const_f; + context->changed.vs_const_i = !!src->changed.vs_const_i; + context->changed.vs_const_b = !!src->changed.vs_const_b; + } + + /* Pixel constants */ + if (src->changed.group & NINE_STATE_PS_CONST) { + struct nine_range *r; + for (r = src->changed.ps_const_f; r; r = r->next) { + memcpy(&context->ps_const_f[r->bgn * 4], 
+ &src->ps_const_f[r->bgn * 4], + (r->end - r->bgn) * 4 * sizeof(float)); + } + if (src->changed.ps_const_i) { + uint16_t m = src->changed.ps_const_i; + for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1) + if (m & 1) + memcpy(context->ps_const_i[i], src->ps_const_i[i], 4 * sizeof(int)); + } + if (src->changed.ps_const_b) { + uint16_t m = src->changed.ps_const_b; + for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1) + if (m & 1) + context->ps_const_b[i] = src->ps_const_b[i]; + } + context->changed.ps_const_f = !!src->changed.ps_const_f; + context->changed.ps_const_i = !!src->changed.ps_const_i; + context->changed.ps_const_b = !!src->changed.ps_const_b; + } } static void @@ -1874,10 +2019,20 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, sizeof(state->samp_advertised[s])); } - if (state->vs_const_f) - memset(state->vs_const_f, 0, device->vs_const_size); - if (state->ps_const_f) - memset(state->ps_const_f, 0, device->ps_const_size); + memset(state->vs_const_f, 0, VS_CONST_F_SIZE(device)); + memset(context->vs_const_f, 0, device->vs_const_size); + if (context->vs_const_f_swvp) + memset(context->vs_const_f_swvp, 0, NINE_MAX_CONST_F_SWVP * sizeof(float[4])); + memset(state->vs_const_i, 0, VS_CONST_I_SIZE(device)); + memset(context->vs_const_i, 0, VS_CONST_I_SIZE(device)); + memset(state->vs_const_b, 0, VS_CONST_B_SIZE(device)); + memset(context->vs_const_b, 0, VS_CONST_B_SIZE(device)); + memset(state->ps_const_f, 0, device->ps_const_size); + memset(context->ps_const_f, 0, device->ps_const_size); + memset(state->ps_const_i, 0, sizeof(state->ps_const_i)); + memset(context->ps_const_i, 0, sizeof(context->ps_const_i)); + memset(state->ps_const_b, 0, sizeof(state->ps_const_b)); + memset(context->ps_const_b, 0, sizeof(context->ps_const_b)); /* Cap dependent initial state: */ @@ -2117,7 +2272,7 @@ update_vs_constants_sw(struct NineDevice9 *device) cb.buffer = NULL; cb.buffer_offset = 0; cb.buffer_size = 4096 * sizeof(float[4]); - cb.user_buffer = 
state->vs_const_f_swvp; + cb.user_buffer = state->vs_const_f; if (state->vs->lconstf.ranges) { const struct nine_lconstf *lconstf = &device->state.vs->lconstf; diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h index b6b8d76833d..e8519e982c7 100644 --- a/src/gallium/state_trackers/nine/nine_state.h +++ b/src/gallium/state_trackers/nine/nine_state.h @@ -116,6 +116,10 @@ ((nconstf) * 4 * sizeof(float) + \ NINE_MAX_CONST_I * 4 * sizeof(int)) +#define VS_CONST_F_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_F_SWVP * sizeof(float[4])) : (NINE_MAX_CONST_F * sizeof(float[4]))) +#define VS_CONST_I_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_I_SWVP * sizeof(int[4])) : (NINE_MAX_CONST_I * sizeof(int[4]))) +#define VS_CONST_B_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_B_SWVP * sizeof(BOOL)) : (NINE_MAX_CONST_B * sizeof(BOOL))) + #define NINE_MAX_TEXTURE_STAGES 8 @@ -142,11 +146,11 @@ struct nine_state uint32_t stream_freq; /* stateblocks only */ uint32_t texture; /* stateblocks only */ uint16_t sampler[NINE_MAX_SAMPLERS]; /* stateblocks only */ - struct nine_range *vs_const_f; - struct nine_range *ps_const_f; - struct nine_range *vs_const_i; + struct nine_range *vs_const_f; /* stateblocks only */ + struct nine_range *ps_const_f; /* stateblocks only */ + struct nine_range *vs_const_i; /* stateblocks only */ uint16_t ps_const_i; /* NINE_MAX_CONST_I == 16 */ - struct nine_range *vs_const_b; + struct nine_range *vs_const_b; /* stateblocks only */ uint16_t ps_const_b; /* NINE_MAX_CONST_B == 16 */ uint8_t ucp; } changed; @@ -164,16 +168,14 @@ struct nine_state */ struct NineVertexShader9 *vs; float *vs_const_f; - float *vs_const_f_swvp; int *vs_const_i; BOOL *vs_const_b; - float *vs_lconstf_temp; + float *vs_lconstf_temp; /* ProcessVertices */ struct NinePixelShader9 *ps; float *ps_const_f; int ps_const_i[NINE_MAX_CONST_I][4]; BOOL ps_const_b[NINE_MAX_CONST_B]; - float *ps_lconstf_temp; struct 
NineVertexDeclaration9 *vdecl; @@ -217,6 +219,12 @@ struct nine_context { struct { uint16_t sampler[NINE_MAX_SAMPLERS]; uint32_t vtxbuf; + BOOL vs_const_f; + BOOL vs_const_i; + BOOL vs_const_b; + BOOL ps_const_f; + BOOL ps_const_i; + BOOL ps_const_b; } changed; uint32_t bumpmap_vars[6 * NINE_MAX_TEXTURE_STAGES]; @@ -230,6 +238,16 @@ struct nine_context { struct NineVertexShader9 *vs; BOOL programmable_vs; + float *vs_const_f; + float *vs_const_f_swvp; + int *vs_const_i; + BOOL *vs_const_b; + float *vs_lconstf_temp; + + float *ps_const_f; + int ps_const_i[NINE_MAX_CONST_I][4]; + BOOL ps_const_b[NINE_MAX_CONST_B]; + float *ps_lconstf_temp; struct NineVertexDeclaration9 *vdecl; @@ -320,6 +338,42 @@ void nine_context_set_vertex_shader(struct NineDevice9 *device, struct NineVertexShader9 *pShader); +void +nine_context_set_vertex_shader_constant_f(struct NineDevice9 *device, + UINT StartRegister, + const float *pConstantData, + UINT Vector4fCount); + +void +nine_context_set_vertex_shader_constant_i(struct NineDevice9 *device, + UINT StartRegister, + const int *pConstantData, + UINT Vector4iCount); + +void +nine_context_set_vertex_shader_constant_b(struct NineDevice9 *device, + UINT StartRegister, + const BOOL *pConstantData, + UINT BoolCount); + +void +nine_context_set_pixel_shader_constant_f(struct NineDevice9 *device, + UINT StartRegister, + const float *pConstantData, + UINT Vector4fCount); + +void +nine_context_set_pixel_shader_constant_i(struct NineDevice9 *device, + UINT StartRegister, + const int *pConstantData, + UINT Vector4iCount); + +void +nine_context_set_pixel_shader_constant_b(struct NineDevice9 *device, + UINT StartRegister, + const BOOL *pConstantData, + UINT BoolCount); + void nine_context_apply_stateblock(struct NineDevice9 *device, const struct nine_state *src); diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c index 0c19703e64b..0aa69be16b9 100644 --- 
a/src/gallium/state_trackers/nine/stateblock9.c +++ b/src/gallium/state_trackers/nine/stateblock9.c @@ -30,10 +30,6 @@ /* XXX TODO: handling of lights is broken */ -#define VS_CONST_I_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_I_SWVP * sizeof(int[4])) : (NINE_MAX_CONST_I * sizeof(int[4]))) -#define VS_CONST_B_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_B_SWVP * sizeof(BOOL)) : (NINE_MAX_CONST_B * sizeof(BOOL))) -#define VS_CONST_F_SWVP_SIZE (NINE_MAX_CONST_F_SWVP * sizeof(float[4])) - HRESULT NineStateBlock9_ctor( struct NineStateBlock9 *This, struct NineUnknownParams *pParams, @@ -48,7 +44,7 @@ NineStateBlock9_ctor( struct NineStateBlock9 *This, This->type = type; - This->state.vs_const_f = MALLOC(This->base.device->vs_const_size); + This->state.vs_const_f = MALLOC(VS_CONST_F_SIZE(This->base.device)); This->state.ps_const_f = MALLOC(This->base.device->ps_const_size); This->state.vs_const_i = MALLOC(VS_CONST_I_SIZE(This->base.device)); This->state.vs_const_b = MALLOC(VS_CONST_B_SIZE(This->base.device)); @@ -56,13 +52,6 @@ NineStateBlock9_ctor( struct NineStateBlock9 *This, !This->state.vs_const_i || !This->state.vs_const_b) return E_OUTOFMEMORY; - if (This->base.device->may_swvp) { - This->state.vs_const_f_swvp = MALLOC(VS_CONST_F_SWVP_SIZE); - if (!This->state.vs_const_f_swvp) - return E_OUTOFMEMORY; - } else - This->state.vs_const_f_swvp = NULL; - return D3D_OK; } @@ -79,7 +68,6 @@ NineStateBlock9_dtor( struct NineStateBlock9 *This ) FREE(state->ps_const_f); FREE(state->vs_const_i); FREE(state->vs_const_b); - FREE(state->vs_const_f_swvp); FREE(state->ff.light); @@ -138,51 +126,28 @@ nine_state_copy_common(struct NineDevice9 *device, * Various possibilities for optimization here, like creating a per-SB * constant buffer, or memcmp'ing for changes. * Will do that later depending on what works best for specific apps. + * + * Note: Currently when we apply stateblocks, it's always on the device state. + * Should it affect recording stateblocks ? 
Since it's on device state, there + * is no need to copy which ranges are dirty. If it turns out we should affect + * recording stateblocks, the info should be copied. */ if (mask->changed.group & NINE_STATE_VS_CONST) { struct nine_range *r; - if (device->may_swvp) { - for (r = mask->changed.vs_const_f; r; r = r->next) { - int bgn = r->bgn; - int end = r->end; - memcpy(&dst->vs_const_f_swvp[bgn * 4], - &src->vs_const_f_swvp[bgn * 4], - (end - bgn) * 4 * sizeof(float)); - if (apply) - nine_ranges_insert(&dst->changed.vs_const_f, bgn, end, - pool); - if (bgn < device->max_vs_const_f) { - end = MIN2(end, device->max_vs_const_f); - memcpy(&dst->vs_const_f[bgn * 4], - &src->vs_const_f[bgn * 4], - (end - bgn) * 4 * sizeof(float)); - } - } - } else { - for (r = mask->changed.vs_const_f; r; r = r->next) { - memcpy(&dst->vs_const_f[r->bgn * 4], - &src->vs_const_f[r->bgn * 4], - (r->end - r->bgn) * 4 * sizeof(float)); - if (apply) - nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end, - pool); - } + for (r = mask->changed.vs_const_f; r; r = r->next) { + memcpy(&dst->vs_const_f[r->bgn * 4], + &src->vs_const_f[r->bgn * 4], + (r->end - r->bgn) * 4 * sizeof(float)); } for (r = mask->changed.vs_const_i; r; r = r->next) { memcpy(&dst->vs_const_i[r->bgn * 4], &src->vs_const_i[r->bgn * 4], (r->end - r->bgn) * 4 * sizeof(int)); - if (apply) - nine_ranges_insert(&dst->changed.vs_const_i, r->bgn, r->end, - pool); } for (r = mask->changed.vs_const_b; r; r = r->next) { memcpy(&dst->vs_const_b[r->bgn], &src->vs_const_b[r->bgn], (r->end - r->bgn) * sizeof(int)); - if (apply) - nine_ranges_insert(&dst->changed.vs_const_b, r->bgn, r->end, - pool); } } @@ -193,25 +158,18 @@ nine_state_copy_common(struct NineDevice9 *device, memcpy(&dst->ps_const_f[r->bgn * 4], &src->ps_const_f[r->bgn * 4], (r->end - r->bgn) * 4 * sizeof(float)); - if (apply) - nine_ranges_insert(&dst->changed.ps_const_f, r->bgn, r->end, - pool); } if (mask->changed.ps_const_i) { uint16_t m = mask->changed.ps_const_i; 
for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1) if (m & 1) memcpy(dst->ps_const_i[i], src->ps_const_i[i], 4 * sizeof(int)); - if (apply) - dst->changed.ps_const_i |= mask->changed.ps_const_i; } if (mask->changed.ps_const_b) { uint16_t m = mask->changed.ps_const_b; for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1) if (m & 1) dst->ps_const_b[i] = src->ps_const_b[i]; - if (apply) - dst->changed.ps_const_b |= mask->changed.ps_const_b; } } @@ -395,23 +353,11 @@ nine_state_copy_common_all(struct NineDevice9 *device, * Will do that later depending on what works best for specific apps. */ if (1) { - struct nine_range *r = help->changed.vs_const_f; memcpy(&dst->vs_const_f[0], - &src->vs_const_f[0], device->max_vs_const_f * 4 * sizeof(float)); - if (device->may_swvp) - memcpy(dst->vs_const_f_swvp, - src->vs_const_f_swvp, VS_CONST_F_SWVP_SIZE); - if (apply) - nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end, pool); + &src->vs_const_f[0], VS_CONST_F_SIZE(device)); memcpy(dst->vs_const_i, src->vs_const_i, VS_CONST_I_SIZE(device)); memcpy(dst->vs_const_b, src->vs_const_b, VS_CONST_B_SIZE(device)); - if (apply) { - r = help->changed.vs_const_i; - nine_ranges_insert(&dst->changed.vs_const_i, r->bgn, r->end, pool); - r = help->changed.vs_const_b; - nine_ranges_insert(&dst->changed.vs_const_b, r->bgn, r->end, pool); - } } /* Pixel constants. */ @@ -419,15 +365,9 @@ nine_state_copy_common_all(struct NineDevice9 *device, struct nine_range *r = help->changed.ps_const_f; memcpy(&dst->ps_const_f[0], &src->ps_const_f[0], (r->end - r->bgn) * 4 * sizeof(float)); - if (apply) - nine_ranges_insert(&dst->changed.ps_const_f, r->bgn, r->end, pool); memcpy(dst->ps_const_i, src->ps_const_i, sizeof(dst->ps_const_i)); memcpy(dst->ps_const_b, src->ps_const_b, sizeof(dst->ps_const_b)); - if (apply) { - dst->changed.ps_const_i |= src->changed.ps_const_i; - dst->changed.ps_const_b |= src->changed.ps_const_b; - } } /* Render states. */ -- 2.30.2