From: Axel Davy Date: Mon, 19 Sep 2016 17:00:23 +0000 (+0200) Subject: st/nine: Initial ProcessVertices support X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b9639c661fb6b52c8abb96c06263f85ca4bd78c1;p=mesa.git st/nine: Initial ProcessVertices support For now only VS 3 support is implemented. This enables The Sims 2 to work. Signed-off-by: Axel Davy --- diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index f1354904344..e2d4038e86b 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -152,6 +152,7 @@ NineDevice9_ctor( struct NineDevice9 *This, list_inithead(&This->managed_textures); This->screen = pScreen; + This->screen_sw = pCTX->ref; This->caps = *pCaps; This->d3d9 = pD3D9; This->params = *pCreationParameters; @@ -195,9 +196,13 @@ NineDevice9_ctor( struct NineDevice9 *This, This->pipe = This->screen->context_create(This->screen, NULL, 0); if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */ + This->pipe_sw = This->screen_sw->context_create(This->screen_sw, NULL, 0); + if (!This->pipe_sw) { return E_OUTOFMEMORY; } This->cso = cso_create_context(This->pipe); if (!This->cso) { return E_OUTOFMEMORY; } /* also a guess */ + This->cso_sw = cso_create_context(This->pipe_sw); + if (!This->cso_sw) { return E_OUTOFMEMORY; } /* Create first, it messes up our state. 
*/ This->hud = hud_create(This->pipe, This->cso); /* NULL result is fine */ @@ -426,10 +431,14 @@ NineDevice9_ctor( struct NineDevice9 *This, This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS); This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS); This->driver_caps.user_cbufs = GET_PCAP(USER_CONSTANT_BUFFERS); + This->driver_caps.user_sw_vbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_VERTEX_BUFFERS); + This->driver_caps.user_sw_cbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_CONSTANT_BUFFERS); if (!This->driver_caps.user_vbufs) This->vertex_uploader = u_upload_create(This->pipe, 65536, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM); + This->vertex_sw_uploader = u_upload_create(This->pipe_sw, 65536, + PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM); if (!This->driver_caps.user_ibufs) This->index_uploader = u_upload_create(This->pipe, 128 * 1024, PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_STREAM); @@ -439,6 +448,9 @@ NineDevice9_ctor( struct NineDevice9 *This, PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM); } + This->constbuf_sw_uploader = u_upload_create(This->pipe_sw, 128 * 1024, + PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM); + This->driver_caps.window_space_position_support = GET_PCAP(TGSI_VS_WINDOW_SPACE_POSITION); This->driver_caps.vs_integer = pScreen->get_shader_param(pScreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS); This->driver_caps.ps_integer = pScreen->get_shader_param(pScreen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS); @@ -457,6 +469,8 @@ NineDevice9_ctor( struct NineDevice9 *This, This->update = &This->state; nine_update_state(This); + nine_state_init_sw(This); + ID3DPresentGroup_Release(This->present); return D3D_OK; @@ -473,6 +487,7 @@ NineDevice9_dtor( struct NineDevice9 *This ) if (This->pipe && This->cso) nine_pipe_context_clear(This); nine_ff_fini(This); + nine_state_destroy_sw(This); nine_state_clear(&This->state, TRUE); if (This->vertex_uploader) @@ -481,6 +496,10 @@ NineDevice9_dtor( struct 
NineDevice9 *This ) u_upload_destroy(This->index_uploader); if (This->constbuf_uploader) u_upload_destroy(This->constbuf_uploader); + if (This->vertex_sw_uploader) + u_upload_destroy(This->vertex_sw_uploader); + if (This->constbuf_sw_uploader) + u_upload_destroy(This->constbuf_sw_uploader); nine_bind(&This->record, NULL); @@ -502,13 +521,11 @@ NineDevice9_dtor( struct NineDevice9 *This ) FREE(This->swapchains); } - /* state stuff */ - if (This->pipe) { - if (This->cso) { - cso_destroy_context(This->cso); - } - if (This->pipe->destroy) { This->pipe->destroy(This->pipe); } - } + /* Destroy cso first */ + if (This->cso) { cso_destroy_context(This->cso); } + if (This->cso_sw) { cso_destroy_context(This->cso_sw); } + if (This->pipe && This->pipe->destroy) { This->pipe->destroy(This->pipe); } + if (This->pipe_sw && This->pipe_sw->destroy) { This->pipe_sw->destroy(This->pipe_sw); } if (This->present) { ID3DPresentGroup_Release(This->present); } if (This->d3d9) { IDirect3D9_Release(This->d3d9); } @@ -3166,9 +3183,6 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This, return D3D_OK; } -/* TODO: Write to pDestBuffer directly if vertex declaration contains - * only f32 formats. 
- */ HRESULT NINE_WINAPI NineDevice9_ProcessVertices( struct NineDevice9 *This, UINT SrcStartIndex, @@ -3178,33 +3192,69 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This, IDirect3DVertexDeclaration9 *pVertexDecl, DWORD Flags ) { - struct pipe_screen *screen = This->screen; + struct pipe_screen *screen_sw = This->screen_sw; + struct pipe_context *pipe_sw = This->pipe_sw; struct NineVertexDeclaration9 *vdecl = NineVertexDeclaration9(pVertexDecl); + struct NineVertexBuffer9 *dst = NineVertexBuffer9(pDestBuffer); struct NineVertexShader9 *vs; struct pipe_resource *resource; + struct pipe_transfer *transfer = NULL; + struct pipe_stream_output_info so; struct pipe_stream_output_target *target; struct pipe_draw_info draw; + struct pipe_box box; + unsigned offsets[1] = {0}; HRESULT hr; - unsigned buffer_offset, buffer_size; + unsigned buffer_size; + void *map; DBG("This=%p SrcStartIndex=%u DestIndex=%u VertexCount=%u " "pDestBuffer=%p pVertexDecl=%p Flags=%d\n", This, SrcStartIndex, DestIndex, VertexCount, pDestBuffer, pVertexDecl, Flags); - if (!screen->get_param(screen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS)) - STUB(D3DERR_INVALIDCALL); + if (!screen_sw->get_param(screen_sw, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS)) { + DBG("ProcessVertices not supported\n"); + return D3DERR_INVALIDCALL; + } - nine_update_state(This); - /* TODO: Create shader with stream output. */ - STUB(D3DERR_INVALIDCALL); - struct NineVertexBuffer9 *dst = NineVertexBuffer9(pDestBuffer); + vs = This->state.programmable_vs ? 
This->state.vs : This->ff.vs; + /* Note: version is 0 for ff */ + user_assert(vdecl || (vs->byte_code.version < 0x30 && dst->desc.FVF), + D3DERR_INVALIDCALL); + if (!vdecl) { + DWORD FVF = dst->desc.FVF; + vdecl = util_hash_table_get(This->ff.ht_fvf, &FVF); + if (!vdecl) { + hr = NineVertexDeclaration9_new_from_fvf(This, FVF, &vdecl); + if (FAILED(hr)) + return hr; + vdecl->fvf = FVF; + util_hash_table_set(This->ff.ht_fvf, &vdecl->fvf, vdecl); + NineUnknown_ConvertRefToBind(NineUnknown(vdecl)); + } + } - vs = This->state.vs ? This->state.vs : This->ff.vs; + /* Flags: Can be 0 or D3DPV_DONOTCOPYDATA, and/or lock flags + * D3DPV_DONOTCOPYDATA -> Has effect only for ff. In particular + * if not set, everything from src will be used, and dst + * must match exactly the ff vs outputs. + * TODO: Handle all the checks, etc for ff */ + user_assert(vdecl->position_t || This->state.programmable_vs, + D3DERR_INVALIDCALL); + + /* TODO: Support vs < 3 and ff */ + user_assert(vs->byte_code.version == 0x30, + D3DERR_INVALIDCALL); + /* TODO: Not hardcode the constant buffers for swvp */ + user_assert(This->may_swvp, + D3DERR_INVALIDCALL); + + nine_state_prepare_draw_sw(This, vdecl, SrcStartIndex, VertexCount, &so); - buffer_size = VertexCount * vs->so->stride[0]; - if (1) { + buffer_size = VertexCount * so.stride[0] * 4; + { struct pipe_resource templ; memset(&templ, 0, sizeof(templ)); @@ -3217,49 +3267,50 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This, templ.height0 = templ.depth0 = templ.array_size = 1; templ.last_level = templ.nr_samples = 0; - resource = This->screen->resource_create(This->screen, &templ); + resource = screen_sw->resource_create(screen_sw, &templ); if (!resource) return E_OUTOFMEMORY; - buffer_offset = 0; - } else { - /* SO matches vertex declaration */ - resource = NineVertexBuffer9_GetResource(dst); - buffer_offset = DestIndex * vs->so->stride[0]; } - target = This->pipe->create_stream_output_target(This->pipe, resource, - buffer_offset, - 
buffer_size); + target = pipe_sw->create_stream_output_target(pipe_sw, resource, + 0, buffer_size); if (!target) { pipe_resource_reference(&resource, NULL); return D3DERR_DRIVERINTERNALERROR; } - if (!vdecl) { - hr = NineVertexDeclaration9_new_from_fvf(This, dst->desc.FVF, &vdecl); - if (FAILED(hr)) - goto out; - } - init_draw_info(&draw, This, D3DPT_POINTLIST, VertexCount); draw.instance_count = 1; draw.indexed = FALSE; - draw.start = SrcStartIndex; + draw.start = 0; draw.index_bias = 0; - draw.min_index = SrcStartIndex; - draw.max_index = SrcStartIndex + VertexCount - 1; + draw.min_index = 0; + draw.max_index = VertexCount - 1; + + + pipe_sw->set_stream_output_targets(pipe_sw, 1, &target, offsets); - This->pipe->set_stream_output_targets(This->pipe, 1, &target, 0); - This->pipe->draw_vbo(This->pipe, &draw); - This->pipe->set_stream_output_targets(This->pipe, 0, NULL, 0); - This->pipe->stream_output_target_destroy(This->pipe, target); + pipe_sw->draw_vbo(pipe_sw, &draw); + + pipe_sw->set_stream_output_targets(pipe_sw, 0, NULL, 0); + pipe_sw->stream_output_target_destroy(pipe_sw, target); + + u_box_1d(0, VertexCount * so.stride[0] * 4, &box); + map = pipe_sw->transfer_map(pipe_sw, resource, 0, PIPE_TRANSFER_READ, &box, + &transfer); + if (!map) { + hr = D3DERR_DRIVERINTERNALERROR; + goto out; + } hr = NineVertexDeclaration9_ConvertStreamOutput(vdecl, dst, DestIndex, VertexCount, - resource, vs->so); + map, &so); + if (transfer) + pipe_sw->transfer_unmap(pipe_sw, transfer); + out: + nine_state_after_draw_sw(This); pipe_resource_reference(&resource, NULL); - if (!pVertexDecl) - NineUnknown_Release(NineUnknown(vdecl)); return hr; } diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h index b6aa5e06531..12be643dc2c 100644 --- a/src/gallium/state_trackers/nine/device9.h +++ b/src/gallium/state_trackers/nine/device9.h @@ -52,8 +52,11 @@ struct NineDevice9 /* G3D context */ struct pipe_screen *screen; + struct pipe_screen 
*screen_sw; struct pipe_context *pipe; + struct pipe_context *pipe_sw; struct cso_context *cso; + struct cso_context *cso_sw; /* creation parameters */ D3DCAPS9 caps; @@ -115,6 +118,8 @@ struct NineDevice9 boolean user_vbufs; boolean user_ibufs; boolean user_cbufs; + boolean user_sw_vbufs; + boolean user_sw_cbufs; boolean window_space_position_support; boolean vs_integer; boolean ps_integer; @@ -128,6 +133,8 @@ struct NineDevice9 struct u_upload_mgr *vertex_uploader; struct u_upload_mgr *index_uploader; struct u_upload_mgr *constbuf_uploader; + struct u_upload_mgr *vertex_sw_uploader; + struct u_upload_mgr *constbuf_sw_uploader; unsigned constbuf_alignment; struct nine_range_pool range_pool; diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 2b573e6879e..20b6ed7044c 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -26,6 +26,7 @@ #include "device9.h" #include "nine_debug.h" #include "nine_state.h" +#include "vertexdeclaration9.h" #include "util/macros.h" #include "util/u_memory.h" @@ -467,6 +468,7 @@ struct shader_translator struct { struct ureg_dst *r; struct ureg_dst oPos; + struct ureg_dst oPos_out; /* the real output when doing streamout */ struct ureg_dst oFog; struct ureg_dst oPts; struct ureg_dst oCol[4]; @@ -511,6 +513,9 @@ struct shader_translator boolean indirect_const_access; boolean failure; + struct nine_vs_output_info output_info[16]; + int num_outputs; + struct nine_shader_info *info; int16_t op_info_map[D3DSIO_BREAKP + 1]; @@ -536,6 +541,17 @@ sm1_instruction_check(const struct sm1_instruction *insn) } } +static void +nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex, + int mask, int output_index) +{ + tx->output_info[tx->num_outputs].output_semantic = Usage; + tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex; + tx->output_info[tx->num_outputs].mask = mask; + 
tx->output_info[tx->num_outputs].output_index = output_index; + tx->num_outputs++; +} + static boolean tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) { @@ -2137,6 +2153,12 @@ DECL_SPECIAL(DCL) assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing"); tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked( ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1); + nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx); + if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) { + tx->regs.oPos_out = tx->regs.o[sem.reg.idx]; + tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg); + tx->regs.oPos = tx->regs.o[sem.reg.idx]; + } if (tgsi.Name == TGSI_SEMANTIC_PSIZE) { tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg); @@ -3348,6 +3370,8 @@ tx_ctor(struct shader_translator *tx, struct nine_shader_info *info) info->version = (tx->version.major << 4) | tx->version.minor; + tx->num_outputs = 0; + create_op_info_map(tx); } @@ -3361,6 +3385,26 @@ tx_dtor(struct shader_translator *tx) FREE(tx); } +/* CONST[0].xyz = width/2, -height/2, zmax-zmin + * CONST[1].xyz = x+width/2, y+height/2, zmin */ +static void +shader_add_vs_viewport_transform(struct shader_translator *tx) +{ + struct ureg_program *ureg = tx->ureg; + struct ureg_src c0 = NINE_CONSTANT_SRC(0); + struct ureg_src c1 = NINE_CONSTANT_SRC(1); + /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/ + + c0 = ureg_src_dimension(c0, 4); + c1 = ureg_src_dimension(c1, 4); + /* TODO: find out when we need to apply the viewport transformation or not. 
+ * Likely will be XYZ vs XYZRHW in vdecl_out + * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0); + * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1); + */ + ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos)); +} + static void shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col) { @@ -3412,10 +3456,10 @@ shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col) ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col); } -#define GET_CAP(n) device->screen->get_param( \ - device->screen, PIPE_CAP_##n) -#define GET_SHADER_CAP(n) device->screen->get_shader_param( \ - device->screen, info->type, PIPE_SHADER_CAP_##n) +#define GET_CAP(n) screen->get_param( \ + screen, PIPE_CAP_##n) +#define GET_SHADER_CAP(n) screen->get_shader_param( \ + screen, info->type, PIPE_SHADER_CAP_##n) HRESULT nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) @@ -3423,6 +3467,8 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) struct shader_translator *tx; HRESULT hr = D3D_OK; const unsigned processor = info->type; + struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen; + struct pipe_context *pipe = info->process_vertices ? 
device->pipe_sw : device->pipe; user_assert(processor != ~0, D3DERR_INVALIDCALL); @@ -3535,6 +3581,9 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) info->point_size = TRUE; } + if (info->process_vertices) + shader_add_vs_viewport_transform(tx); + ureg_END(tx->ureg); /* record local constants */ @@ -3627,6 +3676,9 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) ureg_DECL_constant2D(tx->ureg, 0, 511, 3); } + if (info->process_vertices) + ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */ + if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) { unsigned count; const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count); @@ -3634,7 +3686,14 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) ureg_free_tokens(toks); } - info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe); + if (info->process_vertices) { + NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out, + tx->output_info, + tx->num_outputs, + &(info->so)); + info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so)); + } else + info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe); if (!info->cso) { hr = D3DERR_DRIVERINTERNALERROR; FREE(info->lconstf.data); diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h index 092ae634d7d..72a28b8055f 100644 --- a/src/gallium/state_trackers/nine/nine_shader.h +++ b/src/gallium/state_trackers/nine/nine_shader.h @@ -26,10 +26,12 @@ #include "d3d9types.h" #include "d3d9caps.h" #include "nine_defines.h" +#include "nine_helpers.h" #include "pipe/p_state.h" /* PIPE_MAX_ATTRIBS */ #include "util/u_memory.h" struct NineDevice9; +struct NineVertexDeclaration9; struct nine_lconstf /* NOTE: both pointers should be FREE'd by the user */ { @@ -78,6 +80,18 @@ struct nine_shader_info uint8_t bumpenvmat_needed; boolean swvp_on; + + boolean process_vertices; + struct 
NineVertexDeclaration9 *vdecl_out; + struct pipe_stream_output_info so; +}; + +struct nine_vs_output_info +{ + BYTE output_semantic; + int output_semantic_index; + int mask; + int output_index; }; static inline void @@ -147,4 +161,68 @@ nine_shader_variants_free(struct nine_shader_variant *list) } } +struct nine_shader_variant_so +{ + struct nine_shader_variant_so *next; + struct NineVertexDeclaration9 *vdecl; + struct pipe_stream_output_info so; + void *cso; +}; + +static inline void * +nine_shader_variant_so_get(struct nine_shader_variant_so *list, + struct NineVertexDeclaration9 *vdecl, + struct pipe_stream_output_info *so) +{ + while (list->vdecl != vdecl && list->next) + list = list->next; + if (list->vdecl == vdecl) { + *so = list->so; + return list->cso; + } + return NULL; +} + +/* Appends a stream-output variant (vdecl, so, cso) to the list. The head node itself stores the first variant; later ones are heap-allocated. Returns FALSE if that allocation fails. */ +static inline boolean +nine_shader_variant_so_add(struct nine_shader_variant_so *list, + struct NineVertexDeclaration9 *vdecl, + struct pipe_stream_output_info *so, void *cso) +{ + if (list->vdecl == NULL) { /* first shader */ + list->next = NULL; + nine_bind(&list->vdecl, vdecl); + list->so = *so; + list->cso = cso; + return TRUE; + } + while (list->next) { + assert(list->vdecl != vdecl); + list = list->next; + } + list->next = MALLOC_STRUCT(nine_shader_variant_so); + if (!list->next) + return FALSE; + list->next->next = NULL; + /* Initialise vdecl before binding: nothing else has set this field on the new node. The key must go on the new node, not on the previous tail. */ + list->next->vdecl = NULL; + nine_bind(&list->next->vdecl, vdecl); + list->next->so = *so; + list->next->cso = cso; + return TRUE; +} + +static inline void +nine_shader_variants_so_free(struct nine_shader_variant_so *list) +{ + while (list->next) { + struct nine_shader_variant_so *ptr = list->next; + list->next = ptr->next; + nine_bind(&ptr->vdecl, NULL); + FREE(ptr); + } + if (list->vdecl) + nine_bind(&list->vdecl, NULL); +} + #endif /* _NINE_SHADER_H_ */ diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 024e639f92f..a832a13a32a 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ 
b/src/gallium/state_trackers/nine/nine_state.c @@ -26,6 +26,7 @@ #include "buffer9.h" #include "indexbuffer9.h" #include "surface9.h" +#include "vertexbuffer9.h" #include "vertexdeclaration9.h" #include "vertexshader9.h" #include "pixelshader9.h" @@ -36,6 +37,8 @@ #include "cso_cache/cso_context.h" #include "util/u_upload_mgr.h" #include "util/u_math.h" +#include "util/u_box.h" +#include "util/u_simple_shaders.h" #define DBG_CHANNEL DBG_DEVICE @@ -1356,6 +1359,367 @@ nine_state_clear(struct nine_state *state, const boolean device) } } +void +nine_state_init_sw(struct NineDevice9 *device) +{ + struct pipe_context *pipe_sw = device->pipe_sw; + struct pipe_rasterizer_state rast; + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_framebuffer_state fb; + + /* Only used with Streamout */ + memset(&rast, 0, sizeof(rast)); + rast.rasterizer_discard = true; + rast.point_quad_rasterization = 1; /* to make llvmpipe happy */ + cso_set_rasterizer(device->cso_sw, &rast); + + /* dummy settings */ + memset(&blend, 0, sizeof(blend)); + memset(&dsa, 0, sizeof(dsa)); + memset(&fb, 0, sizeof(fb)); + cso_set_blend(device->cso_sw, &blend); + cso_set_depth_stencil_alpha(device->cso_sw, &dsa); + cso_set_framebuffer(device->cso_sw, &fb); + cso_set_viewport_dims(device->cso_sw, 1.0, 1.0, false); + cso_set_fragment_shader_handle(device->cso_sw, util_make_empty_fragment_shader(pipe_sw)); +} + +/* There is duplication with update_vertex_elements. 
+ * TODO: Share the code */ + +static void +update_vertex_elements_sw(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + const struct NineVertexDeclaration9 *vdecl = device->state.vdecl; + const struct NineVertexShader9 *vs; + unsigned n, b, i; + int index; + char vdecl_index_map[16]; /* vs->num_inputs <= 16 */ + char used_streams[device->caps.MaxStreams]; + int dummy_vbo_stream = -1; + BOOL need_dummy_vbo = FALSE; + struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; + + state->stream_usage_mask = 0; + memset(vdecl_index_map, -1, 16); + memset(used_streams, 0, device->caps.MaxStreams); + vs = state->programmable_vs ? device->state.vs : device->ff.vs; + + if (vdecl) { + for (n = 0; n < vs->num_inputs; ++n) { + DBG("looking up input %u (usage %u) from vdecl(%p)\n", + n, vs->input_map[n].ndecl, vdecl); + + for (i = 0; i < vdecl->nelems; i++) { + if (vdecl->usage_map[i] == vs->input_map[n].ndecl) { + vdecl_index_map[n] = i; + used_streams[vdecl->elems[i].vertex_buffer_index] = 1; + break; + } + } + if (vdecl_index_map[n] < 0) + need_dummy_vbo = TRUE; + } + } else { + /* No vertex declaration. 
Likely will never happen in practice, + * but we need not crash on this */ + need_dummy_vbo = TRUE; + } + + if (need_dummy_vbo) { + for (i = 0; i < device->caps.MaxStreams; i++ ) { + if (!used_streams[i]) { + dummy_vbo_stream = i; + break; + } + } + } + /* there are less vertex shader inputs than stream slots, + * so if we need a slot for the dummy vbo, we should have found one */ + assert (!need_dummy_vbo || dummy_vbo_stream != -1); + + for (n = 0; n < vs->num_inputs; ++n) { + index = vdecl_index_map[n]; + if (index >= 0) { + ve[n] = vdecl->elems[index]; + b = ve[n].vertex_buffer_index; + state->stream_usage_mask |= 1 << b; + /* XXX wine just uses 1 here: */ + if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA) + ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF; + } else { + /* if the vertex declaration is incomplete compared to what the + * vertex shader needs, we bind a dummy vbo with 0 0 0 0. + * This is not precised by the spec, but is the behaviour + * tested on win */ + ve[n].vertex_buffer_index = dummy_vbo_stream; + ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[n].src_offset = 0; + ve[n].instance_divisor = 0; + } + } + + if (state->dummy_vbo_bound_at != dummy_vbo_stream) { + if (state->dummy_vbo_bound_at >= 0) + state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at; + if (dummy_vbo_stream >= 0) { + state->changed.vtxbuf |= 1 << dummy_vbo_stream; + state->vbo_bound_done = FALSE; + } + state->dummy_vbo_bound_at = dummy_vbo_stream; + } + + cso_set_vertex_elements(device->cso_sw, vs->num_inputs, ve); +} + +static void +update_vertex_buffers_sw(struct NineDevice9 *device, int start_vertice, int num_vertices) +{ + struct pipe_context *pipe = device->pipe; + struct pipe_context *pipe_sw = device->pipe_sw; + struct nine_state *state = &device->state; + struct pipe_vertex_buffer vtxbuf; + uint32_t mask = 0xf; + unsigned i; + + DBG("mask=%x\n", mask); + + assert (state->dummy_vbo_bound_at < 0); + /* TODO: handle dummy_vbo_bound_at */ + + 
for (i = 0; mask; mask >>= 1, ++i) { + if (mask & 1) { + if (state->vtxbuf[i].buffer) { + struct pipe_resource *buf; + struct pipe_box box; + + vtxbuf = state->vtxbuf[i]; + + DBG("Locking %p (offset %d, length %d)\n", vtxbuf.buffer, + vtxbuf.buffer_offset, num_vertices * vtxbuf.stride); + + u_box_1d(vtxbuf.buffer_offset + start_vertice * vtxbuf.stride, + num_vertices * vtxbuf.stride, &box); + buf = vtxbuf.buffer; + vtxbuf.user_buffer = pipe->transfer_map(pipe, buf, 0, PIPE_TRANSFER_READ, &box, + &(state->transfers_so[i])); + vtxbuf.buffer = NULL; + if (!device->driver_caps.user_sw_vbufs) { + u_upload_data(device->vertex_sw_uploader, + 0, + box.width, + 16, + vtxbuf.user_buffer, + &(vtxbuf.buffer_offset), + &(vtxbuf.buffer)); + u_upload_unmap(device->vertex_sw_uploader); + vtxbuf.user_buffer = NULL; + } + pipe_sw->set_vertex_buffers(pipe_sw, i, 1, &vtxbuf); + if (vtxbuf.buffer) + pipe_resource_reference(&vtxbuf.buffer, NULL); + } else + pipe_sw->set_vertex_buffers(pipe_sw, i, 1, NULL); + } + } +} + +static void +update_vs_constants_sw(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + struct pipe_context *pipe_sw = device->pipe_sw; + + DBG("updating\n"); + + { + struct pipe_constant_buffer cb; + const void *buf; + + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 4096 * sizeof(float[4]); + cb.user_buffer = state->vs_const_f_swvp; + + if (state->vs->lconstf.ranges) { + const struct nine_lconstf *lconstf = &device->state.vs->lconstf; + const struct nine_range *r = lconstf->ranges; + unsigned n = 0; + float *dst = device->state.vs_lconstf_temp; + float *src = (float *)cb.user_buffer; + memcpy(dst, src, 8192 * sizeof(float[4])); + while (r) { + unsigned p = r->bgn; + unsigned c = r->end - r->bgn; + memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float)); + n += c; + r = r->next; + } + cb.user_buffer = dst; + } + + buf = cb.user_buffer; + if (!device->driver_caps.user_sw_cbufs) { + 
u_upload_data(device->constbuf_sw_uploader, + 0, + cb.buffer_size, + 16, + cb.user_buffer, + &(cb.buffer_offset), + &(cb.buffer)); + u_upload_unmap(device->constbuf_sw_uploader); + cb.user_buffer = NULL; + } + + pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 0, &cb); + if (cb.buffer) + pipe_resource_reference(&cb.buffer, NULL); + + cb.user_buffer = (char *)buf + 4096 * sizeof(float[4]); + if (!device->driver_caps.user_sw_cbufs) { + u_upload_data(device->constbuf_sw_uploader, + 0, + cb.buffer_size, + 16, + cb.user_buffer, + &(cb.buffer_offset), + &(cb.buffer)); + u_upload_unmap(device->constbuf_sw_uploader); + cb.user_buffer = NULL; + } + + pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 1, &cb); + if (cb.buffer) + pipe_resource_reference(&cb.buffer, NULL); + } + + { + struct pipe_constant_buffer cb; + + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 2048 * sizeof(float[4]); + cb.user_buffer = state->vs_const_i; + + if (!device->driver_caps.user_sw_cbufs) { + u_upload_data(device->constbuf_sw_uploader, + 0, + cb.buffer_size, + 16, + cb.user_buffer, + &(cb.buffer_offset), + &(cb.buffer)); + u_upload_unmap(device->constbuf_sw_uploader); + cb.user_buffer = NULL; + } + + pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 2, &cb); + if (cb.buffer) + pipe_resource_reference(&cb.buffer, NULL); + } + + { + struct pipe_constant_buffer cb; + + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 512 * sizeof(float[4]); + cb.user_buffer = state->vs_const_b; + + if (!device->driver_caps.user_sw_cbufs) { + u_upload_data(device->constbuf_sw_uploader, + 0, + cb.buffer_size, + 16, + cb.user_buffer, + &(cb.buffer_offset), + &(cb.buffer)); + u_upload_unmap(device->constbuf_sw_uploader); + cb.user_buffer = NULL; + } + + pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 3, &cb); + if (cb.buffer) + pipe_resource_reference(&cb.buffer, NULL); + } + + { + struct pipe_constant_buffer cb; + const D3DVIEWPORT9 *vport = 
&device->state.viewport; + float viewport_data[8] = {(float)vport->Width * 0.5f, + (float)vport->Height * -0.5f, vport->MaxZ - vport->MinZ, 0.f, + (float)vport->Width * 0.5f + (float)vport->X, + (float)vport->Height * 0.5f + (float)vport->Y, + vport->MinZ, 0.f}; + + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 2 * sizeof(float[4]); + cb.user_buffer = viewport_data; + + { + u_upload_data(device->constbuf_sw_uploader, + 0, + cb.buffer_size, + 16, + cb.user_buffer, + &(cb.buffer_offset), + &(cb.buffer)); + u_upload_unmap(device->constbuf_sw_uploader); + cb.user_buffer = NULL; + } + + pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 4, &cb); + if (cb.buffer) + pipe_resource_reference(&cb.buffer, NULL); + } + +} + +void +nine_state_prepare_draw_sw(struct NineDevice9 *device, struct NineVertexDeclaration9 *vdecl_out, + int start_vertice, int num_vertices, struct pipe_stream_output_info *so) +{ + struct nine_state *state = &device->state; + + struct NineVertexShader9 *vs = state->programmable_vs ? 
device->state.vs : device->ff.vs; + + assert(state->programmable_vs); + + DBG("Preparing draw\n"); + cso_set_vertex_shader_handle(device->cso_sw, + NineVertexShader9_GetVariantProcessVertices(vs, vdecl_out, so)); + update_vertex_elements_sw(device); + update_vertex_buffers_sw(device, start_vertice, num_vertices); + update_vs_constants_sw(device); + DBG("Preparation succeeded\n"); +} + +void +nine_state_after_draw_sw(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + struct pipe_context *pipe = device->pipe; + struct pipe_context *pipe_sw = device->pipe_sw; + int i; + + for (i = 0; i < 4; i++) { + pipe_sw->set_vertex_buffers(pipe_sw, i, 1, NULL); + if (state->transfers_so[i]) + pipe->transfer_unmap(pipe, state->transfers_so[i]); + state->transfers_so[i] = NULL; + } +} + +void +nine_state_destroy_sw(struct NineDevice9 *device) +{ + (void) device; + /* Everything destroyed with cso */ +} + /* static const DWORD nine_render_states_pixel[] = { diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h index 2aa424d46a7..05eb2c170d6 100644 --- a/src/gallium/state_trackers/nine/nine_state.h +++ b/src/gallium/state_trackers/nine/nine_state.h @@ -242,6 +242,9 @@ struct nine_state struct pipe_constant_buffer cb_vs_ff; struct pipe_constant_buffer cb_ps_ff; } pipe; + + /* sw */ + struct pipe_transfer *transfers_so[4]; }; /* map D3DRS -> NINE_STATE_x @@ -263,6 +266,15 @@ void nine_state_set_defaults(struct NineDevice9 *, const D3DCAPS9 *, boolean is_reset); void nine_state_clear(struct nine_state *, const boolean device); +void nine_state_init_sw(struct NineDevice9 *device); +void nine_state_prepare_draw_sw(struct NineDevice9 *device, + struct NineVertexDeclaration9 *vdecl_out, + int start_vertice, + int num_vertices, + struct pipe_stream_output_info *so); +void nine_state_after_draw_sw(struct NineDevice9 *device); +void nine_state_destroy_sw(struct NineDevice9 *device); + /* If @alloc is FALSE, the return 
value may be a const identity matrix. * Therefore, do not modify if you set alloc to FALSE ! */ diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c index 8bf4f4bee27..9e280321e45 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.c +++ b/src/gallium/state_trackers/nine/pixelshader9.c @@ -59,6 +59,7 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This, info.sampler_ps1xtypes = 0x0; info.fog_enable = 0; info.projected = 0; + info.process_vertices = false; hr = nine_translate_shader(device, &info); if (FAILED(hr)) @@ -162,6 +163,7 @@ NinePixelShader9_GetVariant( struct NinePixelShader9 *This ) info.fog_mode = device->state.rs[D3DRS_FOGTABLEMODE]; info.force_color_in_centroid = key >> 34 & 1; info.projected = (key >> 48) & 0xffff; + info.process_vertices = false; hr = nine_translate_shader(This->base.device, &info); if (FAILED(hr)) diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.c b/src/gallium/state_trackers/nine/vertexdeclaration9.c index 955cdbdba03..e1256e2f6d5 100644 --- a/src/gallium/state_trackers/nine/vertexdeclaration9.c +++ b/src/gallium/state_trackers/nine/vertexdeclaration9.c @@ -24,12 +24,12 @@ #include "vertexbuffer9.h" #include "device9.h" #include "nine_helpers.h" +#include "nine_shader.h" #include "pipe/p_format.h" #include "pipe/p_context.h" #include "util/u_math.h" #include "util/u_format.h" -#include "util/u_box.h" #include "translate/translate.h" #define DBG_CHANNEL DBG_VERTEXDECLARATION @@ -409,6 +409,53 @@ NineVertexDeclaration9_new_from_fvf( struct NineDevice9 *pDevice, NINE_DEVICE_CHILD_NEW(VertexDeclaration9, ppOut, /* args */ pDevice, elems); } +/* Build the stream-output layout used to capture vertex shader outputs for this declaration. *so is zeroed, then for each of the numOutputs entries of ShaderOutputsInfo the first element of This->decls with the same usage and usage index gets one entry appended to so->output. Shader outputs with no matching element are skipped. All entries target output buffer 0 and stream 0. On return so->num_outputs is the number of entries written and so->stride[0] covers all of them. No bound on the number of entries is checked here. */ void +NineVertexDeclaration9_FillStreamOutputInfo( + struct NineVertexDeclaration9 *This, + struct nine_vs_output_info *ShaderOutputsInfo, + unsigned numOutputs, + struct pipe_stream_output_info *so ) +{ + unsigned so_outputs = 0; + int i, j; + + memset(so, 0, sizeof(struct pipe_stream_output_info)); + + for (i = 0; i <
numOutputs; i++) { + BYTE output_semantic = ShaderOutputsInfo[i].output_semantic; + unsigned output_semantic_index = ShaderOutputsInfo[i].output_semantic_index; + + for (j = 0; j < This->nelems; j++) { + if ((This->decls[j].Usage == output_semantic || + (output_semantic == D3DDECLUSAGE_POSITION && + This->decls[j].Usage == D3DDECLUSAGE_POSITIONT)) && + This->decls[j].UsageIndex == output_semantic_index) { /* same usage and usage index; a POSITION shader output is also accepted for a POSITIONT element */ + DBG("Matching %s %d: o%d -> %d\n", + nine_declusage_name(nine_d3d9_to_nine_declusage(This->decls[j].Usage, 0)), + This->decls[j].UsageIndex, i, j); + so->output[so_outputs].register_index = ShaderOutputsInfo[i].output_index; + so->output[so_outputs].start_component = 0; /* capture always starts at the first component; the count below is the position of the highest tested mask bit (8, 4, 2) that is set, else 1 */ + if (ShaderOutputsInfo[i].mask & 8) + so->output[so_outputs].num_components = 4; + else if (ShaderOutputsInfo[i].mask & 4) + so->output[so_outputs].num_components = 3; + else if (ShaderOutputsInfo[i].mask & 2) + so->output[so_outputs].num_components = 2; + else + so->output[so_outputs].num_components = 1; + so->output[so_outputs].output_buffer = 0; + so->output[so_outputs].dst_offset = so_outputs * sizeof(float[4])/4; /* fixed pitch of sizeof(float[4])/4 per entry, independent of num_components. NOTE(review): gallium appears to express dst_offset and stride in dwords, which would make this one vec4 slot per output - confirm against p_state.h */ + so->output[so_outputs].stream = 0; + so_outputs++; + break; /* only the first matching declaration element is used for this shader output */ + } + } + } + + so->num_outputs = so_outputs; + so->stride[0] = so_outputs * sizeof(float[4])/4; /* same per-entry pitch as dst_offset above, times the number of captured outputs */ +} + /* ProcessVertices runs stream output into a temporary buffer to capture * all outputs.
* Now we have to convert them to the format and order set by the vertex @@ -422,17 +469,13 @@ NineVertexDeclaration9_ConvertStreamOutput( struct NineVertexBuffer9 *pDstBuf, UINT DestIndex, UINT VertexCount, - struct pipe_resource *pSrcBuf, + void *pSrcBuf, /* now a plain pointer that is handed straight to translate->set_buffer below; the transfer_map and transfer_unmap of a pipe_resource are removed from this function */ const struct pipe_stream_output_info *so ) { - struct pipe_context *pipe = This->base.device->pipe; - struct pipe_transfer *transfer = NULL; struct translate *translate; struct translate_key transkey; - struct pipe_box box; HRESULT hr; unsigned i; - void *src_map; void *dst_map; DBG("This=%p pDstBuf=%p DestIndex=%u VertexCount=%u pSrcBuf=%p so=%p\n", @@ -477,20 +520,12 @@ NineVertexDeclaration9_ConvertStreamOutput( if (FAILED(hr)) goto out; - src_map = pipe->transfer_map(pipe, pSrcBuf, 0, PIPE_TRANSFER_READ, &box, - &transfer); - if (!src_map) { - hr = D3DERR_DRIVERINTERNALERROR; - goto out; - } - translate->set_buffer(translate, 0, src_map, so->stride[0], ~0); + translate->set_buffer(translate, 0, pSrcBuf, so->stride[0] * 4, ~0); /* NOTE(review): the added factor of 4 presumably converts so->stride[0] from dwords to the byte stride translate expects - confirm against p_state.h and translate.h */ translate->run(translate, 0, VertexCount, 0, 0, dst_map); NineVertexBuffer9_Unlock(pDstBuf); out: - if (transfer) - pipe->transfer_unmap(pipe, transfer); translate->release(translate); /* TODO: cache these */ return hr; } diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.h b/src/gallium/state_trackers/nine/vertexdeclaration9.h index 9d3b1bdca88..7b94f846fe7 100644 --- a/src/gallium/state_trackers/nine/vertexdeclaration9.h +++ b/src/gallium/state_trackers/nine/vertexdeclaration9.h @@ -31,6 +31,7 @@ struct pipe_vertex_element; struct pipe_stream_output_info; struct NineDevice9; struct NineVertexBuffer9; +struct nine_vs_output_info; struct NineVertexDeclaration9 { @@ -78,6 +79,13 @@ NineVertexDeclaration9_GetDeclaration( struct NineVertexDeclaration9 *This, D3DVERTEXELEMENT9 *pElement, UINT *pNumElements ); +/* Fill *so with one stream-output entry for each of the numOutputs shader outputs that matches an element of this declaration by usage and usage index. */ void +NineVertexDeclaration9_FillStreamOutputInfo( + struct NineVertexDeclaration9 *This, + struct nine_vs_output_info *ShaderOutputsInfo, + unsigned numOutputs, + struct
pipe_stream_output_info *so ); + /* Convert stream output data to the vertex declaration's format. */ HRESULT NineVertexDeclaration9_ConvertStreamOutput( @@ -85,7 +93,7 @@ NineVertexDeclaration9_ConvertStreamOutput( struct NineVertexBuffer9 *pDstBuf, UINT DestIndex, UINT VertexCount, - struct pipe_resource *pSrcBuf, + void *pSrcBuf, const struct pipe_stream_output_info *so ); #endif /* _NINE_VERTEXDECLARATION9_H_ */ diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c index 92f8f6bb581..a8c7c9b97a8 100644 --- a/src/gallium/state_trackers/nine/vertexshader9.c +++ b/src/gallium/state_trackers/nine/vertexshader9.c @@ -23,10 +23,12 @@ #include "nine_helpers.h" #include "nine_shader.h" +#include "vertexdeclaration9.h" #include "vertexshader9.h" #include "device9.h" #include "pipe/p_context.h" +#include "cso_cache/cso_context.h" #define DBG_CHANNEL DBG_VERTEXSHADER @@ -64,6 +66,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This, info.point_size_min = 0; info.point_size_max = 0; info.swvp_on = !!(device->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING); + info.process_vertices = false; hr = nine_translate_shader(device, &info); if (hr == D3DERR_INVALIDCALL && @@ -109,6 +112,7 @@ NineVertexShader9_dtor( struct NineVertexShader9 *This ) if (This->base.device) { struct pipe_context *pipe = This->base.device->pipe; struct nine_shader_variant *var = &This->variant; + struct nine_shader_variant_so *var_so = &This->variant_so; do { if (var->cso) { @@ -119,6 +123,13 @@ NineVertexShader9_dtor( struct NineVertexShader9 *This ) var = var->next; } while (var); + while (var_so && var_so->vdecl) { /* walk the stream-output variant list, stopping at its end or at the first node without a vdecl; these shaders are deleted through the device cso_sw context rather than through pipe */ + if (var_so->cso) { + cso_delete_vertex_shader(This->base.device->cso_sw, var_so->cso ); + } + var_so = var_so->next; + } + if (This->ff_cso) { if (This->ff_cso == This->base.device->state.cso.vs) pipe->bind_vs_state(pipe, NULL); @@ -126,6 +137,7 @@ NineVertexShader9_dtor( struct NineVertexShader9 *This ) } }
nine_shader_variants_free(&This->variant); + nine_shader_variants_so_free(&This->variant_so); FREE((void *)This->byte_code.tokens); /* const_cast */ @@ -178,6 +190,7 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This ) info.point_size_min = asfloat(device->state.rs[D3DRS_POINTSIZE_MIN]); info.point_size_max = asfloat(device->state.rs[D3DRS_POINTSIZE_MAX]); info.swvp_on = device->swvp; + info.process_vertices = false; hr = nine_translate_shader(This->base.device, &info); if (FAILED(hr)) @@ -192,6 +205,38 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This ) return cso; } +/* Return the shader variant that writes its outputs through stream output for the output declaration vdecl_out. A variant already stored in This->variant_so is returned as is (nine_shader_variant_so_get is also given so; presumably it fills it on a hit - confirm in nine_shader.h). Otherwise the byte code is translated with process_vertices and swvp_on set, the resulting info.so is copied to *so, the new cso is recorded in This->variant_so and returned. Returns NULL if translation fails. Only the info fields assigned below are initialized before translation. */ void * +NineVertexShader9_GetVariantProcessVertices( struct NineVertexShader9 *This, + struct NineVertexDeclaration9 *vdecl_out, + struct pipe_stream_output_info *so ) +{ + struct nine_shader_info info; + HRESULT hr; + void *cso; + + cso = nine_shader_variant_so_get(&This->variant_so, vdecl_out, so); + if (cso) + return cso; + + info.type = PIPE_SHADER_VERTEX; + info.const_i_base = 0; + info.const_b_base = 0; + info.byte_code = This->byte_code.tokens; + info.sampler_mask_shadow = 0; + info.fog_enable = false; + info.point_size_min = 0; + info.point_size_max = 0; + info.swvp_on = true; + info.vdecl_out = vdecl_out; + info.process_vertices = true; + hr = nine_translate_shader(This->base.device, &info); + if (FAILED(hr)) + return NULL; + *so = info.so; /* hand the stream-output layout produced by translation back to the caller */ + nine_shader_variant_so_add(&This->variant_so, vdecl_out, so, info.cso); + return info.cso; +} + IDirect3DVertexShader9Vtbl NineVertexShader9_vtable = { (void *)NineUnknown_QueryInterface, (void *)NineUnknown_AddRef, diff --git a/src/gallium/state_trackers/nine/vertexshader9.h b/src/gallium/state_trackers/nine/vertexshader9.h index 823c71aa85e..1f0cfd6ac26 100644 --- a/src/gallium/state_trackers/nine/vertexshader9.h +++ b/src/gallium/state_trackers/nine/vertexshader9.h @@ -31,6 +31,8 @@ #include "nine_shader.h" #include "nine_state.h" +struct NineVertexDeclaration9; + struct NineVertexShader9 { struct NineUnknown base; @@ -57,8
+59,6 @@ struct NineVertexShader9 struct nine_lconstf lconstf; - const struct pipe_stream_output_info *so; - uint64_t ff_key[3]; void *ff_cso; @@ -66,6 +66,9 @@ struct NineVertexShader9 void *last_cso; uint64_t next_key; + + /* so: stream-output shader variants, looked up per output vertex declaration by NineVertexShader9_GetVariantProcessVertices */ + struct nine_shader_variant_so variant_so; }; static inline struct NineVertexShader9 * NineVertexShader9( void *data ) @@ -107,6 +110,11 @@ NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs, void * NineVertexShader9_GetVariant( struct NineVertexShader9 *vs ); +/* Get the stream-output variant of vs for the output declaration vdecl_out; on a fresh translation *so receives the stream-output layout. Returns NULL if translation fails. */ void * +NineVertexShader9_GetVariantProcessVertices( struct NineVertexShader9 *vs, + struct NineVertexDeclaration9 *vdecl_out, + struct pipe_stream_output_info *so ); + /*** public ***/ HRESULT