For now only VS 3 support is implemented.
This enables The Sims 2 to work.
Signed-off-by: Axel Davy <axel.davy@ens.fr>
list_inithead(&This->managed_textures);
This->screen = pScreen;
+ This->screen_sw = pCTX->ref;
This->caps = *pCaps;
This->d3d9 = pD3D9;
This->params = *pCreationParameters;
This->pipe = This->screen->context_create(This->screen, NULL, 0);
if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */
+ This->pipe_sw = This->screen_sw->context_create(This->screen_sw, NULL, 0);
+ if (!This->pipe_sw) { return E_OUTOFMEMORY; }
This->cso = cso_create_context(This->pipe);
if (!This->cso) { return E_OUTOFMEMORY; } /* also a guess */
+ This->cso_sw = cso_create_context(This->pipe_sw);
+ if (!This->cso_sw) { return E_OUTOFMEMORY; }
/* Create first, it messes up our state. */
This->hud = hud_create(This->pipe, This->cso); /* NULL result is fine */
This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS);
This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS);
This->driver_caps.user_cbufs = GET_PCAP(USER_CONSTANT_BUFFERS);
+ This->driver_caps.user_sw_vbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_VERTEX_BUFFERS);
+ This->driver_caps.user_sw_cbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_CONSTANT_BUFFERS);
if (!This->driver_caps.user_vbufs)
This->vertex_uploader = u_upload_create(This->pipe, 65536,
PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM);
+ This->vertex_sw_uploader = u_upload_create(This->pipe_sw, 65536,
+ PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM);
if (!This->driver_caps.user_ibufs)
This->index_uploader = u_upload_create(This->pipe, 128 * 1024,
PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_STREAM);
PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM);
}
+ This->constbuf_sw_uploader = u_upload_create(This->pipe_sw, 128 * 1024,
+ PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM);
+
This->driver_caps.window_space_position_support = GET_PCAP(TGSI_VS_WINDOW_SPACE_POSITION);
This->driver_caps.vs_integer = pScreen->get_shader_param(pScreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS);
This->driver_caps.ps_integer = pScreen->get_shader_param(pScreen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS);
This->update = &This->state;
nine_update_state(This);
+ nine_state_init_sw(This);
+
ID3DPresentGroup_Release(This->present);
return D3D_OK;
if (This->pipe && This->cso)
nine_pipe_context_clear(This);
nine_ff_fini(This);
+ nine_state_destroy_sw(This);
nine_state_clear(&This->state, TRUE);
if (This->vertex_uploader)
u_upload_destroy(This->index_uploader);
if (This->constbuf_uploader)
u_upload_destroy(This->constbuf_uploader);
+ if (This->vertex_sw_uploader)
+ u_upload_destroy(This->vertex_sw_uploader);
+ if (This->constbuf_sw_uploader)
+ u_upload_destroy(This->constbuf_sw_uploader);
nine_bind(&This->record, NULL);
FREE(This->swapchains);
}
- /* state stuff */
- if (This->pipe) {
- if (This->cso) {
- cso_destroy_context(This->cso);
- }
- if (This->pipe->destroy) { This->pipe->destroy(This->pipe); }
- }
+ /* Destroy cso first */
+ if (This->cso) { cso_destroy_context(This->cso); }
+ if (This->cso_sw) { cso_destroy_context(This->cso_sw); }
+ if (This->pipe && This->pipe->destroy) { This->pipe->destroy(This->pipe); }
+ if (This->pipe_sw && This->pipe_sw->destroy) { This->pipe_sw->destroy(This->pipe_sw); }
if (This->present) { ID3DPresentGroup_Release(This->present); }
if (This->d3d9) { IDirect3D9_Release(This->d3d9); }
return D3D_OK;
}
-/* TODO: Write to pDestBuffer directly if vertex declaration contains
- * only f32 formats.
- */
HRESULT NINE_WINAPI
NineDevice9_ProcessVertices( struct NineDevice9 *This,
UINT SrcStartIndex,
IDirect3DVertexDeclaration9 *pVertexDecl,
DWORD Flags )
{
- struct pipe_screen *screen = This->screen;
+ struct pipe_screen *screen_sw = This->screen_sw;
+ struct pipe_context *pipe_sw = This->pipe_sw;
struct NineVertexDeclaration9 *vdecl = NineVertexDeclaration9(pVertexDecl);
+ struct NineVertexBuffer9 *dst = NineVertexBuffer9(pDestBuffer);
struct NineVertexShader9 *vs;
struct pipe_resource *resource;
+ struct pipe_transfer *transfer = NULL;
+ struct pipe_stream_output_info so;
struct pipe_stream_output_target *target;
struct pipe_draw_info draw;
+ struct pipe_box box;
+ unsigned offsets[1] = {0};
HRESULT hr;
- unsigned buffer_offset, buffer_size;
+ unsigned buffer_size;
+ void *map;
DBG("This=%p SrcStartIndex=%u DestIndex=%u VertexCount=%u "
"pDestBuffer=%p pVertexDecl=%p Flags=%d\n",
This, SrcStartIndex, DestIndex, VertexCount, pDestBuffer,
pVertexDecl, Flags);
- if (!screen->get_param(screen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS))
- STUB(D3DERR_INVALIDCALL);
+ if (!screen_sw->get_param(screen_sw, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS)) {
+ DBG("ProcessVertices not supported\n");
+ return D3DERR_INVALIDCALL;
+ }
- nine_update_state(This);
- /* TODO: Create shader with stream output. */
- STUB(D3DERR_INVALIDCALL);
- struct NineVertexBuffer9 *dst = NineVertexBuffer9(pDestBuffer);
+ vs = This->state.programmable_vs ? This->state.vs : This->ff.vs;
+ /* Note: version is 0 for ff */
+ user_assert(vdecl || (vs->byte_code.version < 0x30 && dst->desc.FVF),
+ D3DERR_INVALIDCALL);
+ if (!vdecl) {
+ DWORD FVF = dst->desc.FVF;
+ vdecl = util_hash_table_get(This->ff.ht_fvf, &FVF);
+ if (!vdecl) {
+ hr = NineVertexDeclaration9_new_from_fvf(This, FVF, &vdecl);
+ if (FAILED(hr))
+ return hr;
+ vdecl->fvf = FVF;
+ util_hash_table_set(This->ff.ht_fvf, &vdecl->fvf, vdecl);
+ NineUnknown_ConvertRefToBind(NineUnknown(vdecl));
+ }
+ }
- vs = This->state.vs ? This->state.vs : This->ff.vs;
+ /* Flags: Can be 0 or D3DPV_DONOTCOPYDATA, and/or lock flags
+ * D3DPV_DONOTCOPYDATA -> Has effect only for ff. In particular
+ * if not set, everything from src will be used, and dst
+ * must match exactly the ff vs outputs.
+ * TODO: Handle all the checks, etc for ff */
+ user_assert(vdecl->position_t || This->state.programmable_vs,
+ D3DERR_INVALIDCALL);
+
+ /* TODO: Support vs < 3 and ff */
+ user_assert(vs->byte_code.version == 0x30,
+ D3DERR_INVALIDCALL);
+ /* TODO: Not hardcode the constant buffers for swvp */
+ user_assert(This->may_swvp,
+ D3DERR_INVALIDCALL);
+
+ nine_state_prepare_draw_sw(This, vdecl, SrcStartIndex, VertexCount, &so);
- buffer_size = VertexCount * vs->so->stride[0];
- if (1) {
+ buffer_size = VertexCount * so.stride[0] * 4;
+ {
struct pipe_resource templ;
memset(&templ, 0, sizeof(templ));
templ.height0 = templ.depth0 = templ.array_size = 1;
templ.last_level = templ.nr_samples = 0;
- resource = This->screen->resource_create(This->screen, &templ);
+ resource = screen_sw->resource_create(screen_sw, &templ);
if (!resource)
return E_OUTOFMEMORY;
- buffer_offset = 0;
- } else {
- /* SO matches vertex declaration */
- resource = NineVertexBuffer9_GetResource(dst);
- buffer_offset = DestIndex * vs->so->stride[0];
}
- target = This->pipe->create_stream_output_target(This->pipe, resource,
- buffer_offset,
- buffer_size);
+ target = pipe_sw->create_stream_output_target(pipe_sw, resource,
+ 0, buffer_size);
if (!target) {
pipe_resource_reference(&resource, NULL);
return D3DERR_DRIVERINTERNALERROR;
}
- if (!vdecl) {
- hr = NineVertexDeclaration9_new_from_fvf(This, dst->desc.FVF, &vdecl);
- if (FAILED(hr))
- goto out;
- }
-
init_draw_info(&draw, This, D3DPT_POINTLIST, VertexCount);
draw.instance_count = 1;
draw.indexed = FALSE;
- draw.start = SrcStartIndex;
+ draw.start = 0;
draw.index_bias = 0;
- draw.min_index = SrcStartIndex;
- draw.max_index = SrcStartIndex + VertexCount - 1;
+ draw.min_index = 0;
+ draw.max_index = VertexCount - 1;
+
+
+ pipe_sw->set_stream_output_targets(pipe_sw, 1, &target, offsets);
- This->pipe->set_stream_output_targets(This->pipe, 1, &target, 0);
- This->pipe->draw_vbo(This->pipe, &draw);
- This->pipe->set_stream_output_targets(This->pipe, 0, NULL, 0);
- This->pipe->stream_output_target_destroy(This->pipe, target);
+ pipe_sw->draw_vbo(pipe_sw, &draw);
+
+ pipe_sw->set_stream_output_targets(pipe_sw, 0, NULL, 0);
+ pipe_sw->stream_output_target_destroy(pipe_sw, target);
+
+ u_box_1d(0, VertexCount * so.stride[0] * 4, &box);
+ map = pipe_sw->transfer_map(pipe_sw, resource, 0, PIPE_TRANSFER_READ, &box,
+ &transfer);
+ if (!map) {
+ hr = D3DERR_DRIVERINTERNALERROR;
+ goto out;
+ }
hr = NineVertexDeclaration9_ConvertStreamOutput(vdecl,
dst, DestIndex, VertexCount,
- resource, vs->so);
+ map, &so);
+ if (transfer)
+ pipe_sw->transfer_unmap(pipe_sw, transfer);
+
out:
+ nine_state_after_draw_sw(This);
pipe_resource_reference(&resource, NULL);
- if (!pVertexDecl)
- NineUnknown_Release(NineUnknown(vdecl));
return hr;
}
/* G3D context */
struct pipe_screen *screen;
+ struct pipe_screen *screen_sw;
struct pipe_context *pipe;
+ struct pipe_context *pipe_sw;
struct cso_context *cso;
+ struct cso_context *cso_sw;
/* creation parameters */
D3DCAPS9 caps;
boolean user_vbufs;
boolean user_ibufs;
boolean user_cbufs;
+ boolean user_sw_vbufs;
+ boolean user_sw_cbufs;
boolean window_space_position_support;
boolean vs_integer;
boolean ps_integer;
struct u_upload_mgr *vertex_uploader;
struct u_upload_mgr *index_uploader;
struct u_upload_mgr *constbuf_uploader;
+ struct u_upload_mgr *vertex_sw_uploader;
+ struct u_upload_mgr *constbuf_sw_uploader;
unsigned constbuf_alignment;
struct nine_range_pool range_pool;
#include "device9.h"
#include "nine_debug.h"
#include "nine_state.h"
+#include "vertexdeclaration9.h"
#include "util/macros.h"
#include "util/u_memory.h"
struct {
struct ureg_dst *r;
struct ureg_dst oPos;
+ struct ureg_dst oPos_out; /* the real output when doing streamout */
struct ureg_dst oFog;
struct ureg_dst oPts;
struct ureg_dst oCol[4];
boolean indirect_const_access;
boolean failure;
+ struct nine_vs_output_info output_info[16];
+ int num_outputs;
+
struct nine_shader_info *info;
int16_t op_info_map[D3DSIO_BREAKP + 1];
}
}
+/* Remember one declared vertex shader output so that a stream output layout
+ * can later be derived from it.
+ *
+ * Usage/UsageIndex: D3D declaration semantic of the output.
+ * mask:             write mask of the declared output register.
+ * output_index:     index of the output register.
+ *
+ * The record is appended to tx->output_info[]. That array has a fixed size,
+ * so a shader declaring more outputs than it can hold is flagged as a
+ * failure instead of writing past the end of the array. */
+static void
+nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
+                    int mask, int output_index)
+{
+    const int capacity =
+        (int)(sizeof(tx->output_info) / sizeof(tx->output_info[0]));
+    struct nine_vs_output_info *slot;
+
+    assert(tx->num_outputs >= 0 && tx->num_outputs < capacity);
+    if (tx->num_outputs < 0 || tx->num_outputs >= capacity) {
+        /* asserts are compiled out in release builds: refuse to overflow */
+        tx->failure = TRUE;
+        return;
+    }
+
+    slot = &tx->output_info[tx->num_outputs];
+    slot->output_semantic = Usage;
+    slot->output_semantic_index = UsageIndex;
+    slot->mask = mask;
+    slot->output_index = output_index;
+    tx->num_outputs++;
+}
+
static boolean
tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
{
assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
+ nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
+ if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
+ tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
+ tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
+ tx->regs.oPos = tx->regs.o[sem.reg.idx];
+ }
if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
info->version = (tx->version.major << 4) | tx->version.minor;
+ tx->num_outputs = 0;
+
create_op_info_map(tx);
}
FREE(tx);
}
+/* CONST[0].xyz = width/2, -height/2, zmax-zmin
+ * CONST[1].xyz = x+width/2, y+height/2, zmin */
+/* Emits the final position write for the ProcessVertices (stream output)
+ * shader variant: the value accumulated in tx->regs.oPos is written to
+ * tx->regs.oPos_out.
+ * As written, only a plain MOV of the XYZ components is emitted. The
+ * MUL/ADD viewport transform is commented out below, so c0 and c1 are set
+ * up (second dimension 4) but not read by any emitted instruction. */
+static void
+shader_add_vs_viewport_transform(struct shader_translator *tx)
+{
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_src c0 = NINE_CONSTANT_SRC(0);
+ struct ureg_src c1 = NINE_CONSTANT_SRC(1);
+ /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
+
+ /* select dimension 4 for both constants (the viewport data) */
+ c0 = ureg_src_dimension(c0, 4);
+ c1 = ureg_src_dimension(c1, 4);
+ /* TODO: find out when we need to apply the viewport transformation or not.
+ * Likely will be XYZ vs XYZRHW in vdecl_out
+ * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
+ * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
+ */
+ ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
+}
+
static void
shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
{
ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
}
-#define GET_CAP(n) device->screen->get_param( \
- device->screen, PIPE_CAP_##n)
-#define GET_SHADER_CAP(n) device->screen->get_shader_param( \
- device->screen, info->type, PIPE_SHADER_CAP_##n)
+#define GET_CAP(n) screen->get_param( \
+ screen, PIPE_CAP_##n)
+#define GET_SHADER_CAP(n) screen->get_shader_param( \
+ screen, info->type, PIPE_SHADER_CAP_##n)
HRESULT
nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
struct shader_translator *tx;
HRESULT hr = D3D_OK;
const unsigned processor = info->type;
+ struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
+ struct pipe_context *pipe = info->process_vertices ? device->pipe_sw : device->pipe;
user_assert(processor != ~0, D3DERR_INVALIDCALL);
info->point_size = TRUE;
}
+ if (info->process_vertices)
+ shader_add_vs_viewport_transform(tx);
+
ureg_END(tx->ureg);
/* record local constants */
ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
}
+ if (info->process_vertices)
+ ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
+
if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
unsigned count;
const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
ureg_free_tokens(toks);
}
- info->cso = ureg_create_shader_and_destroy(tx->ureg, device->pipe);
+ if (info->process_vertices) {
+ NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
+ tx->output_info,
+ tx->num_outputs,
+ &(info->so));
+ info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
+ } else
+ info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe);
if (!info->cso) {
hr = D3DERR_DRIVERINTERNALERROR;
FREE(info->lconstf.data);
#include "d3d9types.h"
#include "d3d9caps.h"
#include "nine_defines.h"
+#include "nine_helpers.h"
#include "pipe/p_state.h" /* PIPE_MAX_ATTRIBS */
#include "util/u_memory.h"
struct NineDevice9;
+struct NineVertexDeclaration9;
struct nine_lconstf /* NOTE: both pointers should be FREE'd by the user */
{
uint8_t bumpenvmat_needed;
boolean swvp_on;
+
+ boolean process_vertices;
+ struct NineVertexDeclaration9 *vdecl_out;
+ struct pipe_stream_output_info so;
+};
+
+/* Description of one declared vertex shader output. It is used to match
+ * shader outputs against the elements of a vertex declaration when the
+ * stream output layout is built. */
+struct nine_vs_output_info
+{
+ BYTE output_semantic; /* D3D declaration usage of the output */
+ int output_semantic_index; /* usage index of the output */
+ int mask; /* write mask of the output register */
+ int output_index; /* index of the output register */
 };
static inline void
}
}
+/* Node of a singly linked list caching stream output shader variants.
+ * The list head is embedded in its owner; a head whose vdecl is NULL means
+ * the list is empty. */
+struct nine_shader_variant_so
+{
+ struct nine_shader_variant_so *next; /* next variant, NULL at the tail */
+ struct NineVertexDeclaration9 *vdecl; /* lookup key, held with nine_bind */
+ struct pipe_stream_output_info so; /* stream output layout of the variant */
+ void *cso; /* shader handle of the variant */
+};
+
+/* Search the variant list for the entry keyed by vdecl.
+ * On a hit, the cached stream output layout is copied to *so and the cached
+ * shader handle is returned. On a miss, NULL is returned and *so is left
+ * untouched. */
+static inline void *
+nine_shader_variant_so_get(struct nine_shader_variant_so *list,
+                           struct NineVertexDeclaration9 *vdecl,
+                           struct pipe_stream_output_info *so)
+{
+    struct nine_shader_variant_so *node;
+
+    for (node = list; node; node = node->next) {
+        if (node->vdecl == vdecl) {
+            *so = node->so;
+            return node->cso;
+        }
+    }
+    return NULL;
+}
+
+/* Append a stream output shader variant to the list.
+ *
+ * list:  head of the variant list (embedded in its owner).
+ * vdecl: vertex declaration the variant was built for; a bind reference is
+ *        taken on it.
+ * so:    stream output layout of the variant, copied into the node.
+ * cso:   shader handle of the variant.
+ *
+ * Returns TRUE on success, FALSE if a new node could not be allocated (in
+ * which case the list is unchanged). */
+static inline boolean
+nine_shader_variant_so_add(struct nine_shader_variant_so *list,
+                           struct NineVertexDeclaration9 *vdecl,
+                           struct pipe_stream_output_info *so, void *cso)
+{
+    struct nine_shader_variant_so *node;
+
+    if (list->vdecl == NULL) { /* first shader */
+        list->next = NULL;
+        nine_bind(&list->vdecl, vdecl);
+        list->so = *so;
+        list->cso = cso;
+        return TRUE;
+    }
+    while (list->next) {
+        assert(list->vdecl != vdecl);
+        list = list->next;
+    }
+    node = MALLOC_STRUCT(nine_shader_variant_so);
+    if (!node)
+        return FALSE;
+    node->next = NULL;
+    /* The key must be bound on the NEW node. Binding it on the current tail
+     * would replace the tail's key (so its cso/so no longer match it) and
+     * leave the new node's key uninitialised. The pointer is cleared first
+     * because nine_bind releases whatever it finds in the destination and
+     * MALLOC_STRUCT does not zero the allocation. */
+    node->vdecl = NULL;
+    nine_bind(&node->vdecl, vdecl);
+    node->so = *so;
+    node->cso = cso;
+    list->next = node;
+    return TRUE;
+}
+
+/* Release every node chained after the list head and drop all vertex
+ * declaration bindings, including the one held by the head itself.
+ * The shader handles (cso) stored in the nodes are not touched here. */
+static inline void
+nine_shader_variants_so_free(struct nine_shader_variant_so *list)
+{
+    struct nine_shader_variant_so *node = list->next;
+
+    while (node) {
+        struct nine_shader_variant_so *following = node->next;
+
+        nine_bind(&node->vdecl, NULL);
+        FREE(node);
+        node = following;
+    }
+    list->next = NULL;
+
+    if (list->vdecl)
+        nine_bind(&list->vdecl, NULL);
+}
+
#endif /* _NINE_SHADER_H_ */
#include "buffer9.h"
#include "indexbuffer9.h"
#include "surface9.h"
+#include "vertexbuffer9.h"
#include "vertexdeclaration9.h"
#include "vertexshader9.h"
#include "pixelshader9.h"
#include "cso_cache/cso_context.h"
#include "util/u_upload_mgr.h"
#include "util/u_math.h"
+#include "util/u_box.h"
+#include "util/u_simple_shaders.h"
#define DBG_CHANNEL DBG_DEVICE
}
}
+/* Set up the fixed state of the software context (device->cso_sw /
+ * device->pipe_sw). Rasterization is discarded, and blend, depth/stencil,
+ * framebuffer and viewport receive dummy values. An empty fragment shader
+ * is bound. */
+void
+nine_state_init_sw(struct NineDevice9 *device)
+{
+ struct pipe_context *pipe_sw = device->pipe_sw;
+ struct pipe_rasterizer_state rast;
+ struct pipe_blend_state blend;
+ struct pipe_depth_stencil_alpha_state dsa;
+ struct pipe_framebuffer_state fb;
+
+ /* Only used with Streamout */
+ memset(&rast, 0, sizeof(rast));
+ rast.rasterizer_discard = true;
+ rast.point_quad_rasterization = 1; /* to make llvmpipe happy */
+ cso_set_rasterizer(device->cso_sw, &rast);
+
+ /* dummy settings */
+ memset(&blend, 0, sizeof(blend));
+ memset(&dsa, 0, sizeof(dsa));
+ memset(&fb, 0, sizeof(fb));
+ cso_set_blend(device->cso_sw, &blend);
+ cso_set_depth_stencil_alpha(device->cso_sw, &dsa);
+ cso_set_framebuffer(device->cso_sw, &fb);
+ cso_set_viewport_dims(device->cso_sw, 1.0, 1.0, false);
+ cso_set_fragment_shader_handle(device->cso_sw, util_make_empty_fragment_shader(pipe_sw));
+}
+
+/* There is duplication with update_vertex_elements.
+ * TODO: Share the code */
+
+/* Build the vertex element array for the software context from the bound
+ * vertex declaration and the inputs of the current vertex shader, and set
+ * it on device->cso_sw.
+ * Shader inputs without a matching declaration element are pointed at a
+ * "dummy vbo" stream slot. As a side effect, state->stream_usage_mask and
+ * the dummy vbo bookkeeping in device->state are updated. */
+static void
+update_vertex_elements_sw(struct NineDevice9 *device)
+{
+ struct nine_state *state = &device->state;
+ const struct NineVertexDeclaration9 *vdecl = device->state.vdecl;
+ const struct NineVertexShader9 *vs;
+ unsigned n, b, i;
+ int index;
+ char vdecl_index_map[16]; /* vs->num_inputs <= 16 */
+ char used_streams[device->caps.MaxStreams];
+ int dummy_vbo_stream = -1;
+ BOOL need_dummy_vbo = FALSE;
+ struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
+
+ state->stream_usage_mask = 0;
+ /* -1 marks "no declaration element found for this shader input" */
+ memset(vdecl_index_map, -1, 16);
+ memset(used_streams, 0, device->caps.MaxStreams);
+ vs = state->programmable_vs ? device->state.vs : device->ff.vs;
+
+ if (vdecl) {
+ /* map every shader input to the first declaration element with the
+ * same usage, and note which streams are referenced */
+ for (n = 0; n < vs->num_inputs; ++n) {
+ DBG("looking up input %u (usage %u) from vdecl(%p)\n",
+ n, vs->input_map[n].ndecl, vdecl);
+
+ for (i = 0; i < vdecl->nelems; i++) {
+ if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
+ vdecl_index_map[n] = i;
+ used_streams[vdecl->elems[i].vertex_buffer_index] = 1;
+ break;
+ }
+ }
+ if (vdecl_index_map[n] < 0)
+ need_dummy_vbo = TRUE;
+ }
+ } else {
+ /* No vertex declaration. Likely will never happen in practice,
+ * but we need not crash on this */
+ need_dummy_vbo = TRUE;
+ }
+
+ if (need_dummy_vbo) {
+ /* pick the first stream slot not referenced by the declaration */
+ for (i = 0; i < device->caps.MaxStreams; i++ ) {
+ if (!used_streams[i]) {
+ dummy_vbo_stream = i;
+ break;
+ }
+ }
+ }
+ /* there are less vertex shader inputs than stream slots,
+ * so if we need a slot for the dummy vbo, we should have found one */
+ assert (!need_dummy_vbo || dummy_vbo_stream != -1);
+
+ for (n = 0; n < vs->num_inputs; ++n) {
+ index = vdecl_index_map[n];
+ if (index >= 0) {
+ ve[n] = vdecl->elems[index];
+ b = ve[n].vertex_buffer_index;
+ state->stream_usage_mask |= 1 << b;
+ /* XXX wine just uses 1 here: */
+ if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
+ ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF;
+ } else {
+ /* if the vertex declaration is incomplete compared to what the
+ * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
+ * This is not specified by the spec, but is the behaviour
+ * tested on win */
+ ve[n].vertex_buffer_index = dummy_vbo_stream;
+ ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ ve[n].src_offset = 0;
+ ve[n].instance_divisor = 0;
+ }
+ }
+
+ /* the dummy vbo slot moved: flag the old and the new slot as changed */
+ if (state->dummy_vbo_bound_at != dummy_vbo_stream) {
+ if (state->dummy_vbo_bound_at >= 0)
+ state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at;
+ if (dummy_vbo_stream >= 0) {
+ state->changed.vtxbuf |= 1 << dummy_vbo_stream;
+ state->vbo_bound_done = FALSE;
+ }
+ state->dummy_vbo_bound_at = dummy_vbo_stream;
+ }
+
+ cso_set_vertex_elements(device->cso_sw, vs->num_inputs, ve);
+}
+
+/* Make the vertex data of the first four streams available to the software
+ * context.
+ *
+ * For every stream slot 0..3 with a buffer bound, the requested vertex range
+ * is mapped for reading through the hardware context. The mapping is then
+ * either handed to the software context as a user buffer, or copied into a
+ * buffer of the software context when it does not support user vertex
+ * buffers. Slots without a buffer are unbound on the software context.
+ *
+ * start_vertice: first vertex of the range to expose.
+ * num_vertices:  number of vertices in the range.
+ *
+ * The transfers are stored in state->transfers_so[] and stay mapped until
+ * nine_state_after_draw_sw() releases them. */
+static void
+update_vertex_buffers_sw(struct NineDevice9 *device, int start_vertice, int num_vertices)
+{
+    struct pipe_context *pipe = device->pipe;
+    struct pipe_context *pipe_sw = device->pipe_sw;
+    struct nine_state *state = &device->state;
+    struct pipe_vertex_buffer vtxbuf;
+    uint32_t mask = 0xf;
+    unsigned i;
+
+    DBG("mask=%x\n", mask);
+
+    assert (state->dummy_vbo_bound_at < 0);
+    /* TODO: handle dummy_vbo_bound_at */
+
+    for (i = 0; mask; mask >>= 1, ++i) {
+        struct pipe_resource *buf;
+        struct pipe_box box;
+
+        if (!(mask & 1))
+            continue;
+
+        if (!state->vtxbuf[i].buffer) {
+            pipe_sw->set_vertex_buffers(pipe_sw, i, 1, NULL);
+            continue;
+        }
+
+        vtxbuf = state->vtxbuf[i];
+
+        DBG("Locking %p (offset %d, length %d)\n", vtxbuf.buffer,
+            vtxbuf.buffer_offset, num_vertices * vtxbuf.stride);
+
+        u_box_1d(vtxbuf.buffer_offset + start_vertice * vtxbuf.stride,
+                 num_vertices * vtxbuf.stride, &box);
+        buf = vtxbuf.buffer;
+        vtxbuf.user_buffer = pipe->transfer_map(pipe, buf, 0, PIPE_TRANSFER_READ, &box,
+                                                &(state->transfers_so[i]));
+        vtxbuf.buffer = NULL;
+        if (!vtxbuf.user_buffer) {
+            /* Mapping failed: there is no data to hand over. Unbind the
+             * slot rather than passing a NULL source to the uploader. */
+            pipe_sw->set_vertex_buffers(pipe_sw, i, 1, NULL);
+            continue;
+        }
+        if (!device->driver_caps.user_sw_vbufs) {
+            /* no user buffer support: copy the mapped range into a real
+             * buffer of the software context */
+            u_upload_data(device->vertex_sw_uploader,
+                          0,
+                          box.width,
+                          16,
+                          vtxbuf.user_buffer,
+                          &(vtxbuf.buffer_offset),
+                          &(vtxbuf.buffer));
+            u_upload_unmap(device->vertex_sw_uploader);
+            vtxbuf.user_buffer = NULL;
+        }
+        pipe_sw->set_vertex_buffers(pipe_sw, i, 1, &vtxbuf);
+        /* drop the reference returned by the uploader */
+        if (vtxbuf.buffer)
+            pipe_resource_reference(&vtxbuf.buffer, NULL);
+    }
+}
+
+/* Bind size bytes starting at data as vertex shader constant buffer slot
+ * of the software context.
+ * The data is passed as a user buffer when the software context supports
+ * user constant buffers and always_upload is FALSE. Otherwise it is first
+ * copied into a buffer obtained from device->constbuf_sw_uploader. */
+static void
+bind_vs_constbuf_sw(struct NineDevice9 *device, unsigned slot,
+                    const void *data, unsigned size, boolean always_upload)
+{
+    struct pipe_context *pipe_sw = device->pipe_sw;
+    struct pipe_constant_buffer cb;
+
+    cb.buffer = NULL;
+    cb.buffer_offset = 0;
+    cb.buffer_size = size;
+    cb.user_buffer = data;
+
+    if (always_upload || !device->driver_caps.user_sw_cbufs) {
+        u_upload_data(device->constbuf_sw_uploader,
+                      0,
+                      cb.buffer_size,
+                      16,
+                      cb.user_buffer,
+                      &(cb.buffer_offset),
+                      &(cb.buffer));
+        u_upload_unmap(device->constbuf_sw_uploader);
+        cb.user_buffer = NULL;
+    }
+
+    pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, slot, &cb);
+    /* drop the reference returned by the uploader */
+    if (cb.buffer)
+        pipe_resource_reference(&cb.buffer, NULL);
+}
+
+/* Upload all vertex shader constants to the software context:
+ *   slot 0: float constants, first 4096 vec4
+ *   slot 1: float constants, following 4096 vec4
+ *   slot 2: integer constants
+ *   slot 3: boolean constants
+ *   slot 4: viewport data (two vec4, always uploaded)
+ * If the bound vertex shader has local float constants, they are patched
+ * into a temporary copy of the float constants which is bound instead. */
+static void
+update_vs_constants_sw(struct NineDevice9 *device)
+{
+    struct nine_state *state = &device->state;
+    const void *const_f = state->vs_const_f_swvp;
+
+    DBG("updating\n");
+
+    if (state->vs->lconstf.ranges) {
+        const struct nine_lconstf *lconstf = &device->state.vs->lconstf;
+        const struct nine_range *r = lconstf->ranges;
+        unsigned n = 0;
+        float *dst = device->state.vs_lconstf_temp;
+        float *src = (float *)const_f;
+
+        memcpy(dst, src, 8192 * sizeof(float[4]));
+        while (r) {
+            unsigned p = r->bgn;
+            unsigned c = r->end - r->bgn;
+            memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
+            n += c;
+            r = r->next;
+        }
+        const_f = dst;
+    }
+
+    /* the float constants are split over two buffers of 4096 vec4 each */
+    bind_vs_constbuf_sw(device, 0, const_f,
+                        4096 * sizeof(float[4]), FALSE);
+    bind_vs_constbuf_sw(device, 1,
+                        (const char *)const_f + 4096 * sizeof(float[4]),
+                        4096 * sizeof(float[4]), FALSE);
+
+    bind_vs_constbuf_sw(device, 2, state->vs_const_i,
+                        2048 * sizeof(float[4]), FALSE);
+
+    bind_vs_constbuf_sw(device, 3, state->vs_const_b,
+                        512 * sizeof(float[4]), FALSE);
+
+    {
+        const D3DVIEWPORT9 *vport = &device->state.viewport;
+        float viewport_data[8] = {(float)vport->Width * 0.5f,
+            (float)vport->Height * -0.5f, vport->MaxZ - vport->MinZ, 0.f,
+            (float)vport->Width * 0.5f + (float)vport->X,
+            (float)vport->Height * 0.5f + (float)vport->Y,
+            vport->MinZ, 0.f};
+
+        /* viewport_data lives on the stack, so it is always copied into
+         * a real buffer instead of being bound as a user buffer */
+        bind_vs_constbuf_sw(device, 4, viewport_data,
+                            2 * sizeof(float[4]), TRUE);
+    }
+}
+
+/* Prepare the software context for a stream output draw: bind the
+ * ProcessVertices variant of the current vertex shader, then update vertex
+ * elements, vertex buffers and vertex shader constants.
+ *
+ * vdecl_out:     vertex declaration describing the wanted output layout.
+ * start_vertice: first source vertex to process.
+ * num_vertices:  number of vertices to process.
+ * so:            receives the stream output layout of the bound variant.
+ *
+ * Vertex buffer ranges mapped here stay mapped until
+ * nine_state_after_draw_sw() is called.
+ * NOTE(review): NineVertexShader9_GetVariantProcessVertices() returns NULL
+ * when shader translation fails; that result is passed on unchecked. */
+void
+nine_state_prepare_draw_sw(struct NineDevice9 *device, struct NineVertexDeclaration9 *vdecl_out,
+ int start_vertice, int num_vertices, struct pipe_stream_output_info *so)
+{
+ struct nine_state *state = &device->state;
+
+ struct NineVertexShader9 *vs = state->programmable_vs ? device->state.vs : device->ff.vs;
+
+ /* only programmable vertex shaders are handled for now */
+ assert(state->programmable_vs);
+
+ DBG("Preparing draw\n");
+ cso_set_vertex_shader_handle(device->cso_sw,
+ NineVertexShader9_GetVariantProcessVertices(vs, vdecl_out, so));
+ update_vertex_elements_sw(device);
+ update_vertex_buffers_sw(device, start_vertice, num_vertices);
+ update_vs_constants_sw(device);
+ DBG("Preparation succeeded\n");
+}
+
+/* Undo what the draw preparation left behind: unbind vertex buffer slots
+ * 0..3 of the software context and unmap every transfer still recorded in
+ * state->transfers_so[], clearing the entries. */
+void
+nine_state_after_draw_sw(struct NineDevice9 *device)
+{
+    struct pipe_context *hw_ctx = device->pipe;
+    struct pipe_context *sw_ctx = device->pipe_sw;
+    struct pipe_transfer **mapped = device->state.transfers_so;
+    int slot;
+
+    for (slot = 0; slot < 4; slot++) {
+        sw_ctx->set_vertex_buffers(sw_ctx, slot, 1, NULL);
+        if (mapped[slot]) {
+            hw_ctx->transfer_unmap(hw_ctx, mapped[slot]);
+            mapped[slot] = NULL;
+        }
+    }
+}
+
+/* Counterpart of nine_state_init_sw(). Intentionally does nothing: this
+ * function owns no state of its own. */
+void
+nine_state_destroy_sw(struct NineDevice9 *device)
+{
+ (void) device;
+ /* Everything destroyed with cso */
+}
+
/*
static const DWORD nine_render_states_pixel[] =
{
struct pipe_constant_buffer cb_vs_ff;
struct pipe_constant_buffer cb_ps_ff;
} pipe;
+
+ /* sw */
+ struct pipe_transfer *transfers_so[4];
};
/* map D3DRS -> NINE_STATE_x
boolean is_reset);
void nine_state_clear(struct nine_state *, const boolean device);
+void nine_state_init_sw(struct NineDevice9 *device);
+void nine_state_prepare_draw_sw(struct NineDevice9 *device,
+ struct NineVertexDeclaration9 *vdecl_out,
+ int start_vertice,
+ int num_vertices,
+ struct pipe_stream_output_info *so);
+void nine_state_after_draw_sw(struct NineDevice9 *device);
+void nine_state_destroy_sw(struct NineDevice9 *device);
+
/* If @alloc is FALSE, the return value may be a const identity matrix.
* Therefore, do not modify if you set alloc to FALSE !
*/
info.sampler_ps1xtypes = 0x0;
info.fog_enable = 0;
info.projected = 0;
+ info.process_vertices = false;
hr = nine_translate_shader(device, &info);
if (FAILED(hr))
info.fog_mode = device->state.rs[D3DRS_FOGTABLEMODE];
info.force_color_in_centroid = key >> 34 & 1;
info.projected = (key >> 48) & 0xffff;
+ info.process_vertices = false;
hr = nine_translate_shader(This->base.device, &info);
if (FAILED(hr))
#include "vertexbuffer9.h"
#include "device9.h"
#include "nine_helpers.h"
+#include "nine_shader.h"
#include "pipe/p_format.h"
#include "pipe/p_context.h"
#include "util/u_math.h"
#include "util/u_format.h"
-#include "util/u_box.h"
#include "translate/translate.h"
#define DBG_CHANNEL DBG_VERTEXDECLARATION
NINE_DEVICE_CHILD_NEW(VertexDeclaration9, ppOut, /* args */ pDevice, elems);
}
+/* Derive a stream output layout from the outputs of a vertex shader and a
+ * vertex declaration.
+ *
+ * Each shader output whose usage and usage index match an element of the
+ * declaration is captured; a POSITION output also matches a POSITIONT
+ * element. Every captured output gets a slot of four dwords in buffer 0,
+ * in the order in which the shader outputs are listed.
+ *
+ * ShaderOutputsInfo: array of numOutputs shader output descriptions.
+ * so:                cleared, then filled with the resulting layout. */
+void
+NineVertexDeclaration9_FillStreamOutputInfo(
+    struct NineVertexDeclaration9 *This,
+    struct nine_vs_output_info *ShaderOutputsInfo,
+    unsigned numOutputs,
+    struct pipe_stream_output_info *so )
+{
+    unsigned captured = 0;
+    int out, elem;
+
+    memset(so, 0, sizeof(struct pipe_stream_output_info));
+
+    for (out = 0; out < numOutputs; out++) {
+        const BYTE usage = ShaderOutputsInfo[out].output_semantic;
+        const unsigned usage_index = ShaderOutputsInfo[out].output_semantic_index;
+        const int mask = ShaderOutputsInfo[out].mask;
+
+        for (elem = 0; elem < This->nelems; elem++) {
+            int usage_matches = This->decls[elem].Usage == usage;
+            unsigned components;
+
+            if (!usage_matches && usage == D3DDECLUSAGE_POSITION)
+                usage_matches = This->decls[elem].Usage == D3DDECLUSAGE_POSITIONT;
+            if (!usage_matches || This->decls[elem].UsageIndex != usage_index)
+                continue;
+
+            DBG("Matching %s %d: o%d -> %d\n",
+                nine_declusage_name(nine_d3d9_to_nine_declusage(This->decls[elem].Usage, 0)),
+                This->decls[elem].UsageIndex, out, elem);
+
+            /* capture up to the highest component present in the mask */
+            if (mask & 8)
+                components = 4;
+            else if (mask & 4)
+                components = 3;
+            else if (mask & 2)
+                components = 2;
+            else
+                components = 1;
+
+            so->output[captured].register_index = ShaderOutputsInfo[out].output_index;
+            so->output[captured].start_component = 0;
+            so->output[captured].num_components = components;
+            so->output[captured].output_buffer = 0;
+            so->output[captured].dst_offset = captured * sizeof(float[4])/4;
+            so->output[captured].stream = 0;
+            captured++;
+            break; /* first matching element wins */
+        }
+    }
+
+    so->num_outputs = captured;
+    so->stride[0] = captured * sizeof(float[4])/4;
+}
+
/* ProcessVertices runs stream output into a temporary buffer to capture
* all outputs.
* Now we have to convert them to the format and order set by the vertex
struct NineVertexBuffer9 *pDstBuf,
UINT DestIndex,
UINT VertexCount,
- struct pipe_resource *pSrcBuf,
+ void *pSrcBuf,
const struct pipe_stream_output_info *so )
{
- struct pipe_context *pipe = This->base.device->pipe;
- struct pipe_transfer *transfer = NULL;
struct translate *translate;
struct translate_key transkey;
- struct pipe_box box;
HRESULT hr;
unsigned i;
- void *src_map;
void *dst_map;
DBG("This=%p pDstBuf=%p DestIndex=%u VertexCount=%u pSrcBuf=%p so=%p\n",
if (FAILED(hr))
goto out;
- src_map = pipe->transfer_map(pipe, pSrcBuf, 0, PIPE_TRANSFER_READ, &box,
- &transfer);
- if (!src_map) {
- hr = D3DERR_DRIVERINTERNALERROR;
- goto out;
- }
- translate->set_buffer(translate, 0, src_map, so->stride[0], ~0);
+ translate->set_buffer(translate, 0, pSrcBuf, so->stride[0] * 4, ~0);
translate->run(translate, 0, VertexCount, 0, 0, dst_map);
NineVertexBuffer9_Unlock(pDstBuf);
out:
- if (transfer)
- pipe->transfer_unmap(pipe, transfer);
translate->release(translate); /* TODO: cache these */
return hr;
}
struct pipe_stream_output_info;
struct NineDevice9;
struct NineVertexBuffer9;
+struct nine_vs_output_info;
struct NineVertexDeclaration9
{
D3DVERTEXELEMENT9 *pElement,
UINT *pNumElements );
+void
+NineVertexDeclaration9_FillStreamOutputInfo(
+ struct NineVertexDeclaration9 *This,
+ struct nine_vs_output_info *ShaderOutputsInfo,
+ unsigned numOutputs,
+ struct pipe_stream_output_info *so );
+
/* Convert stream output data to the vertex declaration's format. */
HRESULT
NineVertexDeclaration9_ConvertStreamOutput(
struct NineVertexBuffer9 *pDstBuf,
UINT DestIndex,
UINT VertexCount,
- struct pipe_resource *pSrcBuf,
+ void *pSrcBuf,
const struct pipe_stream_output_info *so );
#endif /* _NINE_VERTEXDECLARATION9_H_ */
#include "nine_helpers.h"
#include "nine_shader.h"
+#include "vertexdeclaration9.h"
#include "vertexshader9.h"
#include "device9.h"
#include "pipe/p_context.h"
+#include "cso_cache/cso_context.h"
#define DBG_CHANNEL DBG_VERTEXSHADER
info.point_size_min = 0;
info.point_size_max = 0;
info.swvp_on = !!(device->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING);
+ info.process_vertices = false;
hr = nine_translate_shader(device, &info);
if (hr == D3DERR_INVALIDCALL &&
if (This->base.device) {
struct pipe_context *pipe = This->base.device->pipe;
struct nine_shader_variant *var = &This->variant;
+ struct nine_shader_variant_so *var_so = &This->variant_so;
do {
if (var->cso) {
var = var->next;
} while (var);
+ while (var_so && var_so->vdecl) {
+ if (var_so->cso) {
+ cso_delete_vertex_shader(This->base.device->cso_sw, var_so->cso );
+ }
+ var_so = var_so->next;
+ }
+
if (This->ff_cso) {
if (This->ff_cso == This->base.device->state.cso.vs)
pipe->bind_vs_state(pipe, NULL);
}
}
nine_shader_variants_free(&This->variant);
+ nine_shader_variants_so_free(&This->variant_so);
FREE((void *)This->byte_code.tokens); /* const_cast */
info.point_size_min = asfloat(device->state.rs[D3DRS_POINTSIZE_MIN]);
info.point_size_max = asfloat(device->state.rs[D3DRS_POINTSIZE_MAX]);
info.swvp_on = device->swvp;
+ info.process_vertices = false;
hr = nine_translate_shader(This->base.device, &info);
if (FAILED(hr))
return cso;
}
+/* Return the shader handle of the stream output ("ProcessVertices")
+ * variant of this vertex shader for the output declaration vdecl_out.
+ *
+ * The variant is looked up in This->variant_so first. On a miss, the shader
+ * byte code is translated with process_vertices enabled and the result is
+ * added to the cache.
+ *
+ * so: receives the stream output layout of the returned variant.
+ *
+ * Returns NULL if the translation fails.
+ * NOTE(review): the result of nine_shader_variant_so_add() is ignored. If
+ * it fails, the handle is still returned but is not cached.
+ * NOTE(review): only part of 'info' is initialised before translation;
+ * confirm the vertex shader path reads none of the unset fields. */
+void *
+NineVertexShader9_GetVariantProcessVertices( struct NineVertexShader9 *This,
+ struct NineVertexDeclaration9 *vdecl_out,
+ struct pipe_stream_output_info *so )
+{
+ struct nine_shader_info info;
+ HRESULT hr;
+ void *cso;
+
+ /* fast path: variant already built for this declaration */
+ cso = nine_shader_variant_so_get(&This->variant_so, vdecl_out, so);
+ if (cso)
+ return cso;
+
+ info.type = PIPE_SHADER_VERTEX;
+ info.const_i_base = 0;
+ info.const_b_base = 0;
+ info.byte_code = This->byte_code.tokens;
+ info.sampler_mask_shadow = 0;
+ info.fog_enable = false;
+ info.point_size_min = 0;
+ info.point_size_max = 0;
+ info.swvp_on = true;
+ info.vdecl_out = vdecl_out;
+ info.process_vertices = true;
+ hr = nine_translate_shader(This->base.device, &info);
+ if (FAILED(hr))
+ return NULL;
+ *so = info.so;
+ nine_shader_variant_so_add(&This->variant_so, vdecl_out, so, info.cso);
+ return info.cso;
+}
+
IDirect3DVertexShader9Vtbl NineVertexShader9_vtable = {
(void *)NineUnknown_QueryInterface,
(void *)NineUnknown_AddRef,
#include "nine_shader.h"
#include "nine_state.h"
+struct NineVertexDeclaration9;
+
struct NineVertexShader9
{
struct NineUnknown base;
struct nine_lconstf lconstf;
- const struct pipe_stream_output_info *so;
-
uint64_t ff_key[3];
void *ff_cso;
void *last_cso;
uint64_t next_key;
+
+ /* so */
+ struct nine_shader_variant_so variant_so;
};
static inline struct NineVertexShader9 *
NineVertexShader9( void *data )
void *
NineVertexShader9_GetVariant( struct NineVertexShader9 *vs );
+void *
+NineVertexShader9_GetVariantProcessVertices( struct NineVertexShader9 *vs,
+ struct NineVertexDeclaration9 *vdecl_out,
+ struct pipe_stream_output_info *so );
+
/*** public ***/
HRESULT