From: Neha Bhende Date: Tue, 26 May 2020 15:56:42 +0000 (+0530) Subject: svga: Add GL4.1(compatibility profile) support in svga driver X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ccb4ea5a43e89fcc93fff98c881639223f1538e5;p=mesa.git svga: Add GL4.1(compatibility profile) support in svga driver This patch is a squash commit of a very long in-house patch series. Reviewed-by: Brian Paul Reviewed-by: Charmaine Lee Signed-off-by: Neha Bhende Part-of: --- diff --git a/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h b/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h index 77af6d39a5a..e23ee53ffb1 100644 --- a/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h +++ b/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h @@ -201,7 +201,7 @@ typedef enum { VGPU10_OPCODE_DCL_GLOBAL_FLAGS = 106, /* GL guest */ - VGPU10_OPCODE_IDIV = 107, + VGPU10_OPCODE_VMWARE = 107, /* DX10.1 */ VGPU10_OPCODE_LOD = 108, diff --git a/src/gallium/drivers/svga/include/svga3d_types.h b/src/gallium/drivers/svga/include/svga3d_types.h index 48eafe72202..94262314e29 100644 --- a/src/gallium/drivers/svga/include/svga3d_types.h +++ b/src/gallium/drivers/svga/include/svga3d_types.h @@ -436,8 +436,9 @@ typedef uint32 SVGA3dSurfaceFlags; * mob-backing to store all the samples. */ #define SVGA3D_SURFACE_MULTISAMPLE (CONST64U(1) << 32) +#define SVGA3D_SURFACE_DRAWINDIRECT_ARGS (CONST64U(1) << 38) -#define SVGA3D_SURFACE_FLAG_MAX (CONST64U(1) << 33) +#define SVGA3D_SURFACE_FLAG_MAX (CONST64U(1) << 42) /* * Surface flags types: @@ -464,7 +465,8 @@ typedef uint64 SVGA3dSurfaceAllFlags; SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ SVGA3D_SURFACE_VADECODE | \ - SVGA3D_SURFACE_MULTISAMPLE \ + SVGA3D_SURFACE_MULTISAMPLE | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS \ ) #define SVGA3D_SURFACE_2D_DISALLOWED_MASK \ @@ -480,7 +482,8 @@ typedef uint64 SVGA3dSurfaceAllFlags; SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ SVGA3D_SURFACE_VADECODE | \ - SVGA3D_SURFACE_MULTISAMPLE \ + SVGA3D_SURFACE_MULTISAMPLE | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS \ ) #define SVGA3D_SURFACE_BASICOPS_DISALLOWED_MASK \ @@ -508,7 +511,8 @@ typedef uint64 SVGA3dSurfaceAllFlags; SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ SVGA3D_SURFACE_VADECODE | \ - SVGA3D_SURFACE_MULTISAMPLE \ + SVGA3D_SURFACE_MULTISAMPLE | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS \ ) #define SVGA3D_SURFACE_BUFFER_DISALLOWED_MASK \ @@ -527,7 +531,8 @@ typedef uint64 SVGA3dSurfaceAllFlags; SVGA3D_SURFACE_VOLUME | \ SVGA3D_SURFACE_1D | \ SVGA3D_SURFACE_SCREENTARGET | \ - SVGA3D_SURFACE_MOB_PITCH \ + SVGA3D_SURFACE_MOB_PITCH | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS \ ) #define SVGA3D_SURFACE_DX_ONLY_MASK \ @@ -636,7 +641,8 @@ typedef uint64 SVGA3dSurfaceAllFlags; SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ SVGA3D_SURFACE_VADECODE | \ - SVGA3D_SURFACE_MULTISAMPLE \ + SVGA3D_SURFACE_MULTISAMPLE | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS \ ) diff --git a/src/gallium/drivers/svga/meson.build b/src/gallium/drivers/svga/meson.build index 368d0c7f342..8dcdadd6e1d 100644 --- a/src/gallium/drivers/svga/meson.build +++ b/src/gallium/drivers/svga/meson.build @@ -36,6 +36,7 @@ files_svga = files( 'svga_pipe_flush.c', 'svga_pipe_fs.c', 'svga_pipe_gs.c', + 'svga_pipe_ts.c', 'svga_pipe_misc.c', 'svga_pipe_query.c', 'svga_pipe_rasterizer.c', @@ -56,6 +57,7 @@ files_svga = files( 'svga_state_framebuffer.c', 'svga_state_fs.c', 'svga_state_gs.c', + 'svga_state_ts.c', 
'svga_state_need_swtnl.c', 'svga_state_rss.c', 'svga_state_sampler.c', diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h index f6cb4fc27c1..22a40cf05cb 100644 --- a/src/gallium/drivers/svga/svga_cmd.h +++ b/src/gallium/drivers/svga/svga_cmd.h @@ -697,4 +697,33 @@ SVGA3D_vgpu10_ResolveCopy(struct svga_winsys_context *swc, struct svga_winsys_surface *src, const SVGA3dSurfaceFormat copyFormat); +enum pipe_error +SVGA3D_sm5_DrawIndexedInstancedIndirect(struct svga_winsys_context *swc, + struct svga_winsys_surface *argBuffer, + unsigned argOffset); + +enum pipe_error +SVGA3D_sm5_DrawInstancedIndirect(struct svga_winsys_context *swc, + struct svga_winsys_surface *argBuffer, + unsigned argOffset); + +enum pipe_error +SVGA3D_sm5_Dispatch(struct svga_winsys_context *swc, + const uint32 threadGroupCount[3]); + +enum pipe_error +SVGA3D_sm5_DispatchIndirect(struct svga_winsys_context *swc, + struct svga_winsys_surface *argBuffer, + uint32 argOffset); + +enum pipe_error +SVGA3D_sm5_DefineAndBindStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid, + uint32 numOutputStreamEntries, + uint32 numOutputStreamStrides, + uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS], + struct svga_winsys_buffer *declBuf, + uint32 rasterizedStream, + uint32 sizeInBytes); + #endif /* __SVGA3D_H__ */ diff --git a/src/gallium/drivers/svga/svga_cmd_vgpu10.c b/src/gallium/drivers/svga/svga_cmd_vgpu10.c index 1ca050ecb7a..eb5a482d9ba 100644 --- a/src/gallium/drivers/svga/svga_cmd_vgpu10.c +++ b/src/gallium/drivers/svga/svga_cmd_vgpu10.c @@ -1130,7 +1130,7 @@ SVGA3D_vgpu10_DefineStreamOutput(struct svga_winsys_context *swc, memcpy(cmd->decl, decl, sizeof(SVGA3dStreamOutputDeclarationEntry) - * SVGA3D_MAX_STREAMOUT_DECLS); + * SVGA3D_MAX_DX10_STREAMOUT_DECLS); cmd->rasterizedStream = 0; swc->commit(swc); @@ -1432,3 +1432,159 @@ SVGA3D_vgpu10_ResolveCopy(struct svga_winsys_context *swc, return PIPE_OK; } + + +enum pipe_error +SVGA3D_sm5_DrawIndexedInstancedIndirect(struct svga_winsys_context *swc, + struct svga_winsys_surface *argBuffer, + unsigned argOffset) +{ + SVGA3dCmdDXDrawIndexedInstancedIndirect *cmd = + SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED_INDIRECT, + sizeof(SVGA3dCmdDXDrawIndexedInstancedIndirect), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->argsBufferSid, NULL, argBuffer, + SVGA_RELOC_READ); + cmd->byteOffsetForArgs = argOffset; + + swc->commit(swc); + + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_sm5_DrawInstancedIndirect(struct svga_winsys_context *swc, + struct svga_winsys_surface *argBuffer, + unsigned argOffset) +{ + SVGA3dCmdDXDrawInstancedIndirect *cmd = + SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_DRAW_INSTANCED_INDIRECT, + sizeof(SVGA3dCmdDXDrawInstancedIndirect), + 1); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->argsBufferSid, NULL, argBuffer, + SVGA_RELOC_READ); + cmd->byteOffsetForArgs = argOffset; + + swc->commit(swc); + + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_sm5_Dispatch(struct svga_winsys_context *swc, + const uint32 threadGroupCount[3]) +{ + SVGA3dCmdDXDispatch *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_DISPATCH, + sizeof(SVGA3dCmdDXDispatch), + 0); + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->threadGroupCountX = threadGroupCount[0]; + cmd->threadGroupCountY = threadGroupCount[1]; + cmd->threadGroupCountZ = threadGroupCount[2]; + + 
swc->commit(swc); + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_sm5_DispatchIndirect(struct svga_winsys_context *swc, + struct svga_winsys_surface *argBuffer, + uint32 argOffset) +{ + SVGA3dCmdDXDispatchIndirect *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DX_DISPATCH_INDIRECT, + sizeof(SVGA3dCmdDXDispatchIndirect), + 1); + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->argsBufferSid, NULL, argBuffer, + SVGA_RELOC_READ); + cmd->byteOffsetForArgs = argOffset; + + swc->commit(swc); + return PIPE_OK; +} + + +/** + * We don't want any flush between DefineStreamOutputWithMob and + * BindStreamOutput because it will cause partial state in the command + * buffer. This function makes sure there is enough room for + * both commands before issuing them. + */ + +enum pipe_error +SVGA3D_sm5_DefineAndBindStreamOutput(struct svga_winsys_context *swc, + SVGA3dStreamOutputId soid, + uint32 numOutputStreamEntries, + uint32 numOutputStreamStrides, + uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS], + struct svga_winsys_buffer *declBuf, + uint32 rasterizedStream, + uint32 sizeInBytes) +{ + unsigned i; + SVGA3dCmdHeader *header; + SVGA3dCmdDXDefineStreamOutputWithMob *dcmd; + SVGA3dCmdDXBindStreamOutput *bcmd; + + unsigned totalSize = 2 * sizeof(*header) + + sizeof(*dcmd) + sizeof(*bcmd); + + /* Make sure there is room for both commands */ + header = swc->reserve(swc, totalSize, 2); + if (!header) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* DXDefineStreamOutputWithMob command */ + header->id = SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT_WITH_MOB; + header->size = sizeof(*dcmd); + dcmd = (SVGA3dCmdDXDefineStreamOutputWithMob *)(header + 1); + dcmd->soid = soid; + dcmd->numOutputStreamEntries = numOutputStreamEntries; + dcmd->numOutputStreamStrides = numOutputStreamStrides; + dcmd->rasterizedStream = rasterizedStream; + + for (i = 0; i < ARRAY_SIZE(dcmd->streamOutputStrideInBytes); i++) + dcmd->streamOutputStrideInBytes[i] = streamOutputStrideInBytes[i]; + + + /* DXBindStreamOutput command */ + header = (SVGA3dCmdHeader *)(dcmd + 1); + + header->id = SVGA_3D_CMD_DX_BIND_STREAMOUTPUT; + header->size = sizeof(*bcmd); + bcmd = (SVGA3dCmdDXBindStreamOutput *)(header + 1); + + bcmd->soid = soid; + bcmd->offsetInBytes = 0; + swc->mob_relocation(swc, &bcmd->mobid, + &bcmd->offsetInBytes, declBuf, 0, + SVGA_RELOC_WRITE); + + bcmd->sizeInBytes = sizeInBytes; + bcmd->offsetInBytes = 0; + + + swc->commit(swc); + return PIPE_OK; +} diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index cdc222e2438..4ef99efe989 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -44,6 +44,7 @@ #include "svga_debug.h" #include "svga_state.h" #include "svga_winsys.h" +#include "svga_streamout.h" #define CONST0_UPLOAD_DEFAULT_SIZE 65536 @@ -79,6 +80,9 @@ svga_destroy(struct pipe_context *pipe) pipe->delete_blend_state(pipe, svga->noop_blend); + /* destroy stream output statistics queries */ + svga_destroy_stream_output_queries(svga); + /* free query gb object */ if (svga->gb_query) { pipe->destroy_query(pipe, NULL); @@ -91,6 +95,7 @@ svga_destroy(struct pipe_context *pipe) svga_cleanup_framebuffer(svga); svga_cleanup_tss_binding(svga); svga_cleanup_vertex_state(svga); + svga_cleanup_tcs_state(svga); svga_destroy_swtnl(svga); svga_hwtnl_destroy(svga->hwtnl); @@ -174,12 +179,14 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags) svga_init_fs_functions(svga);
svga_init_vs_functions(svga); svga_init_gs_functions(svga); + svga_init_ts_functions(svga); svga_init_vertex_functions(svga); svga_init_constbuffer_functions(svga); svga_init_query_functions(svga); svga_init_surface_functions(svga); svga_init_stream_output_functions(svga); svga_init_clear_functions(svga); + svga_init_tracked_state(svga); /* init misc state */ svga->curr.sample_mask = ~0; @@ -250,6 +257,7 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags) memset(&svga->state.hw_clear, 0xcd, sizeof(svga->state.hw_clear)); memset(&svga->state.hw_clear.framebuffer, 0x0, sizeof(svga->state.hw_clear.framebuffer)); + memset(&svga->state.hw_clear.rtv, 0, sizeof(svga->state.hw_clear.rtv)); svga->state.hw_clear.num_rendertargets = 0; svga->state.hw_clear.dsv = NULL; @@ -269,6 +277,8 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags) svga->state.hw_draw.vs = NULL; svga->state.hw_draw.gs = NULL; svga->state.hw_draw.fs = NULL; + svga->state.hw_draw.tcs = NULL; + svga->state.hw_draw.tes = NULL; /* Initialize the currently bound buffer resources */ memset(svga->state.hw_draw.constbuf, 0, @@ -303,10 +313,16 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags) svga->noop_blend = svga->pipe.create_blend_state(&svga->pipe, &noop_tmpl); } - svga->dirty = ~0; + svga->dirty = SVGA_NEW_ALL; svga->pred.query_id = SVGA3D_INVALID_ID; svga->disable_rasterizer = FALSE; + /** + * Create stream output statistics queries used in the workaround for auto + * draw with stream instancing. + */ + svga_create_stream_output_queries(svga); + goto done; cleanup: @@ -398,6 +414,11 @@ svga_context_flush(struct svga_context *svga, svga->rebind.flags.fs = TRUE; svga->rebind.flags.gs = TRUE; + if (svga_have_sm5(svga)) { + svga->rebind.flags.tcs = TRUE; + svga->rebind.flags.tes = TRUE; + } + if (svga_need_to_rebind_resources(svga)) { svga->rebind.flags.query = TRUE; } @@ -447,12 +468,7 @@ svga_hwtnl_flush_retry(struct svga_context *svga) { enum pipe_error ret = PIPE_OK; - ret = svga_hwtnl_flush(svga->hwtnl); - if (ret == PIPE_ERROR_OUT_OF_MEMORY) { - svga_context_flush(svga, NULL); - ret = svga_hwtnl_flush(svga->hwtnl); - } - + SVGA_RETRY_OOM(svga, ret, svga_hwtnl_flush(svga->hwtnl)); assert(ret == PIPE_OK); } diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index eef8b88f594..c0c315119f6 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -43,7 +43,7 @@ #include "svga_winsys.h" #include "svga_hw_reg.h" #include "svga3d_shaderdefs.h" - +#include "svga_debug.h" /** Non-GPU queries for gallium HUD */ enum svga_hud { @@ -56,6 +56,7 @@ enum svga_hud { SVGA_QUERY_NUM_BUFFERS_MAPPED, SVGA_QUERY_NUM_TEXTURES_MAPPED, SVGA_QUERY_NUM_BYTES_UPLOADED, + SVGA_QUERY_NUM_COMMAND_BUFFERS, SVGA_QUERY_COMMAND_BUFFER_SIZE, SVGA_QUERY_FLUSH_TIME, SVGA_QUERY_SURFACE_WRITE_FLUSHES, @@ -64,6 +65,8 @@ enum svga_hud { SVGA_QUERY_NUM_BUFFER_UPLOADS, SVGA_QUERY_NUM_CONST_BUF_UPDATES, SVGA_QUERY_NUM_CONST_UPDATES, + SVGA_QUERY_NUM_SHADER_RELOCATIONS, + SVGA_QUERY_NUM_SURFACE_RELOCATIONS, /* running total counters */ SVGA_QUERY_MEMORY_USED, @@ -74,6 +77,7 @@ enum svga_hud { SVGA_QUERY_NUM_GENERATE_MIPMAP, SVGA_QUERY_NUM_FAILED_ALLOCATIONS, SVGA_QUERY_NUM_COMMANDS_PER_DRAW, + SVGA_QUERY_SHADER_MEM_USED, /*SVGA_QUERY_MAX has to be last because it is size of an array*/ SVGA_QUERY_MAX @@ -109,6 +113,8 @@ struct svga_blend_state { unsigned alpha_to_coverage:1; unsigned alpha_to_one:1; unsigned 
blend_color_alpha:1; /**< set blend color to alpha value */ + unsigned logicop_enabled:1; + unsigned logicop_mode:5; /** Per-render target state */ struct { @@ -269,6 +275,11 @@ struct svga_state struct svga_vertex_shader *vs; struct svga_geometry_shader *user_gs; /* user-specified GS */ struct svga_geometry_shader *gs; /* derived GS */ + /* derived tessellation control shader */ + struct svga_tcs_shader *tcs; + /* derived tessellation evaluation shader */ + struct svga_tes_shader *tes; + struct svga_compute_shader *cs; struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; /** Constant buffers for each shader. @@ -286,11 +297,11 @@ struct svga_state int nr_fbs; struct pipe_poly_stipple poly_stipple; - struct pipe_scissor_state scissor; + struct pipe_scissor_state scissor[SVGA3D_DX_MAX_VIEWPORTS]; struct pipe_blend_color blend_color; struct pipe_stencil_ref stencil_ref; struct pipe_clip_state clip; - struct pipe_viewport_state viewport; + struct pipe_viewport_state viewport[SVGA3D_DX_MAX_VIEWPORTS]; unsigned num_samplers[PIPE_SHADER_TYPES]; unsigned num_sampler_views[PIPE_SHADER_TYPES]; @@ -303,6 +314,14 @@ struct svga_state } tex_flags; unsigned sample_mask; + unsigned vertices_per_patch; + float default_tesslevels[6]; /* tessellation (outer[4] + inner[2]) levels */ + struct { + /* Determine the layout of the grid (in block units) to be used. */ + unsigned size[3]; + /* If DispatchIndirect is used, this will have grid size info */ + struct pipe_resource *indirect; + } grid_info; }; struct svga_prescale { @@ -311,21 +330,27 @@ struct svga_prescale { boolean enabled; }; +struct svga_depthrange { + float zmin; + float zmax; +}; /* Updated by calling svga_update_state( SVGA_STATE_HW_CLEAR ) */ struct svga_hw_clear_state { - SVGA3dRect viewport; - - struct { - float zmin, zmax; - } depthrange; - struct pipe_framebuffer_state framebuffer; - struct svga_prescale prescale; + + /* VGPU9 only */ + SVGA3dRect viewport; + struct svga_depthrange depthrange; /* VGPU10 state */ + SVGA3dViewport viewports[SVGA3D_DX_MAX_VIEWPORTS]; + struct svga_prescale prescale[SVGA3D_DX_MAX_VIEWPORTS]; + struct pipe_scissor_state scissors[SVGA3D_DX_MAX_VIEWPORTS]; + unsigned num_prescale; + unsigned num_rendertargets; struct pipe_surface *rtv[SVGA3D_MAX_RENDER_TARGETS]; struct pipe_surface *dsv; @@ -361,6 +386,9 @@ struct svga_hw_draw_state struct svga_shader_variant *fs; struct svga_shader_variant *vs; struct svga_shader_variant *gs; + struct svga_shader_variant *tcs; + struct svga_shader_variant *tes; + struct svga_shader_variant *cs; /** Currently bound constant buffer, per shader stage */ struct pipe_resource *constbuf[PIPE_SHADER_TYPES]; @@ -495,7 +523,7 @@ struct svga_context struct util_bitmask *query_id_bm; struct { - unsigned dirty[SVGA_STATE_MAX]; + uint64_t dirty[SVGA_STATE_MAX]; /** bitmasks of which const buffers are changed */ unsigned dirty_constbufs[PIPE_SHADER_TYPES]; @@ -508,7 +536,7 @@ struct svga_context } state; struct svga_state curr; /* state from the gallium frontend */ - unsigned dirty; /* statechanges since last update_state() */ + uint64_t dirty; /* statechanges since last update_state() */ union { struct { @@ -518,6 +546,9 @@ struct svga_context unsigned vs:1; unsigned fs:1; unsigned gs:1; + unsigned tcs:1; + unsigned tes:1; + unsigned cs:1; unsigned query:1; } flags; unsigned val; @@ -531,7 +562,10 @@ struct svga_context struct util_bitmask *gb_query_alloc_mask; /**< gb query object allocation mask */ struct svga_qmem_alloc_entry *gb_query_map[SVGA_QUERY_MAX]; /**< query mem block mapping */ -
struct svga_query *sq[SVGA_QUERY_MAX]; /**< queries currently in progress */ + struct svga_query *sq[SVGA_QUERY_MAX+12]; /**< queries currently in progress */ + /* The last 12 entries are for streamout + * queries for stream 0..3 + */ /** List of buffers with queued transfers */ struct list_head dirty_buffers; @@ -545,6 +579,7 @@ struct svga_context uint64_t map_buffer_time; /**< SVGA_QUERY_MAP_BUFFER_TIME */ uint64_t num_buffers_mapped; /**< SVGA_QUERY_NUM_BUFFERS_MAPPED */ uint64_t num_textures_mapped; /**< SVGA_QUERY_NUM_TEXTURES_MAPPED */ + uint64_t num_command_buffers; /**< SVGA_QUERY_NUM_COMMAND_BUFFERS */ uint64_t command_buffer_size; /**< SVGA_QUERY_COMMAND_BUFFER_SIZE */ uint64_t flush_time; /**< SVGA_QUERY_FLUSH_TIME */ uint64_t surface_write_flushes; /**< SVGA_QUERY_SURFACE_WRITE_FLUSHES */ @@ -566,16 +601,28 @@ struct svga_context uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */ uint64_t num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */ uint64_t num_generate_mipmap; /**< SVGA_QUERY_NUM_GENERATE_MIPMAP */ + uint64_t shader_mem_used; /**< SVGA_QUERY_SHADER_MEM_USED */ boolean uses_time; /**< os_time_get() calls needed? */ } hud; /** The currently bound stream output targets */ + boolean in_streamout; /* Set if streamout is active */ unsigned num_so_targets; struct svga_winsys_surface *so_surfaces[SVGA3D_DX_MAX_SOTARGETS]; struct pipe_stream_output_target *so_targets[SVGA3D_DX_MAX_SOTARGETS]; struct svga_stream_output *current_so; + /** + * The following states are used in the workaround for auto draw with + * stream instancing. + */ + + /* Last bound SO targets that can be used to get vertex count */ + struct pipe_stream_output_target *vcount_so_targets[SVGA3D_DX_MAX_SOTARGETS]; + unsigned vcount_buffer_stream; /* SO buffer to stream index mask */ + struct pipe_query *so_queries[4]; /* SO stat queries for each stream */ + /** A blend state with blending disabled, for falling back to when blending * is illegal (e.g. 
an integer texture is bound) */ @@ -601,41 +648,58 @@ struct svga_context boolean render_condition; boolean disable_rasterizer; /* Set if to disable rasterization */ + + struct { + struct svga_tcs_shader *passthrough_tcs; + struct svga_vertex_shader *vs; + struct svga_tes_shader *tes; + unsigned vertices_per_patch; + boolean passthrough; + } tcs; + }; /* A flag for each frontend state object: */ -#define SVGA_NEW_BLEND 0x1 -#define SVGA_NEW_DEPTH_STENCIL_ALPHA 0x2 -#define SVGA_NEW_RAST 0x4 -#define SVGA_NEW_SAMPLER 0x8 -#define SVGA_NEW_TEXTURE 0x10 -#define SVGA_NEW_VBUFFER 0x20 -#define SVGA_NEW_VELEMENT 0x40 -#define SVGA_NEW_FS 0x80 -#define SVGA_NEW_VS 0x100 -#define SVGA_NEW_FS_CONST_BUFFER 0x200 -#define SVGA_NEW_VS_CONST_BUFFER 0x400 -#define SVGA_NEW_FRAME_BUFFER 0x800 -#define SVGA_NEW_STIPPLE 0x1000 -#define SVGA_NEW_SCISSOR 0x2000 -#define SVGA_NEW_BLEND_COLOR 0x4000 -#define SVGA_NEW_CLIP 0x8000 -#define SVGA_NEW_VIEWPORT 0x10000 -#define SVGA_NEW_PRESCALE 0x20000 -#define SVGA_NEW_REDUCED_PRIMITIVE 0x40000 -#define SVGA_NEW_TEXTURE_BINDING 0x80000 -#define SVGA_NEW_NEED_PIPELINE 0x100000 -#define SVGA_NEW_NEED_SWVFETCH 0x200000 -#define SVGA_NEW_NEED_SWTNL 0x400000 -#define SVGA_NEW_FS_VARIANT 0x800000 -#define SVGA_NEW_VS_VARIANT 0x1000000 -#define SVGA_NEW_TEXTURE_FLAGS 0x4000000 -#define SVGA_NEW_STENCIL_REF 0x8000000 -#define SVGA_NEW_GS 0x10000000 -#define SVGA_NEW_GS_CONST_BUFFER 0x20000000 -#define SVGA_NEW_GS_VARIANT 0x40000000 -#define SVGA_NEW_TEXTURE_CONSTS 0x80000000 +#define SVGA_NEW_BLEND ((uint64_t) 0x1) +#define SVGA_NEW_DEPTH_STENCIL_ALPHA ((uint64_t) 0x2) +#define SVGA_NEW_RAST ((uint64_t) 0x4) +#define SVGA_NEW_SAMPLER ((uint64_t) 0x8) +#define SVGA_NEW_TEXTURE ((uint64_t) 0x10) +#define SVGA_NEW_VBUFFER ((uint64_t) 0x20) +#define SVGA_NEW_VELEMENT ((uint64_t) 0x40) +#define SVGA_NEW_FS ((uint64_t) 0x80) +#define SVGA_NEW_VS ((uint64_t) 0x100) +#define SVGA_NEW_FS_CONST_BUFFER ((uint64_t) 0x200) +#define SVGA_NEW_VS_CONST_BUFFER ((uint64_t) 0x400) +#define SVGA_NEW_FRAME_BUFFER ((uint64_t) 0x800) +#define SVGA_NEW_STIPPLE ((uint64_t) 0x1000) +#define SVGA_NEW_SCISSOR ((uint64_t) 0x2000) +#define SVGA_NEW_BLEND_COLOR ((uint64_t) 0x4000) +#define SVGA_NEW_CLIP ((uint64_t) 0x8000) +#define SVGA_NEW_VIEWPORT ((uint64_t) 0x10000) +#define SVGA_NEW_PRESCALE ((uint64_t) 0x20000) +#define SVGA_NEW_REDUCED_PRIMITIVE ((uint64_t) 0x40000) +#define SVGA_NEW_TEXTURE_BINDING ((uint64_t) 0x80000) +#define SVGA_NEW_NEED_PIPELINE ((uint64_t) 0x100000) +#define SVGA_NEW_NEED_SWVFETCH ((uint64_t) 0x200000) +#define SVGA_NEW_NEED_SWTNL ((uint64_t) 0x400000) +#define SVGA_NEW_FS_VARIANT ((uint64_t) 0x800000) +#define SVGA_NEW_VS_VARIANT ((uint64_t) 0x1000000) +#define SVGA_NEW_TEXTURE_FLAGS ((uint64_t) 0x4000000) +#define SVGA_NEW_STENCIL_REF ((uint64_t) 0x8000000) +#define SVGA_NEW_GS ((uint64_t) 0x10000000) +#define SVGA_NEW_GS_CONST_BUFFER ((uint64_t) 0x20000000) +#define SVGA_NEW_GS_VARIANT ((uint64_t) 0x40000000) +#define SVGA_NEW_TEXTURE_CONSTS ((uint64_t) 0x80000000) +#define SVGA_NEW_TCS ((uint64_t) 0x100000000) +#define SVGA_NEW_TES ((uint64_t) 0x200000000) +#define SVGA_NEW_TCS_VARIANT ((uint64_t) 0x400000000) +#define SVGA_NEW_TES_VARIANT ((uint64_t) 0x800000000) +#define SVGA_NEW_TCS_CONST_BUFFER ((uint64_t) 0x1000000000) +#define SVGA_NEW_TES_CONST_BUFFER ((uint64_t) 0x2000000000) +#define SVGA_NEW_TCS_PARAM ((uint64_t) 0x4000000000) +#define SVGA_NEW_ALL ((uint64_t) 0xFFFFFFFFFFFFFFFF) void svga_init_state_functions( struct svga_context *svga ); @@ -648,9 +712,11 
@@ void svga_init_depth_stencil_functions( struct svga_context *svga ); void svga_init_misc_functions( struct svga_context *svga ); void svga_init_rasterizer_functions( struct svga_context *svga ); void svga_init_sampler_functions( struct svga_context *svga ); +void svga_init_cs_functions( struct svga_context *svga ); void svga_init_fs_functions( struct svga_context *svga ); void svga_init_vs_functions( struct svga_context *svga ); void svga_init_gs_functions( struct svga_context *svga ); +void svga_init_ts_functions( struct svga_context *svga ); void svga_init_vertex_functions( struct svga_context *svga ); void svga_init_constbuffer_functions( struct svga_context *svga ); void svga_init_draw_functions( struct svga_context *svga ); @@ -663,6 +729,7 @@ void svga_cleanup_vertex_state( struct svga_context *svga ); void svga_cleanup_sampler_state( struct svga_context *svga ); void svga_cleanup_tss_binding( struct svga_context *svga ); void svga_cleanup_framebuffer( struct svga_context *svga ); +void svga_cleanup_tcs_state( struct svga_context *svga ); void svga_context_flush( struct svga_context *svga, struct pipe_fence_handle **pfence ); @@ -723,6 +790,12 @@ svga_have_sm4_1(const struct svga_context *svga) return svga_screen(svga->pipe.screen)->sws->have_sm4_1; } +static inline boolean +svga_have_sm5(const struct svga_context *svga) +{ + return svga_screen(svga->pipe.screen)->sws->have_sm5; +} + static inline boolean svga_need_to_rebind_resources(const struct svga_context *svga) { @@ -745,5 +818,107 @@ svga_get_time(struct svga_context *svga) return svga->hud.uses_time ? os_time_get() : 0; } +/* + * The SVGA_TRY_XX family of macros can be used to optionally replace a + * function call with an error value, the purpose is to trigger and test + * retry path handling. + */ +#ifdef DEBUG + +/* + * Optionally replace a function call with a PIPE_ERROR_OUT_OF_MEMORY + * return value + */ +#define SVGA_TRY(_func) \ + ((SVGA_DEBUG & DEBUG_RETRY) ? PIPE_ERROR_OUT_OF_MEMORY : (_func)) + +/* Optionally replace a function call with a NULL return value */ +#define SVGA_TRY_PTR(_func) \ + ((SVGA_DEBUG & DEBUG_RETRY) ? NULL : (_func)) + +/* + * Optionally replace a function call with a NULL return value, and set + * the _retry parameter to TRUE. + */ +#define SVGA_TRY_MAP(_func, _retry) \ + ((SVGA_DEBUG & DEBUG_RETRY) ? (_retry) = TRUE, NULL : (_func)) +#else + +#define SVGA_TRY(_func) (_func) + +#define SVGA_TRY_PTR(_func) (_func) + +#define SVGA_TRY_MAP(_func, _retry) (_func) +#endif + +/** + * Enter retry processing after hitting out-of-command space + */ +static inline void +svga_retry_enter(struct svga_context *svga) +{ + /* We shouldn't nest retries, but currently we do. */ + if ((SVGA_DEBUG & DEBUG_RETRY) && svga->swc->in_retry) { + debug_printf("WARNING: Recursive retry. Level: %u.\n", + svga->swc->in_retry); + } + svga->swc->in_retry++; +} + +/** + * Exit retry processing after hitting out-of-command space + */ +static inline void +svga_retry_exit(struct svga_context *svga) +{ + assert(svga->swc->in_retry > 0); + svga->swc->in_retry--; +} + +/** + * Perform a function call, and on failure flush the context and retry, + * asserting that the retry succeeded. On return, the boolean argument + * _retried indicates whether the function call was retried or not. 
+ */ +#define SVGA_RETRY_CHECK(_svga, _func, _retried) \ + do { \ + enum pipe_error ret; \ + \ + ret = SVGA_TRY(_func); \ + (_retried) = (ret != PIPE_OK); \ + if (_retried) { \ + svga_retry_enter(_svga); \ + svga_context_flush(_svga, NULL); \ + ret = (_func); \ + assert(ret == PIPE_OK); \ + svga_retry_exit(_svga); \ + } \ + } while(0) + +/** + * Perform a function call, and on failure flush the context and retry, + * asserting that the retry succeeded. + */ +#define SVGA_RETRY(_svga, _func) \ + do { \ + UNUSED boolean retried; \ + \ + SVGA_RETRY_CHECK(_svga, _func, retried); \ + } while(0) + +/** + * Perform a function call, and on out-of-memory, flush the context and + * retry. The retry return value is stored in _ret for reuse. + */ +#define SVGA_RETRY_OOM(_svga, _ret, _func) \ + do { \ + (_ret) = SVGA_TRY(_func); \ + if ((_ret) == PIPE_ERROR_OUT_OF_MEMORY) { \ + svga_retry_enter(_svga); \ + svga_context_flush(_svga, NULL); \ + (_ret) = (_func); \ + svga_retry_exit(_svga); \ + } \ + } while (0); #endif diff --git a/src/gallium/drivers/svga/svga_debug.h b/src/gallium/drivers/svga/svga_debug.h index 3686cc6d9cc..cdad858b045 100644 --- a/src/gallium/drivers/svga/svga_debug.h +++ b/src/gallium/drivers/svga/svga_debug.h @@ -46,6 +46,7 @@ #define DEBUG_CACHE 0x8000 #define DEBUG_STREAMOUT 0x10000 #define DEBUG_SAMPLERS 0x20000 +#define DEBUG_RETRY 0x100000 #ifdef DEBUG extern int SVGA_DEBUG; diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index e0e55f129b8..f8db818b3d0 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -349,7 +349,7 @@ validate_sampler_resources(struct svga_context *svga) assert(svga_have_vgpu10(svga)); - for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_COMPUTE; shader++) { unsigned count = svga->curr.num_sampler_views[shader]; unsigned i; struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS]; @@ -379,7 +379,8 @@ validate_sampler_resources(struct svga_context *svga) if (shader == PIPE_SHADER_FRAGMENT && svga->curr.rast->templ.poly_stipple_enable) { - const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + const unsigned unit = + svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit; struct svga_pipe_sampler_view *sv = svga->polygon_stipple.sampler_view; @@ -415,7 +416,7 @@ validate_constant_buffers(struct svga_context *svga) assert(svga_have_vgpu10(svga)); - for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_COMPUTE; shader++) { enum pipe_error ret; struct svga_buffer *buffer; struct svga_winsys_surface *handle; @@ -482,6 +483,8 @@ last_command_was_draw(const struct svga_context *svga) case SVGA_3D_CMD_DX_DRAW_INSTANCED: case SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED: case SVGA_3D_CMD_DX_DRAW_AUTO: + case SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED_INDIRECT: + case SVGA_3D_CMD_DX_DRAW_INSTANCED_INDIRECT: return true; default: return false; @@ -511,17 +514,51 @@ vertex_buffers_equal(unsigned count, * Prepare the vertex buffers for a drawing command. 
*/ static enum pipe_error -validate_vertex_buffers(struct svga_hwtnl *hwtnl) +validate_vertex_buffers(struct svga_hwtnl *hwtnl, + const struct pipe_stream_output_target *so_vertex_count) { struct svga_context *svga = hwtnl->svga; struct pipe_resource *vbuffers[SVGA3D_INPUTREG_MAX]; struct svga_winsys_surface *vbuffer_handles[SVGA3D_INPUTREG_MAX]; - const unsigned vbuf_count = hwtnl->cmd.vbuf_count; + struct svga_winsys_surface *so_vertex_count_handle; + const unsigned vbuf_count = so_vertex_count ? 1 : hwtnl->cmd.vbuf_count; int last_vbuf = -1; unsigned i; assert(svga_have_vgpu10(svga)); + /* Get handle for each referenced vertex buffer, unless we're using a + * stream-out buffer to specify the drawing information (DrawAuto). + */ + if (so_vertex_count) { + i = 0; + } + else { + for (i = 0; i < vbuf_count; i++) { + struct svga_buffer *sbuf = + svga_buffer(hwtnl->cmd.vbufs[i].buffer.resource); + + if (sbuf) { + vbuffer_handles[i] = svga_buffer_handle(svga, &sbuf->b.b, + PIPE_BIND_VERTEX_BUFFER); + assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER); + if (vbuffer_handles[i] == NULL) + return PIPE_ERROR_OUT_OF_MEMORY; + vbuffers[i] = &sbuf->b.b; + last_vbuf = i; + } + else { + vbuffers[i] = NULL; + vbuffer_handles[i] = NULL; + } + } + } + + for (; i < svga->state.hw_draw.num_vbuffers; i++) { + vbuffers[i] = NULL; + vbuffer_handles[i] = NULL; + } + /* Get handle for each referenced vertex buffer */ for (i = 0; i < vbuf_count; i++) { struct svga_buffer *sbuf = @@ -558,14 +595,38 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl) svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id; } + /* Get handle for the stream out buffer */ + if (so_vertex_count) { + so_vertex_count_handle = svga_buffer_handle(svga, + so_vertex_count->buffer, + (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_STREAM_OUTPUT)); + if (!so_vertex_count_handle) + return PIPE_ERROR_OUT_OF_MEMORY; + } + else { + so_vertex_count_handle = NULL; + } + /* setup vertex buffers */ { SVGA3dVertexBuffer vbuffer_attrs[PIPE_MAX_ATTRIBS]; - for (i = 0; i < vbuf_count; i++) { - vbuffer_attrs[i].stride = hwtnl->cmd.vbufs[i].stride; - vbuffer_attrs[i].offset = hwtnl->cmd.vbufs[i].buffer_offset; - vbuffer_attrs[i].sid = 0; + if (so_vertex_count) { + /* Set IA slot0 input buffer to the SO buffer */ + assert(vbuf_count == 1); + vbuffer_attrs[0].stride = hwtnl->cmd.vbufs[0].stride; + vbuffer_attrs[0].offset = hwtnl->cmd.vbufs[0].buffer_offset; + vbuffer_attrs[0].sid = 0; + vbuffers[0] = so_vertex_count->buffer; + vbuffer_handles[0] = so_vertex_count_handle; + } + else { + for (i = 0; i < vbuf_count; i++) { + vbuffer_attrs[i].stride = hwtnl->cmd.vbufs[i].stride; + vbuffer_attrs[i].offset = hwtnl->cmd.vbufs[i].buffer_offset; + vbuffer_attrs[i].sid = 0; + } } /* If any of the vertex buffer state has changed, issue @@ -736,10 +797,14 @@ static enum pipe_error draw_vgpu10(struct svga_hwtnl *hwtnl, const SVGA3dPrimitiveRange *range, unsigned vcount, + unsigned min_index, unsigned max_index, struct pipe_resource *ib, - unsigned start_instance, unsigned instance_count) + unsigned start_instance, unsigned instance_count, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_stream_output_target *so_vertex_count) { struct svga_context *svga = hwtnl->svga; + struct svga_winsys_surface *indirect_handle; enum pipe_error ret; assert(svga_have_vgpu10(svga)); @@ -779,7 +844,7 @@ draw_vgpu10(struct svga_hwtnl *hwtnl, if (ret != PIPE_OK) return ret; - ret = validate_vertex_buffers(hwtnl); + ret = validate_vertex_buffers(hwtnl, 
so_vertex_count); if (ret != PIPE_OK) return ret; @@ -789,6 +854,16 @@ draw_vgpu10(struct svga_hwtnl *hwtnl, return ret; } + if (indirect) { + indirect_handle = svga_buffer_handle(svga, indirect->buffer, + PIPE_BIND_COMMAND_ARGS_BUFFER); + if (!indirect_handle) + return PIPE_ERROR_OUT_OF_MEMORY; + } + else { + indirect_handle = NULL; + } + /* Set primitive type (line, tri, etc) */ if (svga->state.hw_draw.topology != range->primType) { ret = SVGA3D_vgpu10_SetTopology(svga->swc, range->primType); @@ -800,15 +875,18 @@ draw_vgpu10(struct svga_hwtnl *hwtnl, if (ib) { /* indexed drawing */ - if (instance_count > 1) { + if (indirect) { + ret = SVGA3D_sm5_DrawIndexedInstancedIndirect(svga->swc, + indirect_handle, + indirect->offset); + } + else if (instance_count > 1) { ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc, vcount, instance_count, 0, /* startIndexLocation */ range->indexBias, start_instance); - if (ret != PIPE_OK) - return ret; } else { /* non-instanced drawing */ @@ -816,8 +894,9 @@ draw_vgpu10(struct svga_hwtnl *hwtnl, vcount, 0, /* startIndexLocation */ range->indexBias); - if (ret != PIPE_OK) - return ret; + } + if (ret != PIPE_OK) { + return ret; } } else { @@ -835,22 +914,30 @@ draw_vgpu10(struct svga_hwtnl *hwtnl, assert(svga->state.hw_draw.ib == NULL); - if (instance_count > 1) { + if (so_vertex_count) { + /* Stream-output drawing */ + ret = SVGA3D_vgpu10_DrawAuto(svga->swc); + } + else if (indirect) { + ret = SVGA3D_sm5_DrawInstancedIndirect(svga->swc, + indirect_handle, + indirect->offset); + } + else if (instance_count > 1) { ret = SVGA3D_vgpu10_DrawInstanced(svga->swc, vcount, instance_count, range->indexBias, start_instance); - if (ret != PIPE_OK) - return ret; } else { /* non-instanced */ ret = SVGA3D_vgpu10_Draw(svga->swc, vcount, range->indexBias); - if (ret != PIPE_OK) - return ret; + } + if (ret != PIPE_OK) { + return ret; } } @@ -1044,14 +1131,20 @@ check_draw_params(struct svga_hwtnl *hwtnl, /** * All drawing filters down into this function, either directly * on the hardware path or after doing software vertex processing. + * \param indirect if non-null, get the vertex count, first vertex, etc. + * from a buffer. + * \param so_vertex_count if non-null, get the vertex count from a + * stream-output target. 
*/ enum pipe_error svga_hwtnl_prim(struct svga_hwtnl *hwtnl, - const SVGA3dPrimitiveRange * range, + const SVGA3dPrimitiveRange *range, unsigned vcount, - unsigned min_index, - unsigned max_index, struct pipe_resource *ib, - unsigned start_instance, unsigned instance_count) + unsigned min_index, unsigned max_index, + struct pipe_resource *ib, + unsigned start_instance, unsigned instance_count, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_stream_output_target *so_vertex_count) { enum pipe_error ret = PIPE_OK; @@ -1059,17 +1152,14 @@ svga_hwtnl_prim(struct svga_hwtnl *hwtnl, if (svga_have_vgpu10(hwtnl->svga)) { /* draw immediately */ - ret = draw_vgpu10(hwtnl, range, vcount, ib, - start_instance, instance_count); - if (ret != PIPE_OK) { - svga_context_flush(hwtnl->svga, NULL); - ret = draw_vgpu10(hwtnl, range, vcount, ib, - start_instance, instance_count); - assert(ret == PIPE_OK); - } + SVGA_RETRY(hwtnl->svga, draw_vgpu10(hwtnl, range, vcount, min_index, + max_index, ib, start_instance, + instance_count, indirect, + so_vertex_count)); } else { /* batch up drawing commands */ + assert(indirect == NULL); #ifdef DEBUG check_draw_params(hwtnl, range, min_index, max_index, ib); assert(start_instance == 0); diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h index 9d79676d3f9..56d5127051d 100644 --- a/src/gallium/drivers/svga/svga_draw.h +++ b/src/gallium/drivers/svga/svga_draw.h @@ -60,7 +60,8 @@ svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl, enum pipe_error svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, enum pipe_prim_type prim, unsigned start, unsigned count, - unsigned start_instance, unsigned instance_count); + unsigned start_instance, unsigned instance_count, + ubyte vertices_per_patch); enum pipe_error svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c index 19d5e503137..af27e038bc8 100644 --- a/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -175,13 +175,14 @@ done: static enum pipe_error simple_draw_arrays(struct svga_hwtnl *hwtnl, enum pipe_prim_type prim, unsigned start, unsigned count, - unsigned start_instance, unsigned instance_count) + unsigned start_instance, unsigned instance_count, + ubyte vertices_per_patch) { SVGA3dPrimitiveRange range; unsigned hw_prim; unsigned hw_count; - hw_prim = svga_translate_prim(prim, count, &hw_count); + hw_prim = svga_translate_prim(prim, count, &hw_count, vertices_per_patch); if (hw_count == 0) return PIPE_ERROR_BAD_INPUT; @@ -200,14 +201,16 @@ simple_draw_arrays(struct svga_hwtnl *hwtnl, */ return svga_hwtnl_prim(hwtnl, &range, count, 0, count - 1, NULL, - start_instance, instance_count); + start_instance, instance_count, + NULL, NULL); } enum pipe_error svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, enum pipe_prim_type prim, unsigned start, unsigned count, - unsigned start_instance, unsigned instance_count) + unsigned start_instance, unsigned instance_count, + ubyte vertices_per_patch) { enum pipe_prim_type gen_prim; unsigned gen_size, gen_nr; @@ -225,7 +228,7 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, } if (svga->curr.rast->templ.flatshade && - svga->state.hw_draw.fs->constant_color_output) { + svga_fs_variant(svga->state.hw_draw.fs)->constant_color_output) { /* The fragment color is a constant, not per-vertex so the whole * primitive will be the same color (except for possible blending). 
* We can ignore the current provoking vertex state and use whatever @@ -273,7 +276,8 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, if (gen_type == U_GENERATE_LINEAR) { ret = simple_draw_arrays(hwtnl, gen_prim, start, count, - start_instance, instance_count); + start_instance, instance_count, + vertices_per_patch); } else { struct pipe_resource *gen_buf = NULL; @@ -299,7 +303,8 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, count - 1, gen_prim, 0, gen_nr, start_instance, - instance_count); + instance_count, + vertices_per_patch); } if (gen_buf) { diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c index 41cd4d18993..b17fe44f747 100644 --- a/src/gallium/drivers/svga/svga_draw_elements.c +++ b/src/gallium/drivers/svga/svga_draw_elements.c @@ -186,14 +186,15 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl, enum pipe_prim_type prim, unsigned start, unsigned count, unsigned start_instance, - unsigned instance_count) + unsigned instance_count, + ubyte vertices_per_patch) { SVGA3dPrimitiveRange range; unsigned hw_prim; unsigned hw_count; unsigned index_offset = start * index_size; - hw_prim = svga_translate_prim(prim, count, &hw_count); + hw_prim = svga_translate_prim(prim, count, &hw_count, vertices_per_patch); if (hw_count == 0) return PIPE_OK; /* nothing to draw */ @@ -206,7 +207,8 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl, return svga_hwtnl_prim(hwtnl, &range, count, min_index, max_index, index_buffer, - start_instance, instance_count); + start_instance, instance_count, + NULL, NULL); } @@ -234,12 +236,20 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, &gen_size, &gen_nr, &gen_func); } else { + unsigned hw_pv; + + /* There is no geometry ordering with PATCH, so no need to + * consider provoking vertex mode for the translation. + * So use the same api_pv as the hw_pv. + */ + hw_pv = info->mode == PIPE_PRIM_PATCHES ? hwtnl->api_pv : + hwtnl->hw_pv; gen_type = u_index_translator(svga_hw_prims, info->mode, info->index_size, count, hwtnl->api_pv, - hwtnl->hw_pv, + hw_pv, PR_DISABLE, &gen_prim, &gen_size, &gen_nr, &gen_func); } @@ -271,7 +281,8 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, info->max_index, gen_prim, index_offset, count, info->start_instance, - info->instance_count); + info->instance_count, + info->vertices_per_patch); pipe_resource_reference(&index_buffer, NULL); } else { @@ -299,7 +310,8 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, gen_prim, gen_offset, gen_nr, info->start_instance, - info->instance_count); + info->instance_count, + info->vertices_per_patch); } if (gen_buf) { diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h index 52a2c0f18b3..475ccc5aae0 100644 --- a/src/gallium/drivers/svga/svga_draw_private.h +++ b/src/gallium/drivers/svga/svga_draw_private.h @@ -52,7 +52,8 @@ static const unsigned svga_hw_prims = (1 << PIPE_PRIM_LINES_ADJACENCY) | (1 << PIPE_PRIM_LINE_STRIP_ADJACENCY) | (1 << PIPE_PRIM_TRIANGLES_ADJACENCY) | - (1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY)); + (1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY) | + (1 << PIPE_PRIM_PATCHES)); /** @@ -64,7 +65,8 @@ static const unsigned svga_hw_prims = * those to other types of primitives with index/translation code. 
*/ static inline SVGA3dPrimitiveType -svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count) +svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count, + ubyte vertices_per_patch) { switch (mode) { case PIPE_PRIM_POINTS: @@ -107,6 +109,13 @@ svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count) *prim_count = vcount / 2 - 2 ; return SVGA3D_PRIMITIVE_TRIANGLESTRIP_ADJ; + case PIPE_PRIM_PATCHES: + *prim_count = vcount / vertices_per_patch ; + assert(vertices_per_patch >= 1); + assert(vertices_per_patch <= 32); + return (SVGA3D_PRIMITIVE_1_CONTROL_POINT_PATCH - 1) + + vertices_per_patch; + default: assert(0); *prim_count = 0; @@ -218,7 +227,9 @@ svga_hwtnl_prim(struct svga_hwtnl *hwtnl, unsigned min_index, unsigned max_index, struct pipe_resource *ib, - unsigned start_instance, unsigned instance_count); + unsigned start_instance, unsigned instance_count, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_stream_output_target *so_vertex_count); enum pipe_error svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl, @@ -231,6 +242,7 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl, unsigned start, unsigned count, unsigned start_instance, - unsigned instance_count); + unsigned instance_count, + ubyte vertices_per_patch); #endif diff --git a/src/gallium/drivers/svga/svga_format.c b/src/gallium/drivers/svga/svga_format.c index 3f68f0cd67e..bb2f546d67d 100644 --- a/src/gallium/drivers/svga/svga_format.c +++ b/src/gallium/drivers/svga/svga_format.c @@ -71,10 +71,10 @@ static const struct vgpu10_format_entry format_conversion_table[] = [ PIPE_FORMAT_Z32_FLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT, SVGA3D_D32_FLOAT, 0 }, [ PIPE_FORMAT_Z24_UNORM_S8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D24_UNORM_S8_UINT, 0 }, [ PIPE_FORMAT_Z24X8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D24_UNORM_S8_UINT, 0 }, - [ PIPE_FORMAT_R32_FLOAT ] = { SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, TF_GEN_MIPS }, - [ PIPE_FORMAT_R32G32_FLOAT ] = { SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, TF_GEN_MIPS }, + [ PIPE_FORMAT_R32_FLOAT ] = { SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, TF_GEN_MIPS }, + [ PIPE_FORMAT_R32G32_FLOAT ] = { SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, TF_GEN_MIPS }, [ PIPE_FORMAT_R32G32B32_FLOAT ] = { SVGA3D_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, TF_GEN_MIPS }, - [ PIPE_FORMAT_R32G32B32A32_FLOAT ] = { SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, TF_GEN_MIPS }, + [ PIPE_FORMAT_R32G32B32A32_FLOAT ] = { SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, TF_GEN_MIPS }, [ PIPE_FORMAT_R32_USCALED ] = { SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, [ PIPE_FORMAT_R32G32_USCALED ] = { SVGA3D_R32G32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, [ PIPE_FORMAT_R32G32B32_USCALED ] = { SVGA3D_R32G32B32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST }, @@ -176,11 +176,11 @@ static const struct vgpu10_format_entry format_conversion_table[] = [ PIPE_FORMAT_R16G16B16A16_SINT ] = { SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, 0 }, [ PIPE_FORMAT_R32_UINT ] = { SVGA3D_R32_UINT, SVGA3D_R32_UINT, SVGA3D_R32_UINT, 0 }, [ PIPE_FORMAT_R32G32_UINT ] = { SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, 0 }, - [ 
PIPE_FORMAT_R32G32B32_UINT ] = { SVGA3D_R32G32B32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + [ PIPE_FORMAT_R32G32B32_UINT ] = { SVGA3D_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, 0 }, [ PIPE_FORMAT_R32G32B32A32_UINT ] = { SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, 0 }, [ PIPE_FORMAT_R32_SINT ] = { SVGA3D_R32_SINT, SVGA3D_R32_SINT, SVGA3D_R32_SINT, 0 }, [ PIPE_FORMAT_R32G32_SINT ] = { SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, 0 }, - [ PIPE_FORMAT_R32G32B32_SINT ] = { SVGA3D_R32G32B32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + [ PIPE_FORMAT_R32G32B32_SINT ] = { SVGA3D_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, 0 }, [ PIPE_FORMAT_R32G32B32A32_SINT ] = { SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, 0 }, [ PIPE_FORMAT_A8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UINT, TF_000X }, [ PIPE_FORMAT_I8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UINT, TF_XXXX }, @@ -2137,7 +2137,7 @@ svga_is_format_supported(struct pipe_screen *screen, } if (util_format_is_srgb(format) && - (bindings & PIPE_BIND_DISPLAY_TARGET)) { + (bindings & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_RENDER_TARGET))) { /* We only support sRGB rendering with vgpu10 */ return false; } @@ -2252,6 +2252,12 @@ svga_is_dx_format_supported(struct pipe_screen *screen, return svga_format != SVGA3D_FORMAT_INVALID; } + if (bindings & PIPE_BIND_SAMPLER_VIEW && target == PIPE_BUFFER) { + unsigned flags; + svga_translate_texture_buffer_view_format(format, &svga_format, &flags); + return svga_format != SVGA3D_FORMAT_INVALID; + } + svga_format = svga_translate_format(ss, format, bindings); if (svga_format == SVGA3D_FORMAT_INVALID) { return false; diff --git a/src/gallium/drivers/svga/svga_link.c b/src/gallium/drivers/svga/svga_link.c index 0bf40d153b7..c9861a7e481 100644 --- a/src/gallium/drivers/svga/svga_link.c +++ b/src/gallium/drivers/svga/svga_link.c @@ -87,6 +87,15 @@ svga_link_shaders(const struct tgsi_shader_info *outshader_info, } } + /* Find the index for position */ + linkage->position_index = 0; + for (i = 0; i < outshader_info->num_outputs; i++) { + if (outshader_info->output_semantic_name[i] == TGSI_SEMANTIC_POSITION) { + linkage->position_index = i; + break; + } + } + linkage->num_inputs = inshader_info->num_inputs; /* Things like the front-face register are handled here */ @@ -100,7 +109,8 @@ svga_link_shaders(const struct tgsi_shader_info *outshader_info, /* Debug */ if (SVGA_DEBUG & DEBUG_TGSI) { - unsigned reg = 0; + uint64_t reg = 0; + uint64_t one = 1; debug_printf("### linkage info: num_inputs=%d input_map_max=%d\n", linkage->num_inputs, linkage->input_map_max); @@ -116,10 +126,8 @@ svga_link_shaders(const struct tgsi_shader_info *outshader_info, tgsi_interpolate_names[inshader_info->input_interpolate[i]]); /* make sure no repeating register index */ - if (reg & 1 << linkage->input_map[i]) { - assert(0); - } - reg |= 1 << linkage->input_map[i]; + assert((reg & (one << linkage->input_map[i])) == 0); + reg |= one << linkage->input_map[i]; } } } diff --git a/src/gallium/drivers/svga/svga_link.h b/src/gallium/drivers/svga/svga_link.h index c21686eef59..8d3517ea28a 100644 --- a/src/gallium/drivers/svga/svga_link.h +++ b/src/gallium/drivers/svga/svga_link.h @@ -9,6 +9,7 @@ struct svga_context; struct shader_linkage { unsigned num_inputs; + unsigned position_index; /* position register index */ unsigned input_map_max; /* highest index of 
mapped inputs */ ubyte input_map[PIPE_MAX_SHADER_INPUTS]; }; diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c index b5557d31f44..e24a6beb0e4 100644 --- a/src/gallium/drivers/svga/svga_pipe_blend.c +++ b/src/gallium/drivers/svga/svga_pipe_blend.c @@ -91,6 +91,51 @@ svga_translate_blend_func(unsigned mode) } +/** + * Translate gallium logicop mode to SVGA3D logicop mode. + */ +static int +translate_logicop(enum pipe_logicop op) +{ + switch (op) { + case PIPE_LOGICOP_CLEAR: + return SVGA3D_DX11_LOGICOP_CLEAR; + case PIPE_LOGICOP_NOR: + return SVGA3D_DX11_LOGICOP_NOR; + case PIPE_LOGICOP_AND_INVERTED: + return SVGA3D_DX11_LOGICOP_AND_INVERTED; + case PIPE_LOGICOP_COPY_INVERTED: + return SVGA3D_DX11_LOGICOP_COPY_INVERTED; + case PIPE_LOGICOP_AND_REVERSE: + return SVGA3D_DX11_LOGICOP_AND_REVERSE; + case PIPE_LOGICOP_INVERT: + return SVGA3D_DX11_LOGICOP_INVERT; + case PIPE_LOGICOP_XOR: + return SVGA3D_DX11_LOGICOP_XOR; + case PIPE_LOGICOP_NAND: + return SVGA3D_DX11_LOGICOP_NAND; + case PIPE_LOGICOP_AND: + return SVGA3D_DX11_LOGICOP_AND; + case PIPE_LOGICOP_EQUIV: + return SVGA3D_DX11_LOGICOP_EQUIV; + case PIPE_LOGICOP_NOOP: + return SVGA3D_DX11_LOGICOP_NOOP; + case PIPE_LOGICOP_OR_INVERTED: + return SVGA3D_DX11_LOGICOP_OR_INVERTED; + case PIPE_LOGICOP_COPY: + return SVGA3D_DX11_LOGICOP_COPY; + case PIPE_LOGICOP_OR_REVERSE: + return SVGA3D_DX11_LOGICOP_OR_REVERSE; + case PIPE_LOGICOP_OR: + return SVGA3D_DX11_LOGICOP_OR; + case PIPE_LOGICOP_SET: + return SVGA3D_DX11_LOGICOP_SET; + default: + return SVGA3D_DX11_LOGICOP_COPY; + } +}; + + /** * Define a vgpu10 blend state object for the given * svga blend state. @@ -100,7 +145,6 @@ define_blend_state_object(struct svga_context *svga, struct svga_blend_state *bs) { SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS]; - unsigned try; int i; assert(svga_have_vgpu10(svga)); @@ -116,31 +160,141 @@ define_blend_state_object(struct svga_context *svga, perRT[i].destBlendAlpha = bs->rt[i].dstblend_alpha; perRT[i].blendOpAlpha = bs->rt[i].blendeq_alpha; perRT[i].renderTargetWriteMask = bs->rt[i].writemask; - perRT[i].logicOpEnable = 0; - perRT[i].logicOp = SVGA3D_LOGICOP_COPY; + perRT[i].logicOpEnable = bs->logicop_enabled; + perRT[i].logicOp = bs->logicop_mode; } - /* Loop in case command buffer is full and we need to flush and retry */ - for (try = 0; try < 2; try++) { - enum pipe_error ret; - - ret = SVGA3D_vgpu10_DefineBlendState(svga->swc, - bs->id, - bs->alpha_to_coverage, - bs->independent_blend_enable, - perRT); - if (ret == PIPE_OK) - return; - svga_context_flush(svga, NULL); + SVGA_RETRY(svga, SVGA3D_vgpu10_DefineBlendState(svga->swc, + bs->id, + bs->alpha_to_coverage, + bs->independent_blend_enable, + perRT)); +} + + +/** + * If SVGA3D_DEVCAP_LOGIC_BLENDOPS is false, we can't directly implement + * GL's logicops. But we can emulate some of them. We set up the blending + * state for that here. 
+ */ +static void +emulate_logicop(struct svga_context *svga, + unsigned logicop_func, + struct svga_blend_state *blend, + unsigned buffer) +{ + switch (logicop_func) { + case PIPE_LOGICOP_XOR: + case PIPE_LOGICOP_INVERT: + blend->need_white_fragments = TRUE; + blend->rt[buffer].blend_enable = TRUE; + blend->rt[buffer].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[buffer].dstblend = SVGA3D_BLENDOP_ONE; + blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_SUBTRACT; + break; + case PIPE_LOGICOP_CLEAR: + blend->rt[buffer].blend_enable = TRUE; + blend->rt[buffer].srcblend = SVGA3D_BLENDOP_ZERO; + blend->rt[buffer].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MINIMUM; + break; + case PIPE_LOGICOP_COPY: + blend->rt[buffer].blend_enable = FALSE; + blend->rt[buffer].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[buffer].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_ADD; + break; + case PIPE_LOGICOP_COPY_INVERTED: + blend->rt[buffer].blend_enable = TRUE; + blend->rt[buffer].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; + blend->rt[buffer].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_ADD; + break; + case PIPE_LOGICOP_NOOP: + blend->rt[buffer].blend_enable = TRUE; + blend->rt[buffer].srcblend = SVGA3D_BLENDOP_ZERO; + blend->rt[buffer].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_ADD; + break; + case PIPE_LOGICOP_SET: + blend->rt[buffer].blend_enable = TRUE; + blend->rt[buffer].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[buffer].dstblend = SVGA3D_BLENDOP_ONE; + blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MAXIMUM; + break; + case PIPE_LOGICOP_AND: + /* Approximate with minimum - works for the 0 & anything case: */ + blend->rt[buffer].blend_enable = TRUE; + blend->rt[buffer].srcblend = SVGA3D_BLENDOP_SRCCOLOR; + blend->rt[buffer].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MINIMUM; + break; + case PIPE_LOGICOP_AND_REVERSE: + blend->rt[buffer].blend_enable = TRUE; + blend->rt[buffer].srcblend = SVGA3D_BLENDOP_SRCCOLOR; + blend->rt[buffer].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR; + blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MINIMUM; + break; + case PIPE_LOGICOP_AND_INVERTED: + blend->rt[buffer].blend_enable = TRUE; + blend->rt[buffer].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; + blend->rt[buffer].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MINIMUM; + break; + case PIPE_LOGICOP_OR: + /* Approximate with maximum - works for the 1 | anything case: */ + blend->rt[buffer].blend_enable = TRUE; + blend->rt[buffer].srcblend = SVGA3D_BLENDOP_SRCCOLOR; + blend->rt[buffer].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MAXIMUM; + break; + case PIPE_LOGICOP_OR_REVERSE: + blend->rt[buffer].blend_enable = TRUE; + blend->rt[buffer].srcblend = SVGA3D_BLENDOP_SRCCOLOR; + blend->rt[buffer].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR; + blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MAXIMUM; + break; + case PIPE_LOGICOP_OR_INVERTED: + blend->rt[buffer].blend_enable = TRUE; + blend->rt[buffer].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; + blend->rt[buffer].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MAXIMUM; + break; + case PIPE_LOGICOP_NAND: + case PIPE_LOGICOP_NOR: + case PIPE_LOGICOP_EQUIV: + /* Fill these in with plausible values */ + blend->rt[buffer].blend_enable = FALSE; + blend->rt[buffer].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[buffer].dstblend = SVGA3D_BLENDOP_ZERO; + 
blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_ADD; + break; + default: + assert(0); + break; + } + blend->rt[buffer].srcblend_alpha = blend->rt[buffer].srcblend; + blend->rt[buffer].dstblend_alpha = blend->rt[buffer].dstblend; + blend->rt[buffer].blendeq_alpha = blend->rt[buffer].blendeq; + + if (logicop_func == PIPE_LOGICOP_XOR) { + pipe_debug_message(&svga->debug.callback, CONFORMANCE, + "XOR logicop mode has limited support"); + } + else if (logicop_func != PIPE_LOGICOP_COPY) { + pipe_debug_message(&svga->debug.callback, CONFORMANCE, + "general logicops are not supported"); } } + static void * svga_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *templ) { struct svga_context *svga = svga_context(pipe); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state ); unsigned i; @@ -166,107 +320,18 @@ svga_create_blend_state(struct pipe_context *pipe, * top of D3D9 API. Instead we try to simulate with various blend modes. */ if (templ->logicop_enable) { - switch (templ->logicop_func) { - case PIPE_LOGICOP_XOR: - case PIPE_LOGICOP_INVERT: - blend->need_white_fragments = TRUE; - blend->rt[i].blend_enable = TRUE; - blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; - blend->rt[i].dstblend = SVGA3D_BLENDOP_ONE; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_SUBTRACT; - break; - case PIPE_LOGICOP_CLEAR: - blend->rt[i].blend_enable = TRUE; - blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO; - blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM; - break; - case PIPE_LOGICOP_COPY: - blend->rt[i].blend_enable = FALSE; - blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; - blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; - break; - case PIPE_LOGICOP_COPY_INVERTED: - blend->rt[i].blend_enable = TRUE; - blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; - blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; - break; - case PIPE_LOGICOP_NOOP: - blend->rt[i].blend_enable = TRUE; - blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO; - blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; - break; - case PIPE_LOGICOP_SET: - blend->rt[i].blend_enable = TRUE; - blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; - blend->rt[i].dstblend = SVGA3D_BLENDOP_ONE; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM; - break; - case PIPE_LOGICOP_AND: - /* Approximate with minimum - works for the 0 & anything case: */ - blend->rt[i].blend_enable = TRUE; - blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR; - blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM; - break; - case PIPE_LOGICOP_AND_REVERSE: - blend->rt[i].blend_enable = TRUE; - blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR; - blend->rt[i].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM; - break; - case PIPE_LOGICOP_AND_INVERTED: - blend->rt[i].blend_enable = TRUE; - blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; - blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM; - break; - case PIPE_LOGICOP_OR: - /* Approximate with maximum - works for the 1 | anything case: */ - blend->rt[i].blend_enable = TRUE; - blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR; - blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM; - break; - case PIPE_LOGICOP_OR_REVERSE: - blend->rt[i].blend_enable = TRUE; 
- blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR; - blend->rt[i].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM; - break; - case PIPE_LOGICOP_OR_INVERTED: - blend->rt[i].blend_enable = TRUE; - blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; - blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM; - break; - case PIPE_LOGICOP_NAND: - case PIPE_LOGICOP_NOR: - case PIPE_LOGICOP_EQUIV: - /* Fill these in with plausible values */ - blend->rt[i].blend_enable = FALSE; - blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; - blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; - blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; - break; - default: - assert(0); - break; - } - blend->rt[i].srcblend_alpha = blend->rt[i].srcblend; - blend->rt[i].dstblend_alpha = blend->rt[i].dstblend; - blend->rt[i].blendeq_alpha = blend->rt[i].blendeq; - - if (templ->logicop_func == PIPE_LOGICOP_XOR) { - pipe_debug_message(&svga->debug.callback, CONFORMANCE, - "XOR logicop mode has limited support"); + if (ss->haveBlendLogicops) { + blend->logicop_enabled = TRUE; + blend->logicop_mode = translate_logicop(templ->logicop_func); + blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; + blend->rt[i].blendeq_alpha = SVGA3D_BLENDEQ_ADD; + blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].srcblend_alpha = SVGA3D_BLENDOP_ZERO; + blend->rt[i].dstblend_alpha = SVGA3D_BLENDOP_ZERO; } - else if (templ->logicop_func != PIPE_LOGICOP_COPY) { - pipe_debug_message(&svga->debug.callback, CONFORMANCE, - "general logicops are not supported"); + else { + emulate_logicop(svga, templ->logicop_func, blend, i); } } else { @@ -374,14 +439,7 @@ static void svga_delete_blend_state(struct pipe_context *pipe, (struct svga_blend_state *) blend; if (svga_have_vgpu10(svga) && bs->id != SVGA3D_INVALID_ID) { - enum pipe_error ret; - - ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id)); if (bs->id == svga->state.hw_draw.blend_id) svga->state.hw_draw.blend_id = SVGA3D_INVALID_ID; diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c index 31806ceb1e1..a756509ce76 100644 --- a/src/gallium/drivers/svga/svga_pipe_blit.c +++ b/src/gallium/drivers/svga/svga_pipe_blit.c @@ -80,7 +80,6 @@ intra_surface_copy(struct svga_context *svga, struct pipe_resource *tex, unsigned dst_x, unsigned dst_y, unsigned dst_z, unsigned width, unsigned height, unsigned depth) { - enum pipe_error ret; SVGA3dCopyBox box; struct svga_texture *stex; @@ -102,15 +101,8 @@ intra_surface_copy(struct svga_context *svga, struct pipe_resource *tex, box.srcy = src_y; box.srcz = src_z; - ret = SVGA3D_vgpu10_IntraSurfaceCopy(svga->swc, - stex->handle, level, layer_face, &box); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_IntraSurfaceCopy(svga->swc, - stex->handle, level, layer_face, &box); - assert(ret == PIPE_OK); - } - + SVGA_RETRY(svga, SVGA3D_vgpu10_IntraSurfaceCopy(svga->swc, stex->handle, + level, layer_face, &box)); /* Mark the texture subresource as rendered-to. 
*/ svga_set_texture_rendered_to(stex, layer_face, level); } @@ -630,11 +622,13 @@ try_blit(struct svga_context *svga, const struct pipe_blit_info *blit_info) util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems); util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs); util_blitter_save_geometry_shader(svga->blitter, svga->curr.user_gs); + util_blitter_save_tessctrl_shader(svga->blitter, svga->curr.tcs); + util_blitter_save_tesseval_shader(svga->blitter, svga->curr.tes); util_blitter_save_so_targets(svga->blitter, svga->num_so_targets, (struct pipe_stream_output_target**)svga->so_targets); util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast); - util_blitter_save_viewport(svga->blitter, &svga->curr.viewport); - util_blitter_save_scissor(svga->blitter, &svga->curr.scissor); + util_blitter_save_viewport(svga->blitter, &svga->curr.viewport[0]); + util_blitter_save_scissor(svga->blitter, &svga->curr.scissor[0]); util_blitter_save_fragment_shader(svga->blitter, svga->curr.fs); util_blitter_save_blend(svga->blitter, (void*)svga->curr.blend); util_blitter_save_depth_stencil_alpha(svga->blitter, @@ -835,7 +829,6 @@ svga_resource_copy_region(struct pipe_context *pipe, if (dst_tex->target == PIPE_BUFFER && src_tex->target == PIPE_BUFFER) { /* can't copy within the same buffer, unfortunately */ if (svga_have_vgpu10(svga) && src_tex != dst_tex) { - enum pipe_error ret; struct svga_winsys_surface *src_surf; struct svga_winsys_surface *dst_surf; struct svga_buffer *dbuffer = svga_buffer(dst_tex); @@ -844,15 +837,9 @@ svga_resource_copy_region(struct pipe_context *pipe, src_surf = svga_buffer_handle(svga, src_tex, sbuffer->bind_flags); dst_surf = svga_buffer_handle(svga, dst_tex, dbuffer->bind_flags); - ret = SVGA3D_vgpu10_BufferCopy(svga->swc, src_surf, dst_surf, - src_box->x, dstx, src_box->width); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_BufferCopy(svga->swc, src_surf, dst_surf, - src_box->x, dstx, src_box->width); - assert(ret == PIPE_OK); - } - + SVGA_RETRY(svga, SVGA3D_vgpu10_BufferCopy(svga->swc, src_surf, + dst_surf, src_box->x, dstx, + src_box->width)); dbuffer->dirty = TRUE; } else { diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c index 89a9b533f91..490f91b5fc9 100644 --- a/src/gallium/drivers/svga/svga_pipe_clear.c +++ b/src/gallium/drivers/svga/svga_pipe_clear.c @@ -45,11 +45,13 @@ begin_blit(struct svga_context *svga) util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems); util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs); util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs); + util_blitter_save_tessctrl_shader(svga->blitter, svga->curr.tcs); + util_blitter_save_tesseval_shader(svga->blitter, svga->curr.tes); util_blitter_save_so_targets(svga->blitter, svga->num_so_targets, (struct pipe_stream_output_target**)svga->so_targets); util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast); - util_blitter_save_viewport(svga->blitter, &svga->curr.viewport); - util_blitter_save_scissor(svga->blitter, &svga->curr.scissor); + util_blitter_save_viewport(svga->blitter, &svga->curr.viewport[0]); + util_blitter_save_scissor(svga->blitter, &svga->curr.scissor[0]); util_blitter_save_fragment_shader(svga->blitter, svga->curr.fs); util_blitter_save_blend(svga->blitter, (void*)svga->curr.blend); util_blitter_save_depth_stencil_alpha(svga->blitter, @@ -248,15 +250,7 @@ svga_clear(struct pipe_context *pipe, unsigned buffers, 
const struct pipe_scisso /* flush any queued prims (don't want them to appear after the clear!) */ svga_hwtnl_flush_retry(svga); - ret = try_clear( svga, buffers, color, depth, stencil ); - - if (ret == PIPE_ERROR_OUT_OF_MEMORY) { - /* Flush command buffer and retry: - */ - svga_context_flush( svga, NULL ); - - ret = try_clear( svga, buffers, color, depth, stencil ); - } + SVGA_RETRY_OOM(svga, ret, try_clear( svga, buffers, color, depth, stencil)); /* * Mark target surfaces as dirty @@ -277,7 +271,6 @@ svga_clear_texture(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); struct svga_surface *svga_surface_dst; - enum pipe_error ret; struct pipe_surface tmpl; struct pipe_surface *surface; @@ -309,8 +302,8 @@ svga_clear_texture(struct pipe_context *pipe, stencil = 0; } else { - util_format_unpack_z_float(surface->format, &depth, data, 1); - util_format_unpack_s_8uint(surface->format, &stencil, data, 1); + desc->unpack_z_float(&depth, 0, data, 0, 1, 1); + desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1); } if (util_format_has_depth(desc)) { @@ -334,17 +327,9 @@ svga_clear_texture(struct pipe_context *pipe, /* clearing whole surface, use direct VGPU10 command */ - ret = SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv, - clear_flags, - stencil, depth); - if (ret != PIPE_OK) { - /* flush and try again */ - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv, - clear_flags, - stencil, depth); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv, + clear_flags, + stencil, depth)); } else { /* To clear subtexture use software fallback */ @@ -367,7 +352,18 @@ svga_clear_texture(struct pipe_context *pipe, color.f[0] = color.f[1] = color.f[2] = color.f[3] = 0; } else { - util_format_unpack_rgba(surface->format, color.ui, data, 1); + if (util_format_is_pure_sint(surface->format)) { + /* signed integer */ + desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1); + } + else if (util_format_is_pure_uint(surface->format)) { + /* unsigned integer */ + desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1); + } + else { + /* floating point */ + desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1); + } } /* Setup render target view */ @@ -390,14 +386,8 @@ svga_clear_texture(struct pipe_context *pipe, } else { /* clearing whole surface using VGPU10 command */ - ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv, - color.f); - if (ret != PIPE_OK) { - svga_context_flush(svga,NULL); - ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv, - color.f); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv, + color.f)); } } else { @@ -526,13 +516,9 @@ svga_clear_render_target(struct pipe_context *pipe, height); } else { enum pipe_error ret; - - ret = svga_try_clear_render_target(svga, dst, color); - if (ret == PIPE_ERROR_OUT_OF_MEMORY) { - svga_context_flush( svga, NULL ); - ret = svga_try_clear_render_target(svga, dst, color); - } - + + SVGA_RETRY_OOM(svga, ret, svga_try_clear_render_target(svga, dst, + color)); assert (ret == PIPE_OK); } svga_toggle_render_condition(svga, render_condition_enabled, TRUE); diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 5ebd17cf0ea..e6fabfc995e 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -24,12 +24,16 @@ **********************************************************/ +#include "util/u_draw.h" +#include 
"util/format/u_format.h" #include "util/u_helpers.h" #include "util/u_inlines.h" #include "util/u_prim.h" #include "util/u_prim_restart.h" #include "svga_context.h" +#include "svga_draw_private.h" +#include "svga_screen.h" #include "svga_draw.h" #include "svga_shader.h" #include "svga_surface.h" @@ -37,59 +41,137 @@ #include "svga_debug.h" #include "svga_resource_buffer.h" -/* Returns TRUE if we are currently using flat shading. - */ -static boolean -is_using_flat_shading(const struct svga_context *svga) -{ - return - svga->state.hw_draw.fs ? svga->state.hw_draw.fs->uses_flat_interp : FALSE; -} - static enum pipe_error retry_draw_range_elements(struct svga_context *svga, const struct pipe_draw_info *info, unsigned count) { - enum pipe_error ret; - SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DRAWELEMENTS); - ret = svga_hwtnl_draw_range_elements(svga->hwtnl, info, count); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = svga_hwtnl_draw_range_elements(svga->hwtnl, info, count); - } + SVGA_RETRY(svga, svga_hwtnl_draw_range_elements(svga->hwtnl, info, count)); - assert (ret == PIPE_OK); SVGA_STATS_TIME_POP(svga_sws(svga)); - return ret; + return PIPE_OK; } static enum pipe_error -retry_draw_arrays(struct svga_context *svga, - enum pipe_prim_type prim, unsigned start, unsigned count, - unsigned start_instance, unsigned instance_count) +retry_draw_arrays( struct svga_context *svga, + enum pipe_prim_type prim, unsigned start, unsigned count, + unsigned start_instance, unsigned instance_count, + ubyte vertices_per_patch) { enum pipe_error ret; SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DRAWARRAYS); - for (unsigned try = 0; try < 2; try++) { - ret = svga_hwtnl_draw_arrays(svga->hwtnl, prim, start, count, - start_instance, instance_count); - if (ret == PIPE_OK) - break; - svga_context_flush(svga, NULL); - } - + SVGA_RETRY_OOM(svga, ret, svga_hwtnl_draw_arrays(svga->hwtnl, prim, start, + count, start_instance, + instance_count, + vertices_per_patch)); SVGA_STATS_TIME_POP(svga_sws(svga)); return ret; } +/** + * Auto draw (get vertex count from a transform feedback result). + */ +static enum pipe_error +retry_draw_auto(struct svga_context *svga, + const struct pipe_draw_info *info) +{ + assert(svga_have_sm5(svga)); + assert(info->count_from_stream_output); + assert(info->instance_count == 1); + /* SO drawing implies core profile and none of these prim types */ + assert(info->mode != PIPE_PRIM_QUADS && + info->mode != PIPE_PRIM_QUAD_STRIP && + info->mode != PIPE_PRIM_POLYGON); + + if (info->mode == PIPE_PRIM_LINE_LOOP) { + /* XXX need to do a fallback */ + assert(!"draw auto fallback not supported yet"); + return PIPE_OK; + } + else { + SVGA3dPrimitiveRange range; + unsigned hw_count; + + range.primType = svga_translate_prim(info->mode, 12, &hw_count, + info->vertices_per_patch); + range.primitiveCount = 0; + range.indexArray.surfaceId = SVGA3D_INVALID_ID; + range.indexArray.offset = 0; + range.indexArray.stride = 0; + range.indexWidth = 0; + range.indexBias = 0; + + SVGA_RETRY(svga, svga_hwtnl_prim + (svga->hwtnl, &range, + 0, /* vertex count comes from SO buffer */ + 0, /* don't know min index */ + ~0u, /* don't know max index */ + NULL, /* no index buffer */ + 0, /* start instance */ + 1, /* only 1 instance supported */ + NULL, /* indirect drawing info */ + info->count_from_stream_output)); + + return PIPE_OK; + } +} + + +/** + * Indirect draw (get vertex count, start index, etc. from a buffer object. 
+ */
+static enum pipe_error
+retry_draw_indirect(struct svga_context *svga,
+                    const struct pipe_draw_info *info)
+{
+   assert(svga_have_sm5(svga));
+   assert(info->indirect);
+   /* indirect drawing implies core profile and none of these prim types */
+   assert(info->mode != PIPE_PRIM_QUADS &&
+          info->mode != PIPE_PRIM_QUAD_STRIP &&
+          info->mode != PIPE_PRIM_POLYGON);
+
+   if (info->mode == PIPE_PRIM_LINE_LOOP) {
+      /* need to do a fallback */
+      util_draw_indirect(&svga->pipe, info);
+      return PIPE_OK;
+   }
+   else {
+      SVGA3dPrimitiveRange range;
+      unsigned hw_count;
+
+      range.primType = svga_translate_prim(info->mode, 12, &hw_count,
+                                           info->vertices_per_patch);
+      range.primitiveCount = 0; /* specified in indirect buffer */
+      range.indexArray.surfaceId = SVGA3D_INVALID_ID;
+      range.indexArray.offset = 0;
+      range.indexArray.stride = 0;
+      range.indexWidth = info->index_size;
+      range.indexBias = 0; /* specified in indirect buffer */
+
+      SVGA_RETRY(svga, svga_hwtnl_prim
+                 (svga->hwtnl, &range,
+                  0, /* vertex count is in indirect buffer */
+                  0, /* don't know min index */
+                  ~0u, /* don't know max index */
+                  info->index.resource,
+                  info->start_instance,
+                  0, /* don't know instance count */
+                  info->indirect,
+                  NULL)); /* SO vertex count */
+
+      return PIPE_OK;
+   }
+}
+
+
 /**
  * Determine if we need to implement primitive restart with a fallback
  * path which breaks the original primitive into sub-primitive at the
@@ -116,6 +198,21 @@ need_fallback_prim_restart(const struct svga_context *svga,
 }
 
 
+/**
+ * A helper function to return the vertex count from the primitive count
+ * returned from the stream output statistics query for the specified stream.
+ */
+static unsigned
+get_vcount_from_stream_output(struct svga_context *svga,
+                              const struct pipe_draw_info *info,
+                              unsigned stream)
+{
+   unsigned primcount;
+   primcount = svga_get_primcount_from_stream_output(svga, stream);
+   return u_vertices_for_prims(info->mode, primcount);
+}
+
+
 static void
 svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 {
@@ -147,6 +244,18 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE;
    }
 
+   if (svga->curr.vertices_per_patch != info->vertices_per_patch) {
+      svga->curr.vertices_per_patch = info->vertices_per_patch;
+
+      /* If input patch size changes, we need to notify the TCS
+       * code to reevaluate the shader variant since the
+       * vertices per patch count is a constant in the control
+       * point count declaration.
+       */
+      if (svga->curr.tcs || svga->curr.tes)
+         svga->dirty |= SVGA_NEW_TCS_PARAM;
+   }
+
    if (need_fallback_prim_restart(svga, info)) {
       enum pipe_error r;
       r = util_draw_vbo_without_prim_restart(pipe, info);
@@ -155,7 +264,8 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       goto done;
    }
 
-   if (!u_trim_pipe_prim(info->mode, &count))
+   if (!info->indirect && !info->count_from_stream_output &&
+       !u_trim_pipe_prim(info->mode, &count))
       goto done;
 
    needed_swtnl = svga->state.sw.need_swtnl;
@@ -189,20 +299,53 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    }
 
    svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
+   svga_update_state_retry(svga, SVGA_STATE_HW_DRAW);
+
   /** determine if flatshade is to be used after svga_update_state()
    * in case the fragment shader is changed.
*/ svga_hwtnl_set_flatshade(svga->hwtnl, svga->curr.rast->templ.flatshade || - is_using_flat_shading(svga), + svga_is_using_flat_shading(svga), svga->curr.rast->templ.flatshade_first); - if (info->index_size) { + if (info->count_from_stream_output) { + unsigned stream = 0; + assert(count == 0); + + /* If the vertex count is from the stream output of a non-zero stream + * or the draw info specifies instancing, we will need a workaround + * since the draw_auto command does not support stream instancing. + * The workaround requires querying the vertex count from the + * stream output statistics query for the specified stream and then + * fallback to the regular draw function. + */ + + /* Check the stream index of the specified stream output target */ + for (unsigned i = 0; i < ARRAY_SIZE(svga->so_targets); i++) { + if (svga->vcount_so_targets[i] == info->count_from_stream_output) { + stream = (svga->vcount_buffer_stream >> (i * 4)) & 0xf; + break; + } + } + if (info->instance_count > 1 || stream > 0) { + count = get_vcount_from_stream_output(svga, info, stream); + } + } + + if (info->count_from_stream_output && count == 0) { + ret = retry_draw_auto(svga, info); + } + else if (info->indirect) { + ret = retry_draw_indirect(svga, info); + } + else if (info->index_size) { ret = retry_draw_range_elements(svga, info, count); } else { ret = retry_draw_arrays(svga, info->mode, info->start, count, - info->start_instance, info->instance_count); + info->start_instance, info->instance_count, + info->vertices_per_patch); } } diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c index a2f00b1d290..7795afbfe1f 100644 --- a/src/gallium/drivers/svga/svga_pipe_fs.c +++ b/src/gallium/drivers/svga/svga_pipe_fs.c @@ -37,7 +37,7 @@ #include "svga_shader.h" -static void * +void * svga_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -69,7 +69,7 @@ svga_create_fs_state(struct pipe_context *pipe, } -static void +void svga_bind_fs_state(struct pipe_context *pipe, void *shader) { struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader; @@ -85,6 +85,7 @@ svga_delete_fs_state(struct pipe_context *pipe, void *shader) { struct svga_context *svga = svga_context(pipe); struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader; + struct svga_fragment_shader *next_fs; struct svga_shader_variant *variant, *tmp; enum pipe_error ret; @@ -92,27 +93,32 @@ svga_delete_fs_state(struct pipe_context *pipe, void *shader) assert(fs->base.parent == NULL); - draw_delete_fragment_shader(svga->swtnl.draw, fs->draw_shader); + while (fs) { + next_fs = (struct svga_fragment_shader *) fs->base.next; + + draw_delete_fragment_shader(svga->swtnl.draw, fs->draw_shader); - for (variant = fs->base.variants; variant; variant = tmp) { - tmp = variant->next; + for (variant = fs->base.variants; variant; variant = tmp) { + tmp = variant->next; - /* Check if deleting currently bound shader */ - if (variant == svga->state.hw_draw.fs) { - ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); + /* Check if deleting currently bound shader */ + if (variant == svga->state.hw_draw.fs) { ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL); - assert(ret == PIPE_OK); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL); + assert(ret == PIPE_OK); + } + svga->state.hw_draw.fs = NULL; } - svga->state.hw_draw.fs = NULL; + + 
svga_destroy_shader_variant(svga, variant); } - svga_destroy_shader_variant(svga, variant); + FREE((void *)fs->base.tokens); + FREE(fs); + fs = next_fs; } - - FREE((void *)fs->base.tokens); - FREE(fs); } diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c index e0d1e51f412..61b4897c5d6 100644 --- a/src/gallium/drivers/svga/svga_pipe_misc.c +++ b/src/gallium/drivers/svga/svga_pipe_misc.c @@ -40,9 +40,16 @@ svga_set_scissor_states(struct pipe_context *pipe, unsigned num_scissors, const struct pipe_scissor_state *scissors) { + ASSERTED struct svga_screen *svgascreen = svga_screen(pipe->screen); struct svga_context *svga = svga_context(pipe); + unsigned i, num_sc; + + assert(start_slot + num_scissors <= svgascreen->max_viewports); + + for (i = 0, num_sc = start_slot; i < num_scissors; i++) { + svga->curr.scissor[num_sc++] = scissors[i]; /* struct copy */ + } - memcpy(&svga->curr.scissor, scissors, sizeof(*scissors)); svga->dirty |= SVGA_NEW_SCISSOR; } @@ -199,8 +206,14 @@ svga_set_viewport_states(struct pipe_context *pipe, const struct pipe_viewport_state *viewports) { struct svga_context *svga = svga_context(pipe); + ASSERTED struct svga_screen *svgascreen = svga_screen(pipe->screen); + unsigned i, num_vp; - svga->curr.viewport = *viewports; /* struct copy */ + assert(start_slot + num_viewports <= svgascreen->max_viewports); + + for (i = 0, num_vp = start_slot; i < num_viewports; i++) { + svga->curr.viewport[num_vp++] = viewports[i]; /* struct copy */ + } svga->dirty |= SVGA_NEW_VIEWPORT; } diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c index 1b9b17e2a8e..38874deb414 100644 --- a/src/gallium/drivers/svga/svga_pipe_query.c +++ b/src/gallium/drivers/svga/svga_pipe_query.c @@ -50,6 +50,7 @@ struct svga_query { SVGA3dQueryType svga_type; /**< SVGA3D_QUERYTYPE_x or unused */ unsigned id; /** Per-context query identifier */ + boolean active; /** TRUE if query is active */ struct pipe_fence_handle *fence; @@ -214,10 +215,10 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq, * will hold queries of the same type. Multiple memory blocks can be allocated * for a particular query type. * - * Currently each memory block is of 184 bytes. We support up to 128 + * Currently each memory block is of 184 bytes. We support up to 512 * memory blocks. The query memory size is arbitrary right now. * Each occlusion query takes about 8 bytes. One memory block can accomodate - * 23 occlusion queries. 128 of those blocks can support up to 2944 occlusion + * 23 occlusion queries. 512 of those blocks can support up to 11K occlusion * queries. That seems reasonable for now. If we think this limit is * not enough, we can increase the limit or try to grow the mob in runtime. * Note, SVGA device does not impose one mob per context for queries, @@ -228,7 +229,7 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq, * following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery. 
*/ #define SVGA_QUERY_MEM_BLOCK_SIZE (sizeof(SVGADXQueryResultUnion) * 2) -#define SVGA_QUERY_MEM_SIZE (128 * SVGA_QUERY_MEM_BLOCK_SIZE) +#define SVGA_QUERY_MEM_SIZE (512 * SVGA_QUERY_MEM_BLOCK_SIZE) struct svga_qmem_alloc_entry { @@ -243,31 +244,34 @@ struct svga_qmem_alloc_entry /** * Allocate a memory block from the query object memory - * \return -1 if out of memory, else index of the query memory block + * \return NULL if out of memory, else pointer to the query memory block */ -static int +static struct svga_qmem_alloc_entry * allocate_query_block(struct svga_context *svga) { int index; unsigned offset; + struct svga_qmem_alloc_entry *alloc_entry = NULL; /* Find the next available query block */ index = util_bitmask_add(svga->gb_query_alloc_mask); if (index == UTIL_BITMASK_INVALID_INDEX) - return -1; + return NULL; offset = index * SVGA_QUERY_MEM_BLOCK_SIZE; if (offset >= svga->gb_query_len) { unsigned i; + /* Deallocate the out-of-range index */ + util_bitmask_clear(svga->gb_query_alloc_mask, index); + index = -1; + /** * All the memory blocks are allocated, lets see if there is * any empty memory block around that can be freed up. */ - index = -1; for (i = 0; i < SVGA3D_QUERYTYPE_MAX && index == -1; i++) { - struct svga_qmem_alloc_entry *alloc_entry; struct svga_qmem_alloc_entry *prev_alloc_entry = NULL; alloc_entry = svga->gb_query_map[i]; @@ -286,9 +290,20 @@ allocate_query_block(struct svga_context *svga) } } } + + if (index == -1) { + debug_printf("Query memory object is full\n"); + return NULL; + } } - return index; + if (!alloc_entry) { + assert(index != -1); + alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry); + alloc_entry->block_index = index; + } + + return alloc_entry; } /** @@ -346,17 +361,14 @@ allocate_query_block_entry(struct svga_context *svga, unsigned len) { struct svga_qmem_alloc_entry *alloc_entry; - int block_index = -1; - block_index = allocate_query_block(svga); - if (block_index == -1) - return NULL; - alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry); + alloc_entry = allocate_query_block(svga); if (!alloc_entry) return NULL; - alloc_entry->block_index = block_index; - alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE; + assert(alloc_entry->block_index != -1); + alloc_entry->start_offset = + alloc_entry->block_index * SVGA_QUERY_MEM_BLOCK_SIZE; alloc_entry->nquery = 0; alloc_entry->alloc_mask = util_bitmask_create(); alloc_entry->next = NULL; @@ -508,17 +520,16 @@ define_query_vgpu10(struct svga_context *svga, sq->gb_query = svga->gb_query; - /* Allocate an integer ID for this query */ - sq->id = util_bitmask_add(svga->query_id_bm); - if (sq->id == UTIL_BITMASK_INVALID_INDEX) - return PIPE_ERROR_OUT_OF_MEMORY; + /* Make sure query length is in multiples of 8 bytes */ + qlen = align(resultLen + sizeof(SVGA3dQueryState), 8); /* Find a slot for this query in the gb object */ - qlen = resultLen + sizeof(SVGA3dQueryState); sq->offset = allocate_query(svga, sq->svga_type, qlen); if (sq->offset == -1) return PIPE_ERROR_OUT_OF_MEMORY; + assert((sq->offset & 7) == 0); + SVGA_DBG(DEBUG_QUERY, " query type=%d qid=0x%x offset=%d\n", sq->svga_type, sq->id, sq->offset); @@ -731,7 +742,19 @@ svga_create_query(struct pipe_context *pipe, case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_SO_STATISTICS: assert(svga_have_vgpu10(svga)); - sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS; + + /* Until the device supports the new query type for multiple streams, + * we will use the single stream query type for stream 0. 
+ */ + if (svga_have_sm5(svga) && index > 0) { + assert(index < 4); + + sq->svga_type = SVGA3D_QUERYTYPE_SOSTATS_STREAM0 + index; + } + else { + assert(index == 0); + sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS; + } ret = define_query_vgpu10(svga, sq, sizeof(SVGADXStreamOutStatisticsQueryResult)); if (ret != PIPE_OK) @@ -969,7 +992,10 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) assert(!"unexpected query type in svga_begin_query()"); } - svga->sq[sq->type] = sq; + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d type=%d svga_type=%d\n", + __FUNCTION__, sq, sq->id, sq->type, sq->svga_type); + + sq->active = TRUE; return true; } @@ -988,12 +1014,12 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q) SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, sq, sq->id); - if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq) + if (sq->type == PIPE_QUERY_TIMESTAMP && !sq->active) svga_begin_query(pipe, q); svga_hwtnl_flush_retry(svga); - assert(svga->sq[sq->type] == sq); + assert(sq->active); switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: @@ -1083,7 +1109,7 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q) default: assert(!"unexpected query type in svga_end_query()"); } - svga->sq[sq->type] = NULL; + sq->active = FALSE; return true; } diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index 7d5936fa1ec..7764a855391 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -118,6 +118,9 @@ define_rasterizer_object(struct svga_context *svga, rast->templ.line_stipple_factor : 0; const uint16 line_pattern = rast->templ.line_stipple_enable ? rast->templ.line_stipple_pattern : 0; + const uint8 pv_last = !rast->templ.flatshade_first && + svgascreen->haveProvokingVertex; + unsigned try; rast->id = util_bitmask_add(svga->rast_object_id_bm); @@ -194,7 +197,18 @@ svga_create_rasterizer_state(struct pipe_context *pipe, rast->templ.point_smooth = TRUE; } - if (templ->point_smooth) { + if (rast->templ.point_smooth && + rast->templ.point_size_per_vertex == 0 && + rast->templ.point_size <= screen->pointSmoothThreshold) { + /* If the point size is less than the threshold, disable smoothing. + * Note that this only effects point rendering when we use the + * pipe_rasterizer_state::point_size value, not when the point size + * is set in the VS. + */ + rast->templ.point_smooth = FALSE; + } + + if (rast->templ.point_smooth) { /* For smooth points we need to generate fragments for at least * a 2x2 region. Otherwise the quad we draw may be too small and * we may generate no fragments at all. @@ -237,7 +251,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe, } } - if (!svga_have_vgpu10(svga) && templ->point_smooth) { + if (!svga_have_vgpu10(svga) && rast->templ.point_smooth) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS; rast->need_pipeline_points_str = "smooth points"; } diff --git a/src/gallium/drivers/svga/svga_pipe_streamout.c b/src/gallium/drivers/svga/svga_pipe_streamout.c index 0c6c034751c..380ceaa3aa7 100644 --- a/src/gallium/drivers/svga/svga_pipe_streamout.c +++ b/src/gallium/drivers/svga/svga_pipe_streamout.c @@ -44,6 +44,89 @@ svga_stream_output_target(struct pipe_stream_output_target *s) return (struct svga_stream_output_target *)s; } + +/** + * A helper function to send different version of the DefineStreamOutput command + * depending on if device is SM5 capable or not. 
+ */
+static enum pipe_error
+svga_define_stream_output(struct svga_context *svga,
+                          SVGA3dStreamOutputId soid,
+                          uint32 numOutputStreamEntries,
+                          uint32 numOutputStreamStrides,
+                          uint32 streamStrides[SVGA3D_DX_MAX_SOTARGETS],
+                          const SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS],
+                          uint32 rasterizedStream,
+                          struct svga_stream_output *streamout)
+{
+   unsigned i;
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s: id=%d\n", __FUNCTION__, soid);
+   SVGA_DBG(DEBUG_STREAMOUT,
+            "numOutputStreamEntries=%d\n", numOutputStreamEntries);
+
+   for (i = 0; i < numOutputStreamEntries; i++) {
+      SVGA_DBG(DEBUG_STREAMOUT,
+               " %d: slot=%d regIdx=%d regMask=0x%x stream=%d\n",
+               i, decls[i].outputSlot, decls[i].registerIndex,
+               decls[i].registerMask, decls[i].stream);
+   }
+
+   SVGA_DBG(DEBUG_STREAMOUT,
+            "numOutputStreamStrides=%d\n", numOutputStreamStrides);
+   for (i = 0; i < numOutputStreamStrides; i++) {
+      SVGA_DBG(DEBUG_STREAMOUT, " %d ", streamStrides[i]);
+   }
+   SVGA_DBG(DEBUG_STREAMOUT, "\n");
+
+   if (svga_have_sm5(svga) &&
+       (numOutputStreamEntries > SVGA3D_MAX_DX10_STREAMOUT_DECLS ||
+        numOutputStreamStrides > 1)) {
+      unsigned bufSize = sizeof(SVGA3dStreamOutputDeclarationEntry)
+         * numOutputStreamEntries;
+      struct svga_winsys_buffer *declBuf;
+      struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+      void *map;
+
+      declBuf = svga_winsys_buffer_create(svga, 1, SVGA_BUFFER_USAGE_PINNED,
+                                          bufSize);
+      if (!declBuf)
+         return PIPE_ERROR;
+      map = sws->buffer_map(sws, declBuf, PIPE_TRANSFER_WRITE);
+      if (!map) {
+         sws->buffer_destroy(sws, declBuf);
+         return PIPE_ERROR;
+      }
+
+      /* copy decls to buffer */
+      memcpy(map, decls, bufSize);
+
+      /* unmap buffer */
+      sws->buffer_unmap(sws, declBuf);
+      streamout->declBuf = declBuf;
+
+      SVGA_RETRY(svga, SVGA3D_sm5_DefineAndBindStreamOutput
+                 (svga->swc, soid,
+                  numOutputStreamEntries,
+                  numOutputStreamStrides,
+                  streamStrides,
+                  streamout->declBuf,
+                  rasterizedStream,
+                  bufSize));
+   } else {
+      SVGA_RETRY(svga, SVGA3D_vgpu10_DefineStreamOutput(svga->swc, soid,
+                                                        numOutputStreamEntries,
+                                                        streamStrides,
+                                                        decls));
+   }
+
+   return PIPE_OK;
+}
+
+
+/**
+ * Creates stream output from the stream output info.
+ */
 struct svga_stream_output *
 svga_create_stream_output(struct svga_context *svga,
                           struct svga_shader *shader,
@@ -52,9 +135,13 @@ svga_create_stream_output(struct svga_context *svga,
    struct svga_stream_output *streamout;
    SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS];
    unsigned strides[SVGA3D_DX_MAX_SOTARGETS];
+   unsigned dstOffset[SVGA3D_DX_MAX_SOTARGETS];
+   unsigned numStreamStrides = 0;
+   unsigned numDecls;
    unsigned i;
    enum pipe_error ret;
    unsigned id;
+   ASSERTED unsigned maxDecls;
 
    assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS);
 
@@ -64,7 +151,12 @@ svga_create_stream_output(struct svga_context *svga,
    if (!svga_have_vgpu10(svga))
       return NULL;
 
-   assert(info->num_outputs <= SVGA3D_MAX_STREAMOUT_DECLS);
+   if (svga_have_sm5(svga))
+      maxDecls = SVGA3D_MAX_STREAMOUT_DECLS;
+   else if (svga_have_vgpu10(svga))
+      maxDecls = SVGA3D_MAX_DX10_STREAMOUT_DECLS;
+
+   assert(info->num_outputs <= maxDecls);
 
    /* Allocate an integer ID for the stream output */
    id = util_bitmask_add(svga->stream_output_id_bm);
@@ -81,15 +173,17 @@ svga_create_stream_output(struct svga_context *svga,
    streamout->info = *info;
    streamout->id = id;
    streamout->pos_out_index = -1;
+   streamout->streammask = 0;
 
-   SVGA_DBG(DEBUG_STREAMOUT, "%s, num_outputs=%d id=%d\n", __FUNCTION__,
-            info->num_outputs, id);
-
-   /* init whole decls and stride arrays to zero to avoid garbage values */
+   /* Init whole decls and stride arrays to zero to avoid garbage values */
    memset(decls, 0, sizeof(decls));
    memset(strides, 0, sizeof(strides));
+   memset(dstOffset, 0, sizeof(dstOffset));
+
+   SVGA_DBG(DEBUG_STREAMOUT, "%s: num_outputs=%d\n",
+            __FUNCTION__, info->num_outputs);
 
-   for (i = 0; i < info->num_outputs; i++) {
+   for (i = 0, numDecls = 0; i < info->num_outputs; i++, numDecls++) {
      unsigned reg_idx = info->output[i].register_index;
      unsigned buf_idx = info->output[i].output_buffer;
      const enum tgsi_semantic sem_name =
@@ -97,17 +191,59 @@ svga_create_stream_output(struct svga_context *svga,
 
      assert(buf_idx <= PIPE_MAX_SO_BUFFERS);
 
+      numStreamStrides = MAX2(numStreamStrides, buf_idx);
+
+      SVGA_DBG(DEBUG_STREAMOUT,
+               " %d: register_index=%d output_buffer=%d stream=%d\n",
+               i, reg_idx, buf_idx, info->output[i].stream);
+
+      SVGA_DBG(DEBUG_STREAMOUT,
+               " dst_offset=%d start_component=%d num_components=%d\n",
+               info->output[i].dst_offset,
+               info->output[i].start_component,
+               info->output[i].num_components);
+
+      streamout->buffer_stream |= info->output[i].stream << (buf_idx * 4);
+
+      /**
+       * Check if the destination offset of the current output
+       * is at the expected offset. If it is greater, then that means
+       * there is a gap in the stream output. We need to insert
+       * extra declaration entries with an invalid register index
+       * to specify a gap.
+       */
+      while (info->output[i].dst_offset > dstOffset[buf_idx]) {
+
+         unsigned numComponents = info->output[i].dst_offset -
+                                  dstOffset[buf_idx];
+
+         assert(svga_have_sm5(svga));
+
+         /* We can only specify at most 4 components to skip in each
+          * declaration entry.
+          */
+         numComponents = numComponents > 4 ? 4 : numComponents;
+
+         decls[numDecls].outputSlot = buf_idx;
+         decls[numDecls].stream = info->output[i].stream;
+         decls[numDecls].registerIndex = SVGA3D_INVALID_ID;
+         decls[numDecls].registerMask = (1 << numComponents) - 1;
+
+         dstOffset[buf_idx] += numComponents;
+         numDecls++;
+      }
+
      if (sem_name == TGSI_SEMANTIC_POSITION) {
         /**
          * Check if streaming out POSITION. If so, replace the
          * register index with the index for NON_ADJUSTED POSITION.
*/ - decls[i].registerIndex = shader->info.num_outputs; + decls[numDecls].registerIndex = shader->info.num_outputs; /* Save this output index, so we can tell later if this stream output * includes an output of a vertex position */ - streamout->pos_out_index = i; + streamout->pos_out_index = numDecls; } else if (sem_name == TGSI_SEMANTIC_CLIPDIST) { /** @@ -116,44 +252,49 @@ svga_create_stream_output(struct svga_context *svga, * It's valid to write to ClipDistance variable for non-enabled * clip planes. */ - decls[i].registerIndex = shader->info.num_outputs + 1 + - shader->info.output_semantic_index[reg_idx]; + decls[numDecls].registerIndex = + shader->info.num_outputs + 1 + + shader->info.output_semantic_index[reg_idx]; } else { - decls[i].registerIndex = reg_idx; + decls[numDecls].registerIndex = reg_idx; } - decls[i].outputSlot = buf_idx; - decls[i].registerMask = + decls[numDecls].outputSlot = buf_idx; + decls[numDecls].registerMask = ((1 << info->output[i].num_components) - 1) << info->output[i].start_component; - SVGA_DBG(DEBUG_STREAMOUT, "%d slot=%d regIdx=%d regMask=0x%x\n", - i, decls[i].outputSlot, decls[i].registerIndex, - decls[i].registerMask); + decls[numDecls].stream = info->output[i].stream; + assert(decls[numDecls].stream == 0 || svga_have_sm5(svga)); + + /* Set the bit in streammask for the enabled stream */ + streamout->streammask |= 1 << info->output[i].stream; + + /* Update the expected offset for the next output */ + dstOffset[buf_idx] += info->output[i].num_components; strides[buf_idx] = info->stride[buf_idx] * sizeof(float); } - ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id, - info->num_outputs, - strides, - decls); + assert(numDecls <= maxDecls); + + /* Send the DefineStreamOutput command. + * Note, rasterizedStream is always 0. + */ + ret = svga_define_stream_output(svga, id, + numDecls, numStreamStrides+1, + strides, decls, 0, streamout); + if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id, - info->num_outputs, - strides, - decls); - if (ret != PIPE_OK) { - util_bitmask_clear(svga->stream_output_id_bm, id); - FREE(streamout); - streamout = NULL; - } + util_bitmask_clear(svga->stream_output_id_bm, id); + FREE(streamout); + streamout = NULL; } return streamout; } + enum pipe_error svga_set_stream_output(struct svga_context *svga, struct svga_stream_output *streamout) @@ -168,12 +309,28 @@ svga_set_stream_output(struct svga_context *svga, streamout, id); if (svga->current_so != streamout) { + + /* Before unbinding the current stream output, stop the stream output + * statistics queries for the active streams. + */ + if (svga_have_sm5(svga) && svga->current_so) { + svga->vcount_buffer_stream = svga->current_so->buffer_stream; + svga_end_stream_output_queries(svga, svga->current_so->streammask); + } + enum pipe_error ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id); if (ret != PIPE_OK) { return ret; } svga->current_so = streamout; + + /* After binding the new stream output, start the stream output + * statistics queries for the active streams. 
+ */
+   if (svga_have_sm5(svga) && svga->current_so) {
+      svga_begin_stream_output_queries(svga, svga->current_so->streammask);
+   }
   }
 
   return PIPE_OK;
@@ -183,17 +340,18 @@ void
 svga_delete_stream_output(struct svga_context *svga,
                           struct svga_stream_output *streamout)
 {
-   enum pipe_error ret;
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 
    SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x\n", __FUNCTION__, streamout);
 
    assert(svga_have_vgpu10(svga));
    assert(streamout != NULL);
 
-   ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id);
-   if (ret != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id);
+   SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyStreamOutput(svga->swc,
+                                                      streamout->id));
+
+   if (svga_have_sm5(svga) && streamout->declBuf) {
+      sws->buffer_destroy(sws, streamout->declBuf);
    }
 
    /* Release the ID */
@@ -203,6 +361,7 @@ svga_delete_stream_output(struct svga_context *svga,
    FREE(streamout);
 }
 
+
 static struct pipe_stream_output_target *
 svga_create_stream_output_target(struct pipe_context *pipe,
                                  struct pipe_resource *buffer,
@@ -252,9 +411,9 @@ svga_set_stream_output_targets(struct pipe_context *pipe,
 {
    struct svga_context *svga = svga_context(pipe);
    struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS];
-   enum pipe_error ret;
    unsigned i;
    unsigned num_so_targets;
+   boolean begin_so_queries = num_targets > 0;
 
    SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __FUNCTION__,
            num_targets);
@@ -269,6 +428,14 @@ svga_set_stream_output_targets(struct pipe_context *pipe,
         sbuf->dirty = TRUE;
      }
 
+   /* Before the currently bound streamout targets are unbound,
+    * save them in case they need to be referenced to retrieve the
+    * number of vertices being streamed out.
+    */
+   for (i = 0; i < ARRAY_SIZE(svga->so_targets); i++) {
+      svga->vcount_so_targets[i] = svga->so_targets[i];
+   }
+
   assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS);
 
   for (i = 0; i < num_targets; i++) {
@@ -283,7 +450,16 @@ svga_set_stream_output_targets(struct pipe_context *pipe,
             & SVGA3D_SURFACE_BIND_STREAM_OUTPUT);
 
      svga->so_targets[i] = &sot->base;
-      soBindings[i].offset = sot->base.buffer_offset;
+      if (offsets[i] == -1) {
+         soBindings[i].offset = -1;
+
+         /* The streamout is being resumed. There is no need to restart streamout statistics
+          * queries for the draw-auto fallback since those queries are still active.
+          */
+         begin_so_queries = FALSE;
+      }
+      else
+         soBindings[i].offset = sot->base.buffer_offset + offsets[i];
 
      /* The size cannot extend beyond the end of the buffer. Clamp it. */
      size = MIN2(sot->base.buffer_size,
@@ -299,15 +475,22 @@ svga_set_stream_output_targets(struct pipe_context *pipe,
   }
 
   num_so_targets = MAX2(svga->num_so_targets, num_targets);
-   ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
-                                    soBindings, svga->so_surfaces);
-   if (ret != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
-                                       soBindings, svga->so_surfaces);
-   }
-
+   SVGA_RETRY(svga, SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
                                               soBindings, svga->so_surfaces));
   svga->num_so_targets = num_targets;
+
+   if (svga_have_sm5(svga) && svga->current_so && begin_so_queries) {
+
+      /* If there are already active queries and we need to start a new streamout,
+       * we need to stop the current active queries first.
+ */ + if (svga->in_streamout) { + svga_end_stream_output_queries(svga, svga->current_so->streammask); + } + + /* Start stream out statistics queries for the new streamout */ + svga_begin_stream_output_queries(svga, svga->current_so->streammask); + } } /** @@ -329,6 +512,7 @@ svga_rebind_stream_output_targets(struct svga_context *svga) return PIPE_OK; } + void svga_init_stream_output_functions(struct svga_context *svga) { @@ -336,3 +520,117 @@ svga_init_stream_output_functions(struct svga_context *svga) svga->pipe.stream_output_target_destroy = svga_destroy_stream_output_target; svga->pipe.set_stream_output_targets = svga_set_stream_output_targets; } + + +/** + * A helper function to create stream output statistics queries for each stream. + * These queries are created as a workaround for DrawTransformFeedbackInstanced or + * DrawTransformFeedbackStreamInstanced when auto draw doesn't support + * instancing or non-0 stream. In this case, the vertex count will + * be retrieved from the stream output statistics query. + */ +void +svga_create_stream_output_queries(struct svga_context *svga) +{ + unsigned i; + + if (!svga_have_sm5(svga)) + return; + + for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) { + svga->so_queries[i] = svga->pipe.create_query(&svga->pipe, + PIPE_QUERY_SO_STATISTICS, i); + assert(svga->so_queries[i] != NULL); + } +} + + +/** + * Destroy the stream output statistics queries for the draw-auto workaround. + */ +void +svga_destroy_stream_output_queries(struct svga_context *svga) +{ + unsigned i; + + if (!svga_have_sm5(svga)) + return; + + for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) { + svga->pipe.destroy_query(&svga->pipe, svga->so_queries[i]); + } +} + + +/** + * Start stream output statistics queries for the active streams. + */ +void +svga_begin_stream_output_queries(struct svga_context *svga, + unsigned streammask) +{ + assert(svga_have_sm5(svga)); + assert(!svga->in_streamout); + + for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) { + bool ret; + if (streammask & (1 << i)) { + ret = svga->pipe.begin_query(&svga->pipe, svga->so_queries[i]); + } + (void) ret; + } + svga->in_streamout = TRUE; + + return; +} + + +/** + * Stop stream output statistics queries for the active streams. + */ +void +svga_end_stream_output_queries(struct svga_context *svga, + unsigned streammask) +{ + assert(svga_have_sm5(svga)); + + if (!svga->in_streamout) + return; + + for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) { + bool ret; + if (streammask & (1 << i)) { + ret = svga->pipe.end_query(&svga->pipe, svga->so_queries[i]); + } + (void) ret; + } + svga->in_streamout = FALSE; + + return; +} + + +/** + * Return the primitive count returned from the stream output statistics query + * for the specified stream. 
+ */ +unsigned +svga_get_primcount_from_stream_output(struct svga_context *svga, + unsigned stream) +{ + unsigned primcount = 0; + union pipe_query_result result; + bool ret; + + if (svga->current_so) { + svga_end_stream_output_queries(svga, svga->current_so->streammask); + } + + ret = svga->pipe.get_query_result(&svga->pipe, + svga->so_queries[stream], + TRUE, &result); + if (ret) + primcount = result.so_statistics.num_primitives_written; + + return primcount; +} diff --git a/src/gallium/drivers/svga/svga_pipe_ts.c b/src/gallium/drivers/svga/svga_pipe_ts.c new file mode 100644 index 00000000000..12a3bf486b7 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_ts.c @@ -0,0 +1,219 @@ +/********************************************************** + * Copyright 2018-2020 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +#include "pipe/p_context.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" + +#include "svga_context.h" +#include "svga_shader.h" + +static void +svga_set_tess_state(struct pipe_context *pipe, + const float default_outer_level[4], + const float default_inner_level[2]) +{ + struct svga_context *svga = svga_context(pipe); + unsigned i; + + for (i = 0; i < 4; i++) { + svga->curr.default_tesslevels[i] = default_outer_level[i]; + } + for (i = 0; i < 2; i++) { + svga->curr.default_tesslevels[i + 4] = default_inner_level[i]; + } +} + + +static void * +svga_create_tcs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_tcs_shader *tcs; + + tcs = CALLOC_STRUCT(svga_tcs_shader); + if (!tcs) + return NULL; + + SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_CREATETCS); + + tcs->base.tokens = tgsi_dup_tokens(templ->tokens); + + /* Collect basic info that we'll need later: + */ + tgsi_scan_shader(tcs->base.tokens, &tcs->base.info); + + tcs->base.id = svga->debug.shader_id++; + + tcs->generic_outputs = svga_get_generic_outputs_mask(&tcs->base.info); + + SVGA_STATS_TIME_POP(svga_sws(svga)); + return tcs; +} + + +static void +svga_bind_tcs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_tcs_shader *tcs = (struct svga_tcs_shader *) shader; + struct svga_context *svga = svga_context(pipe); + + if (tcs == svga->curr.tcs) + return; + + svga->curr.tcs = tcs; + svga->dirty |= SVGA_NEW_TCS; +} + + +static void +svga_delete_tcs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_tcs_shader *tcs = (struct svga_tcs_shader *) shader; + struct svga_tcs_shader *next_tcs; + struct svga_shader_variant *variant, *tmp; + + svga_hwtnl_flush_retry(svga); + + assert(tcs->base.parent == NULL); + + while (tcs) { + next_tcs = (struct svga_tcs_shader *)tcs->base.next; + for (variant = tcs->base.variants; variant; variant = tmp) { + tmp = variant->next; + + /* Check if deleting currently bound shader */ + if (variant == svga->state.hw_draw.tcs) { + SVGA_RETRY(svga, svga_set_shader(svga, SVGA3D_SHADERTYPE_HS, NULL)); + svga->state.hw_draw.tcs = NULL; + } + + svga_destroy_shader_variant(svga, variant); + } + + FREE((void *)tcs->base.tokens); + FREE(tcs); + tcs = next_tcs; + } +} + + +void +svga_cleanup_tcs_state(struct svga_context *svga) +{ + if (svga->tcs.passthrough_tcs) { + svga_delete_tcs_state(&svga->pipe, svga->tcs.passthrough_tcs); + } +} + + +static void * +svga_create_tes_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_tes_shader *tes; + + tes = CALLOC_STRUCT(svga_tes_shader); + if (!tes) + return NULL; + + SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_CREATETES); + + tes->base.tokens = tgsi_dup_tokens(templ->tokens); + + /* Collect basic info that we'll need later: + */ + tgsi_scan_shader(tes->base.tokens, &tes->base.info); + + tes->base.id = svga->debug.shader_id++; + + tes->generic_inputs = svga_get_generic_inputs_mask(&tes->base.info); + + SVGA_STATS_TIME_POP(svga_sws(svga)); + return tes; +} + + +static void +svga_bind_tes_state(struct pipe_context *pipe, void *shader) +{ + struct svga_tes_shader *tes = (struct svga_tes_shader *) shader; + struct svga_context *svga = svga_context(pipe); + + if (tes == svga->curr.tes) + return; + + svga->curr.tes = tes; + svga->dirty |= 
SVGA_NEW_TES; +} + + +static void +svga_delete_tes_state(struct pipe_context *pipe, void *shader) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_tes_shader *tes = (struct svga_tes_shader *) shader; + struct svga_tes_shader *next_tes; + struct svga_shader_variant *variant, *tmp; + + svga_hwtnl_flush_retry(svga); + + assert(tes->base.parent == NULL); + + while (tes) { + next_tes = (struct svga_tes_shader *)tes->base.next; + for (variant = tes->base.variants; variant; variant = tmp) { + tmp = variant->next; + + /* Check if deleting currently bound shader */ + if (variant == svga->state.hw_draw.tes) { + SVGA_RETRY(svga, svga_set_shader(svga, SVGA3D_SHADERTYPE_DS, NULL)); + svga->state.hw_draw.tes = NULL; + } + + svga_destroy_shader_variant(svga, variant); + } + + FREE((void *)tes->base.tokens); + FREE(tes); + tes = next_tes; + } +} + + +void +svga_init_ts_functions(struct svga_context *svga) +{ + svga->pipe.set_tess_state = svga_set_tess_state; + svga->pipe.create_tcs_state = svga_create_tcs_state; + svga->pipe.bind_tcs_state = svga_bind_tcs_state; + svga->pipe.delete_tcs_state = svga_delete_tcs_state; + svga->pipe.create_tes_state = svga_create_tes_state; + svga->pipe.bind_tes_state = svga_bind_tes_state; + svga->pipe.delete_tes_state = svga_delete_tes_state; +} diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c index a475e000f2e..aa7396c2c6b 100644 --- a/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -166,6 +166,7 @@ svga_delete_vs_state(struct pipe_context *pipe, void *shader) { struct svga_context *svga = svga_context(pipe); struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader; + struct svga_vertex_shader *next_vs; struct svga_shader_variant *variant, *tmp; enum pipe_error ret; @@ -173,37 +174,42 @@ svga_delete_vs_state(struct pipe_context *pipe, void *shader) assert(vs->base.parent == NULL); - /* Check if there is a generated geometry shader to go with this - * vertex shader. If there is, then delete the geometry shader as well. - */ - if (vs->gs != NULL) { - svga->pipe.delete_gs_state(&svga->pipe, vs->gs); - } + while (vs) { + next_vs = (struct svga_vertex_shader *)vs->base.next; - if (vs->base.stream_output != NULL) - svga_delete_stream_output(svga, vs->base.stream_output); + /* Check if there is a generated geometry shader to go with this + * vertex shader. If there is, then delete the geometry shader as well. 
+ */ + if (vs->gs != NULL) { + svga->pipe.delete_gs_state(&svga->pipe, vs->gs); + } - draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader); + if (vs->base.stream_output != NULL) + svga_delete_stream_output(svga, vs->base.stream_output); - for (variant = vs->base.variants; variant; variant = tmp) { - tmp = variant->next; + draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader); - /* Check if deleting currently bound shader */ - if (variant == svga->state.hw_draw.vs) { - ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); + for (variant = vs->base.variants; variant; variant = tmp) { + tmp = variant->next; + + /* Check if deleting currently bound shader */ + if (variant == svga->state.hw_draw.vs) { ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL); - assert(ret == PIPE_OK); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL); + assert(ret == PIPE_OK); + } + svga->state.hw_draw.vs = NULL; } - svga->state.hw_draw.vs = NULL; + + svga_destroy_shader_variant(svga, variant); } - svga_destroy_shader_variant(svga, variant); + FREE((void *)vs->base.tokens); + FREE(vs); + vs = next_vs; } - - FREE((void *)vs->base.tokens); - FREE(vs); } diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index 6629a8cc14d..4f19b8ca035 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -53,7 +53,8 @@ svga_buffer_needs_hw_storage(const struct svga_screen *ss, const struct pipe_resource *template) { unsigned bind_mask = (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | - PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT); + PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT | + PIPE_BIND_SHADER_BUFFER | PIPE_BIND_COMMAND_ARGS_BUFFER); if (ss->sws->have_vgpu10) { /* @@ -478,6 +479,9 @@ svga_buffer_create(struct pipe_screen *screen, */ bind_flags |= (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER); + + /* It may be used for shader resource as well. 
*/ + bind_flags |= PIPE_BIND_SAMPLER_VIEW; } if (svga_buffer_create_host_surface(ss, sbuf, bind_flags) != PIPE_OK) diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 2e9ca060059..5d2b934e7c1 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -175,6 +175,11 @@ svga_buffer_create_host_surface(struct svga_screen *ss, if (bind_flags & PIPE_BIND_SAMPLER_VIEW) sbuf->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE; + if (bind_flags & PIPE_BIND_COMMAND_ARGS_BUFFER) { + assert(ss->sws->have_sm5); + sbuf->key.flags |= SVGA3D_SURFACE_DRAWINDIRECT_ARGS; + } + if (!bind_flags && sbuf->b.b.usage == PIPE_USAGE_STAGING) { /* This surface is to be used with the * SVGA3D_CMD_DX_TRANSFER_FROM_BUFFER command, and no other diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index 2aa4e52faa7..1bae8c39595 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -133,26 +133,25 @@ svga_transfer_dma(struct svga_context *svga, } } else { - int y, h, y_max; + int y, h, srcy; unsigned blockheight = util_format_get_blockheight(st->base.resource->format); h = st->hw_nblocksy * blockheight; - y_max = st->box.y + st->box.h; + srcy = 0; - for (y = st->box.y; y < y_max; y += h) { + for (y = 0; y < st->box.h; y += h) { unsigned offset, length; void *hw, *sw; - if (y + h > y_max) - h = y_max - y; + if (y + h > st->box.h) + h = st->box.h - y; /* Transfer band must be aligned to pixel block boundaries */ assert(y % blockheight == 0); assert(h % blockheight == 0); - /* First band starts at the top of the SW buffer. */ - offset = (y - st->box.y) * st->base.stride / blockheight; + offset = y * st->base.stride / blockheight; length = h * st->base.stride / blockheight; sw = (uint8_t *) st->swbuf + offset; @@ -160,9 +159,9 @@ svga_transfer_dma(struct svga_context *svga, if (transfer == SVGA3D_WRITE_HOST_VRAM) { unsigned usage = PIPE_TRANSFER_WRITE; - /* Don't write to an in-flight DMA buffer. Synchronize or - * discard in-flight storage. 
*/ - if (y != st->box.y) { + /* Wait for the previous DMAs to complete */ + /* TODO: keep one DMA (at half the size) in the background */ + if (y) { svga_context_flush(svga, NULL); usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; } @@ -178,7 +177,7 @@ svga_transfer_dma(struct svga_context *svga, svga_transfer_dma_band(svga, st, transfer, st->box.x, y, st->box.z, st->box.w, h, st->box.d, - 0, 0, 0, flags); + 0, srcy, 0, flags); /* * Prevent the texture contents to be discarded on the next band @@ -488,6 +487,18 @@ svga_texture_transfer_map_direct(struct svga_context *svga, svga_context_flush(svga, NULL); } + if (map && rebind) { + enum pipe_error ret; + + ret = SVGA3D_BindGBSurface(swc, surf); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_BindGBSurface(swc, surf); + assert(ret == PIPE_OK); + } + svga_context_flush(svga, NULL); + } + /* * Make sure we return NULL if the map fails */ diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 2975bfefdfa..f7e3a900290 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -49,6 +49,10 @@ /* NOTE: this constant may get moved into a svga3d*.h header file */ #define SVGA3D_DX_MAX_RESOURCE_SIZE (128 * 1024 * 1024) +#ifndef MESA_GIT_SHA1 +#define MESA_GIT_SHA1 "(unknown git revision)" +#endif + #ifdef DEBUG int SVGA_DEBUG = 0; @@ -249,7 +253,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) 12 /* 2048x2048 */); case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: - return sws->have_vgpu10 ? SVGA3D_MAX_SURFACE_ARRAYSIZE : 0; + return sws->have_sm5 ? SVGA3D_SM5_MAX_SURFACE_ARRAYSIZE : + (sws->have_vgpu10 ? SVGA3D_SM4_MAX_SURFACE_ARRAYSIZE : 0); case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */ return 1; @@ -266,7 +271,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: return 1; /* The color outputs of vertex shaders are not clamped */ case PIPE_CAP_VERTEX_COLOR_CLAMPED: - return 0; /* The driver can't clamp vertex colors */ + return sws->have_vgpu10; case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: return 0; /* The driver can't clamp fragment colors */ @@ -274,10 +279,16 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; /* expected for GL_ARB_framebuffer_object */ case PIPE_CAP_GLSL_FEATURE_LEVEL: - return sws->have_vgpu10 ? 330 : 120; + if (sws->have_sm5) { + return 410; + } else if (sws->have_vgpu10) { + return 330; + } else { + return 120; + } case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: - return sws->have_vgpu10 ? 330 : 120; + return sws->have_sm5 ? 410 : (sws->have_vgpu10 ? 330 : 120); case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: @@ -303,10 +314,12 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: return sws->have_vgpu10 ? 4 : 0; case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: - return sws->have_vgpu10 ? SVGA3D_MAX_STREAMOUT_DECLS : 0; + return sws->have_sm5 ? SVGA3D_MAX_STREAMOUT_DECLS : + (sws->have_vgpu10 ? SVGA3D_MAX_DX10_STREAMOUT_DECLS : 0); case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + return sws->have_sm5; case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: - return 0; + return sws->have_sm5; case PIPE_CAP_TEXTURE_MULTISAMPLE: return svgascreen->ms_samples ? 
1 : 0; @@ -350,7 +363,16 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return sws->have_sm4_1; case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: - return sws->have_sm4_1 ? 1 : 0; /* only single-channel textures */ + /* SM4_1 supports only single-channel textures where as SM5 supports + * all four channel textures */ + return sws->have_sm5 ? 4 : + (sws->have_sm4_1 ? 1 : 0); + case PIPE_CAP_DRAW_INDIRECT: + return sws->have_sm5; + case PIPE_CAP_MAX_VERTEX_STREAMS: + return sws->have_sm5 ? 4 : 0; + case PIPE_CAP_COMPUTE: + return 0; case PIPE_CAP_MAX_VARYINGS: return sws->have_vgpu10 ? VGPU10_MAX_FS_INPUTS : 10; case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: @@ -362,9 +384,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_TEXTURE_BARRIER: - case PIPE_CAP_MAX_VERTEX_STREAMS: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: - case PIPE_CAP_COMPUTE: case PIPE_CAP_START_INSTANCE: case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_QUERY_PIPELINE_STATISTICS: @@ -372,7 +392,6 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: - case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_MULTI_DRAW_INDIRECT: case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: @@ -410,7 +429,10 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; case PIPE_CAP_MAX_VIEWPORTS: - return 1; + assert((!sws->have_vgpu10 && svgascreen->max_viewports == 1) || + (sws->have_vgpu10 && + svgascreen->max_viewports == SVGA3D_DX_MAX_VIEWPORTS)); + return svgascreen->max_viewports; case PIPE_CAP_ENDIANNESS: return PIPE_ENDIAN_LITTLE; @@ -427,10 +449,11 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return sws->have_vgpu10; case PIPE_CAP_CLEAR_TEXTURE: return sws->have_vgpu10; + case PIPE_CAP_DOUBLES: + return sws->have_sm5; case PIPE_CAP_UMA: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: @@ -453,7 +476,6 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: case PIPE_CAP_FBFETCH: case PIPE_CAP_TGSI_MUL_ZERO_WINS: - case PIPE_CAP_DOUBLES: case PIPE_CAP_INT64: case PIPE_CAP_INT64_DIVMOD: case PIPE_CAP_TGSI_TEX_TXF_LZ: @@ -487,6 +509,9 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) return 32; case PIPE_CAP_MAX_SHADER_BUFFER_SIZE: return 1 << 27; + /* Verify this once protocol is finalized. Setting it to minimum value. */ + case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: + return sws->have_sm5 ? 
30 : 0; default: return u_pipe_screen_get_param_defaults(screen, param); } @@ -674,12 +699,12 @@ vgpu10_get_shader_param(struct pipe_screen *screen, assert(sws->have_vgpu10); (void) sws; /* silence unused var warnings in non-debug builds */ - /* Only VS, GS, FS supported */ - if (shader != PIPE_SHADER_VERTEX && - shader != PIPE_SHADER_GEOMETRY && - shader != PIPE_SHADER_FRAGMENT) { + if ((!sws->have_sm5) && + (shader == PIPE_SHADER_TESS_CTRL || shader == PIPE_SHADER_TESS_EVAL)) + return 0; + + if (shader == PIPE_SHADER_COMPUTE) return 0; - } /* NOTE: we do not query the device for any caps/limits at this time */ @@ -697,6 +722,10 @@ vgpu10_get_shader_param(struct pipe_screen *screen, return VGPU10_MAX_FS_INPUTS; else if (shader == PIPE_SHADER_GEOMETRY) return VGPU10_MAX_GS_INPUTS; + else if (shader == PIPE_SHADER_TESS_CTRL) + return VGPU11_MAX_HS_INPUTS; + else if (shader == PIPE_SHADER_TESS_EVAL) + return VGPU11_MAX_DS_INPUT_CONTROL_POINTS; else return VGPU10_MAX_VS_INPUTS; case PIPE_SHADER_CAP_MAX_OUTPUTS: @@ -704,6 +733,10 @@ vgpu10_get_shader_param(struct pipe_screen *screen, return VGPU10_MAX_FS_OUTPUTS; else if (shader == PIPE_SHADER_GEOMETRY) return VGPU10_MAX_GS_OUTPUTS; + else if (shader == PIPE_SHADER_TESS_CTRL) + return VGPU11_MAX_HS_OUTPUTS; + else if (shader == PIPE_SHADER_TESS_EVAL) + return VGPU11_MAX_DS_OUTPUTS; else return VGPU10_MAX_VS_OUTPUTS; case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: @@ -844,6 +877,8 @@ svga_get_driver_query_info(struct pipe_screen *screen, PIPE_DRIVER_QUERY_TYPE_UINT64), QUERY("num-bytes-uploaded", SVGA_QUERY_NUM_BYTES_UPLOADED, PIPE_DRIVER_QUERY_TYPE_BYTES), + QUERY("num-command-buffers", SVGA_QUERY_NUM_COMMAND_BUFFERS, + PIPE_DRIVER_QUERY_TYPE_UINT64), QUERY("command-buffer-size", SVGA_QUERY_COMMAND_BUFFER_SIZE, PIPE_DRIVER_QUERY_TYPE_BYTES), QUERY("flush-time", SVGA_QUERY_FLUSH_TIME, @@ -860,6 +895,10 @@ svga_get_driver_query_info(struct pipe_screen *screen, PIPE_DRIVER_QUERY_TYPE_UINT64), QUERY("num-const-updates", SVGA_QUERY_NUM_CONST_UPDATES, PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-shader-relocations", SVGA_QUERY_NUM_SHADER_RELOCATIONS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-surface-relocations", SVGA_QUERY_NUM_SURFACE_RELOCATIONS, + PIPE_DRIVER_QUERY_TYPE_UINT64), /* running total counters */ QUERY("memory-used", SVGA_QUERY_MEMORY_USED, @@ -878,6 +917,8 @@ svga_get_driver_query_info(struct pipe_screen *screen, PIPE_DRIVER_QUERY_TYPE_UINT64), QUERY("num-commands-per-draw", SVGA_QUERY_NUM_COMMANDS_PER_DRAW, PIPE_DRIVER_QUERY_TYPE_FLOAT), + QUERY("shader-mem-used", SVGA_QUERY_SHADER_MEM_USED, + PIPE_DRIVER_QUERY_TYPE_UINT64), }; #undef QUERY @@ -1012,9 +1053,10 @@ svga_screen_create(struct svga_winsys_screen *sws) goto error2; } - debug_printf("%s enabled = %u\n", - sws->have_sm4_1 ? "SM4_1" : "VGPU10", - sws->have_sm4_1 ? 1 : sws->have_vgpu10); + debug_printf("%s enabled\n", + sws->have_sm5 ? "SM5" : + sws->have_sm4_1 ? "SM4_1" : + sws->have_vgpu10 ? 
"VGPU10" : "VGPU9"); debug_printf("Mesa: %s %s (%s)\n", svga_get_name(screen), PACKAGE_VERSION, MESA_GIT_SHA1); @@ -1081,13 +1123,23 @@ svga_screen_create(struct svga_winsys_screen *sws) svgascreen->ms_samples |= 1 << 3; } + if (sws->have_sm5 && debug_get_bool_option("SVGA_MSAA", TRUE)) { + if (get_bool_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_8X, FALSE)) + svgascreen->ms_samples |= 1 << 7; + } + /* Maximum number of constant buffers */ svgascreen->max_const_buffers = get_uint_cap(sws, SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS, 1); svgascreen->max_const_buffers = MIN2(svgascreen->max_const_buffers, SVGA_MAX_CONST_BUFS); + svgascreen->haveBlendLogicops = + get_bool_cap(sws, SVGA3D_DEVCAP_LOGIC_BLENDOPS, FALSE); + screen->is_format_supported = svga_is_dx_format_supported; + + svgascreen->max_viewports = SVGA3D_DX_MAX_VIEWPORTS; } else { /* VGPU9 */ @@ -1122,6 +1174,9 @@ svga_screen_create(struct svga_winsys_screen *sws) /* No multisampling */ svgascreen->ms_samples = 0; + + /* Only one viewport */ + svgascreen->max_viewports = 1; } /* common VGPU9 / VGPU10 caps */ diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h index 12b93468da2..aa0001b11e5 100644 --- a/src/gallium/drivers/svga/svga_screen.h +++ b/src/gallium/drivers/svga/svga_screen.h @@ -50,10 +50,13 @@ struct svga_screen /** Device caps */ boolean haveProvokingVertex; boolean haveLineStipple, haveLineSmooth; + boolean haveBlendLogicops; float maxLineWidth, maxLineWidthAA; float maxPointSize; + float pointSmoothThreshold; /** Disable point AA for sizes less than this */ unsigned max_color_buffers; unsigned max_const_buffers; + unsigned max_viewports; unsigned ms_samples; struct { diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c index b5bcd51a7fc..a0e5f5ff2b9 100644 --- a/src/gallium/drivers/svga/svga_screen_cache.c +++ b/src/gallium/drivers/svga/svga_screen_cache.c @@ -311,6 +311,9 @@ svga_screen_cache_add(struct svga_screen *svgascreen, } +/* Maximum number of invalidate surface commands in a command buffer */ +# define SVGA_MAX_SURFACE_TO_INVALIDATE 1000 + /** * Called during the screen flush to move all buffers not in a validate list * into the unused list. @@ -354,6 +357,7 @@ svga_screen_cache_flush(struct svga_screen *svgascreen, next = curr->next; } + unsigned nsurf = 0; curr = cache->validated.next; next = curr->next; while (curr != &cache->validated) { @@ -381,12 +385,14 @@ svga_screen_cache_flush(struct svga_screen *svgascreen, * this function itself is called inside svga_context_flush(). */ svga->swc->flush(svga->swc, NULL); + nsurf = 0; ret = SVGA3D_InvalidateGBSurface(svga->swc, entry->handle); assert(ret == PIPE_OK); } /* add the entry to the invalidated list */ list_add(&entry->head, &cache->invalidated); + nsurf++; } curr = next; @@ -394,6 +400,16 @@ svga_screen_cache_flush(struct svga_screen *svgascreen, } mtx_unlock(&cache->mutex); + + /** + * In some rare cases (when running ARK survival), we hit the max number + * of surface relocations with invalidated surfaces during context flush. + * So if the number of invalidated surface exceeds a certain limit (1000), + * we'll do another winsys flush. 
+ */ + if (nsurf > SVGA_MAX_SURFACE_TO_INVALIDATE) { + svga->swc->flush(svga->swc, NULL); + } } diff --git a/src/gallium/drivers/svga/svga_shader.c b/src/gallium/drivers/svga/svga_shader.c index 3a7516945c6..52f1153fd61 100644 --- a/src/gallium/drivers/svga/svga_shader.c +++ b/src/gallium/drivers/svga/svga_shader.c @@ -229,22 +229,25 @@ static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = { */ void svga_init_shader_key_common(const struct svga_context *svga, - enum pipe_shader_type shader, + enum pipe_shader_type shader_type, + const struct svga_shader *shader, struct svga_compile_key *key) { unsigned i, idx = 0; - assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views)); + assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views)); /* In case the number of samplers and sampler_views doesn't match, * loop over the lower of the two counts. */ - key->num_textures = MAX2(svga->curr.num_sampler_views[shader], - svga->curr.num_samplers[shader]); + key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type], + svga->curr.num_samplers[shader_type]); for (i = 0; i < key->num_textures; i++) { - struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i]; - const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i]; + struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i]; + const struct svga_sampler_state + *sampler = svga->curr.sampler[shader_type][i]; + if (view) { assert(view->texture); assert(view->texture->target < (1 << 4)); /* texture_target:4 */ @@ -304,6 +307,12 @@ svga_init_shader_key_common(const struct svga_context *svga, if (view->texture->format == PIPE_FORMAT_DXT1_RGB || view->texture->format == PIPE_FORMAT_DXT1_SRGB) swizzle_tab = set_alpha; + + /* Save the compare function as we need to handle + * depth compare in the shader. + */ + key->tex[i].compare_mode = sampler->compare_mode; + key->tex[i].compare_func = sampler->compare_func; } key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r]; @@ -314,8 +323,10 @@ svga_init_shader_key_common(const struct svga_context *svga, if (sampler) { if (!sampler->normalized_coords) { - assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */ - key->tex[i].width_height_idx = idx++; + if (view) { + assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */ + key->tex[i].width_height_idx = idx++; + } key->tex[i].unnormalized = TRUE; ++key->num_unnormalized_coords; @@ -326,6 +337,9 @@ svga_init_shader_key_common(const struct svga_context *svga, } } } + + key->clamp_vertex_color = svga->curr.rast ? + svga->curr.rast->templ.clamp_vertex_color : 0; } @@ -380,6 +394,8 @@ define_gb_shader_vgpu9(struct svga_context *svga, variant->gb_shader = sws->shader_create(sws, variant->type, variant->tokens, codeLen); + svga->hud.shader_mem_used += codeLen; + if (!variant->gb_shader) return PIPE_ERROR_OUT_OF_MEMORY; @@ -398,6 +414,7 @@ define_gb_shader_vgpu10(struct svga_context *svga, { struct svga_winsys_context *swc = svga->swc; enum pipe_error ret; + unsigned len = codeLen + variant->signatureLen; /** * Shaders in VGPU10 enabled device reside in the device COTable. 
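The key->tex[i].compare_mode / compare_func fields saved above exist because, for texture formats without a hardware shadow-compare path, the generated fragment code has to apply the depth comparison itself. The snippet below is only an illustrative sketch of that comparison, written as a hypothetical C helper; the driver actually emits VGPU10 shader instructions for this, and the PIPE_FUNC_* values come from gallium's p_defines.h:

   /* Hypothetical helper, for illustration only: evaluate a GL-style
    * shadow compare (reference value against the fetched depth texel)
    * using the compare function recorded in the compile key.
    */
   static float
   shadow_compare(unsigned compare_func, float ref, float texel)
   {
      int pass;

      switch (compare_func) {
      case PIPE_FUNC_LESS:     pass = ref <  texel; break;
      case PIPE_FUNC_LEQUAL:   pass = ref <= texel; break;
      case PIPE_FUNC_GREATER:  pass = ref >  texel; break;
      case PIPE_FUNC_GEQUAL:   pass = ref >= texel; break;
      case PIPE_FUNC_EQUAL:    pass = ref == texel; break;
      case PIPE_FUNC_NOTEQUAL: pass = ref != texel; break;
      case PIPE_FUNC_ALWAYS:   pass = 1;            break;
      default:                 pass = 0;            break; /* PIPE_FUNC_NEVER */
      }
      return pass ? 1.0f : 0.0f;
   }

This is also why the special-case else-branch is dropped from make_fs_key() later in this patch: the compare state is now recorded once, in the common key setup, for every sampler view.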
@@ -412,7 +429,11 @@ define_gb_shader_vgpu10(struct svga_context *svga, /* Create gb memory for the shader and upload the shader code */ variant->gb_shader = swc->shader_create(swc, variant->id, variant->type, - variant->tokens, codeLen); + variant->tokens, codeLen, + variant->signature, + variant->signatureLen); + + svga->hud.shader_mem_used += len; if (!variant->gb_shader) { /* Free the shader ID */ @@ -429,7 +450,8 @@ define_gb_shader_vgpu10(struct svga_context *svga, * the shader creation and return an error. */ ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader, - variant->id, variant->type, codeLen); + variant->id, variant->type, + len); if (ret != PIPE_OK) goto fail; @@ -511,7 +533,10 @@ svga_set_shader(struct svga_context *svga, assert(type == SVGA3D_SHADERTYPE_VS || type == SVGA3D_SHADERTYPE_GS || - type == SVGA3D_SHADERTYPE_PS); + type == SVGA3D_SHADERTYPE_PS || + type == SVGA3D_SHADERTYPE_HS || + type == SVGA3D_SHADERTYPE_DS || + type == SVGA3D_SHADERTYPE_CS); if (svga_have_gb_objects(svga)) { struct svga_winsys_gb_shader *gbshader = @@ -533,7 +558,27 @@ svga_set_shader(struct svga_context *svga, struct svga_shader_variant * svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type) { - struct svga_shader_variant *variant = CALLOC_STRUCT(svga_shader_variant); + struct svga_shader_variant *variant; + + switch (type) { + case PIPE_SHADER_FRAGMENT: + variant = CALLOC(1, sizeof(struct svga_fs_variant)); + break; + case PIPE_SHADER_GEOMETRY: + variant = CALLOC(1, sizeof(struct svga_gs_variant)); + break; + case PIPE_SHADER_VERTEX: + variant = CALLOC(1, sizeof(struct svga_vs_variant)); + break; + case PIPE_SHADER_TESS_EVAL: + variant = CALLOC(1, sizeof(struct svga_tes_variant)); + break; + case PIPE_SHADER_TESS_CTRL: + variant = CALLOC(1, sizeof(struct svga_tcs_variant)); + break; + default: + return NULL; + } if (variant) { variant->type = svga_shader_type(type); @@ -547,19 +592,11 @@ void svga_destroy_shader_variant(struct svga_context *svga, struct svga_shader_variant *variant) { - enum pipe_error ret = PIPE_OK; - if (svga_have_gb_objects(svga) && variant->gb_shader) { if (svga_have_vgpu10(svga)) { struct svga_winsys_context *swc = svga->swc; swc->shader_destroy(swc, variant->gb_shader); - ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id); - if (ret != PIPE_OK) { - /* flush and try again */ - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id)); util_bitmask_clear(svga->shader_id_bm, variant->id); } else { @@ -570,17 +607,13 @@ svga_destroy_shader_variant(struct svga_context *svga, } else { if (variant->id != UTIL_BITMASK_INVALID_INDEX) { - ret = SVGA3D_DestroyShader(svga->swc, variant->id, variant->type); - if (ret != PIPE_OK) { - /* flush and try again */ - svga_context_flush(svga, NULL); - ret = SVGA3D_DestroyShader(svga->swc, variant->id, variant->type); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id, + variant->type)); util_bitmask_clear(svga->shader_id_bm, variant->id); } } + FREE(variant->signature); FREE((unsigned *)variant->tokens); FREE(variant); @@ -612,6 +645,8 @@ svga_rebind_shaders(struct svga_context *svga) svga->rebind.flags.vs = 0; svga->rebind.flags.gs = 0; svga->rebind.flags.fs = 0; + svga->rebind.flags.tcs = 0; + svga->rebind.flags.tes = 0; return PIPE_OK; } @@ -637,5 +672,19 @@ svga_rebind_shaders(struct svga_context *svga) } 
svga->rebind.flags.fs = 0; + if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) { + ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader, SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; + } + svga->rebind.flags.tcs = 0; + + if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) { + ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader, SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; + } + svga->rebind.flags.tes = 0; + return PIPE_OK; } diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h index 67f6b5aeb63..31ccf97d39a 100644 --- a/src/gallium/drivers/svga/svga_shader.h +++ b/src/gallium/drivers/svga/svga_shader.h @@ -68,6 +68,8 @@ struct svga_compile_key unsigned need_prescale:1; unsigned writes_psize:1; unsigned wide_point:1; + unsigned writes_viewport_index:1; + unsigned num_prescale:5; } gs; /* fragment shader only */ @@ -83,15 +85,42 @@ struct svga_compile_key unsigned alpha_func:4; /**< SVGA3D_CMP_x */ unsigned write_color0_to_n_cbufs:4; unsigned aa_point:1; + unsigned layer_to_zero:1; int aa_point_coord_index; float alpha_ref; } fs; + /* tessellation control shader */ + struct { + unsigned vertices_per_patch:8; + enum pipe_prim_type prim_mode:8; + enum pipe_tess_spacing spacing:3; + unsigned vertices_order_cw:1; + unsigned point_mode:1; + unsigned passthrough:1; + } tcs; + + /* tessellation evaluation shader */ + struct { + unsigned vertices_per_patch:8; + unsigned tessfactor_index:8; + unsigned need_prescale:1; + unsigned need_tessouter:1; + unsigned need_tessinner:1; + } tes; + + /* compute shader */ + struct { + unsigned grid_size[3]; + } cs; + /* any shader type */ int8_t generic_remap_table[MAX_GENERIC_VARYING]; unsigned num_textures:8; unsigned num_unnormalized_coords:8; unsigned clip_plane_enable:PIPE_MAX_CLIP_PLANES; + unsigned last_vertex_stage:1; + unsigned clamp_vertex_color:1; unsigned sprite_origin_lower_left:1; uint16_t sprite_coord_enable; struct { @@ -121,6 +150,10 @@ struct svga_token_key { unsigned writes_psize:1; unsigned aa_point:1; } gs; + struct { + unsigned write_position:1; + } vs; + unsigned dynamic_indexing:1; }; /** @@ -143,6 +176,10 @@ struct svga_shader_variant const unsigned *tokens; unsigned nr_tokens; + /* shader signature */ + unsigned signatureLen; + SVGA3dDXShaderSignatureHeader *signature; + /** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE, * SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY. */ @@ -154,6 +191,18 @@ struct svga_shader_variant /* GB object buffer containing the bytecode */ struct svga_winsys_gb_shader *gb_shader; + /** Next variant */ + struct svga_shader_variant *next; +}; + + +/** + * Shader variant for fragment shader + */ +struct svga_fs_variant +{ + struct svga_shader_variant base; + boolean uses_flat_interp; /** TRUE if flat interpolation qualifier is * applied to any of the varyings. 
*/ @@ -168,9 +217,56 @@ struct svga_shader_variant /** For FS-based polygon stipple */ unsigned pstipple_sampler_unit; +}; - /** Next variant */ - struct svga_shader_variant *next; + +/** + * Shader variant for geometry shader + */ +struct svga_gs_variant +{ + struct svga_shader_variant base; +}; + + +/** + * Shader variant for vertex shader + */ +struct svga_vs_variant +{ + struct svga_shader_variant base; +}; + + +/** + * Shader variant for tessellation evaluation shader + */ +struct svga_tes_variant +{ + struct svga_shader_variant base; + + enum pipe_prim_type prim_mode:8; + enum pipe_tess_spacing spacing:3; + unsigned vertices_order_cw:1; + unsigned point_mode:1; +}; + + +/** + * Shader variant for tessellation control shader + */ +struct svga_tcs_variant +{ + struct svga_shader_variant base; +}; + + +/** + * Shader variant for compute shader + */ +struct svga_cs_variant +{ + struct svga_shader_variant base; }; @@ -237,6 +333,30 @@ struct svga_geometry_shader }; +struct svga_tcs_shader +{ + struct svga_shader base; + + /** Mask of which generic varying variables are written by this shader */ + uint64_t generic_outputs; +}; + + +struct svga_tes_shader +{ + struct svga_shader base; + + /** Mask of which generic varying variables are written by this shader */ + uint64_t generic_inputs; +}; + + +struct svga_compute_shader +{ + struct svga_shader base; +}; + + static inline boolean svga_compile_keys_equal(const struct svga_compile_key *a, const struct svga_compile_key *b) @@ -264,7 +384,8 @@ svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING], void svga_init_shader_key_common(const struct svga_context *svga, - enum pipe_shader_type shader, + enum pipe_shader_type shader_type, + const struct svga_shader *shader, struct svga_compile_key *key); struct svga_shader_variant * @@ -328,6 +449,12 @@ svga_shader_type(enum pipe_shader_type shader) return SVGA3D_SHADERTYPE_GS; case PIPE_SHADER_FRAGMENT: return SVGA3D_SHADERTYPE_PS; + case PIPE_SHADER_TESS_CTRL: + return SVGA3D_SHADERTYPE_HS; + case PIPE_SHADER_TESS_EVAL: + return SVGA3D_SHADERTYPE_DS; + case PIPE_SHADER_COMPUTE: + return SVGA3D_SHADERTYPE_CS; default: assert(!"Invalid shader type"); return SVGA3D_SHADERTYPE_VS; @@ -351,4 +478,39 @@ svga_have_gs_streamout(const struct svga_context *svga) } +static inline struct svga_fs_variant * +svga_fs_variant(struct svga_shader_variant *variant) +{ + assert(!variant || variant->type == SVGA3D_SHADERTYPE_PS); + return (struct svga_fs_variant *)variant; +} + + +static inline struct svga_tes_variant * +svga_tes_variant(struct svga_shader_variant *variant) +{ + assert(!variant || variant->type == SVGA3D_SHADERTYPE_DS); + return (struct svga_tes_variant *)variant; +} + + +static inline struct svga_cs_variant * +svga_cs_variant(struct svga_shader_variant *variant) +{ + assert(!variant || variant->type == SVGA3D_SHADERTYPE_CS); + return (struct svga_cs_variant *)variant; +} + + +/* Returns TRUE if we are currently using flat shading. + */ +static inline boolean +svga_is_using_flat_shading(const struct svga_context *svga) +{ + return + svga->state.hw_draw.fs ? 
+ svga_fs_variant(svga->state.hw_draw.fs)->uses_flat_interp : FALSE; +} + + #endif /* SVGA_SHADER_H */ diff --git a/src/gallium/drivers/svga/svga_state.c b/src/gallium/drivers/svga/svga_state.c index dad78389a23..ad647d8784c 100644 --- a/src/gallium/drivers/svga/svga_state.c +++ b/src/gallium/drivers/svga/svga_state.c @@ -60,19 +60,40 @@ static const struct svga_tracked_state *hw_clear_state[] = }; -/* Atoms to update hardware state prior to emitting a draw packet. +/** + * Atoms to update hardware state prior to emitting a draw packet + * for VGPU9 device. */ -static const struct svga_tracked_state *hw_draw_state[] = +static const struct svga_tracked_state *hw_draw_state_vgpu9[] = +{ + &svga_hw_fs, + &svga_hw_vs, + &svga_hw_rss, + &svga_hw_tss, + &svga_hw_tss_binding, + &svga_hw_clip_planes, + &svga_hw_vdecl, + &svga_hw_fs_constants, + &svga_hw_vs_constants, + NULL +}; + + +/** + * Atoms to update hardware state prior to emitting a draw packet + * for VGPU10 device. + * Geometry Shader is new to VGPU10. + * TSS and TSS bindings are replaced by sampler and sampler bindings. + */ +static const struct svga_tracked_state *hw_draw_state_vgpu10[] = { &svga_need_tgsi_transform, &svga_hw_fs, &svga_hw_gs, &svga_hw_vs, &svga_hw_rss, - &svga_hw_sampler, /* VGPU10 */ - &svga_hw_sampler_bindings, /* VGPU10 */ - &svga_hw_tss, /* pre-VGPU10 */ - &svga_hw_tss_binding, /* pre-VGPU10 */ + &svga_hw_sampler, + &svga_hw_sampler_bindings, &svga_hw_clip_planes, &svga_hw_vdecl, &svga_hw_fs_constants, @@ -82,6 +103,33 @@ static const struct svga_tracked_state *hw_draw_state[] = }; +/** + * Atoms to update hardware state prior to emitting a draw packet + * for SM5 device. + * TCS and TES Shaders are new to SM5 device. + */ +static const struct svga_tracked_state *hw_draw_state_sm5[] = +{ + &svga_need_tgsi_transform, + &svga_hw_fs, + &svga_hw_gs, + &svga_hw_tes, + &svga_hw_tcs, + &svga_hw_vs, + &svga_hw_rss, + &svga_hw_sampler, + &svga_hw_sampler_bindings, + &svga_hw_clip_planes, + &svga_hw_vdecl, + &svga_hw_fs_constants, + &svga_hw_gs_constants, + &svga_hw_tes_constants, + &svga_hw_tcs_constants, + &svga_hw_vs_constants, + NULL +}; + + static const struct svga_tracked_state *swtnl_draw_state[] = { &svga_update_swtnl_draw, @@ -89,6 +137,7 @@ static const struct svga_tracked_state *swtnl_draw_state[] = NULL }; + /* Flattens the graph of state dependencies. Could swap the positions * of hw_clear_state and need_swtnl_state without breaking anything. */ @@ -96,27 +145,26 @@ static const struct svga_tracked_state **state_levels[] = { need_swtnl_state, hw_clear_state, - hw_draw_state, + NULL, /* hw_draw_state, to be set to the right version */ swtnl_draw_state }; - -static unsigned -check_state(unsigned a, unsigned b) +static uint64_t +check_state(uint64_t a, uint64_t b) { return (a & b); } static void -accumulate_state(unsigned *a, unsigned b) +accumulate_state(uint64_t *a, uint64_t b) { *a |= b; } static void -xor_states(unsigned *result, unsigned a, unsigned b) +xor_states(uint64_t *result, uint64_t a, uint64_t b) { *result = a ^ b; } @@ -125,7 +173,7 @@ xor_states(unsigned *result, unsigned a, unsigned b) static enum pipe_error update_state(struct svga_context *svga, const struct svga_tracked_state *atoms[], - unsigned *state) + uint64_t *state) { #ifdef DEBUG boolean debug = TRUE; @@ -144,13 +192,13 @@ update_state(struct svga_context *svga, * state flags which are generated and checked to help ensure * state atoms are ordered correctly in the list. 
*/ - unsigned examined, prev; + uint64_t examined, prev; examined = 0; prev = *state; for (i = 0; atoms[i] != NULL; i++) { - unsigned generated; + uint64_t generated; assert(atoms[i]->dirty); assert(atoms[i]->update); @@ -247,12 +295,7 @@ svga_update_state_retry(struct svga_context *svga, unsigned max_level) { enum pipe_error ret; - ret = svga_update_state( svga, max_level ); - - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = svga_update_state( svga, max_level ); - } + SVGA_RETRY_OOM(svga, ret, svga_update_state( svga, max_level )); return ret == PIPE_OK; } @@ -325,3 +368,14 @@ svga_emit_initial_state(struct svga_context *svga) return PIPE_OK; } } + + +void +svga_init_tracked_state(struct svga_context *svga) +{ + /* Set the hw_draw_state atom list to the one for the particular gpu version. + */ + state_levels[2] = svga_have_sm5(svga) ? hw_draw_state_sm5 : + (svga_have_vgpu10(svga) ? hw_draw_state_vgpu10 : + hw_draw_state_vgpu9); +} diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h index 963a27941ba..76befebe4a3 100644 --- a/src/gallium/drivers/svga/svga_state.h +++ b/src/gallium/drivers/svga/svga_state.h @@ -39,8 +39,8 @@ void svga_destroy_state( struct svga_context *svga ); struct svga_tracked_state { const char *name; - unsigned dirty; - enum pipe_error (*update)( struct svga_context *svga, unsigned dirty ); + uint64_t dirty; + enum pipe_error (*update)( struct svga_context *svga, uint64_t dirty ); }; /* NEED_SWTNL @@ -61,6 +61,8 @@ extern struct svga_tracked_state svga_need_tgsi_transform; extern struct svga_tracked_state svga_hw_vs; extern struct svga_tracked_state svga_hw_fs; extern struct svga_tracked_state svga_hw_gs; +extern struct svga_tracked_state svga_hw_tcs; +extern struct svga_tracked_state svga_hw_tes; extern struct svga_tracked_state svga_hw_rss; extern struct svga_tracked_state svga_hw_pstipple; extern struct svga_tracked_state svga_hw_sampler; @@ -72,6 +74,8 @@ extern struct svga_tracked_state svga_hw_vdecl; extern struct svga_tracked_state svga_hw_fs_constants; extern struct svga_tracked_state svga_hw_gs_constants; extern struct svga_tracked_state svga_hw_vs_constants; +extern struct svga_tracked_state svga_hw_tes_constants; +extern struct svga_tracked_state svga_hw_tcs_constants; /* SWTNL_DRAW */ @@ -105,4 +109,15 @@ enum pipe_error svga_reemit_vs_bindings(struct svga_context *svga); enum pipe_error svga_reemit_fs_bindings(struct svga_context *svga); +void svga_init_tracked_state(struct svga_context *svga); + +void * +svga_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ); + +void +svga_bind_fs_state(struct pipe_context *pipe, void *shader); + +bool svga_update_compute_state(struct svga_context *svga); + #endif diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c index e2c5bf0163a..9d9f8934ec4 100644 --- a/src/gallium/drivers/svga/svga_state_constants.c +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -133,12 +133,13 @@ svga_get_extra_fs_constants(const struct svga_context *svga, float *dest) * will be returned in 'dest'. 
*/ static unsigned -svga_get_prescale_constants(const struct svga_context *svga, float **dest) +svga_get_prescale_constants(const struct svga_context *svga, float **dest, + const struct svga_prescale *prescale) { - memcpy(*dest, svga->state.hw_clear.prescale.scale, 4 * sizeof(float)); + memcpy(*dest, prescale->scale, 4 * sizeof(float)); *dest += 4; - memcpy(*dest, svga->state.hw_clear.prescale.translate, 4 * sizeof(float)); + memcpy(*dest, prescale->translate, 4 * sizeof(float)); *dest += 4; return 2; @@ -153,8 +154,8 @@ svga_get_pt_sprite_constants(const struct svga_context *svga, float **dest) const struct svga_screen *screen = svga_screen(svga->pipe.screen); float *dst = *dest; - dst[0] = 1.0 / (svga->curr.viewport.scale[0] * 2); - dst[1] = 1.0 / (svga->curr.viewport.scale[1] * 2); + dst[0] = 1.0 / (svga->curr.viewport[0].scale[0] * 2); + dst[1] = 1.0 / (svga->curr.viewport[0].scale[1] * 2); dst[2] = svga->curr.rast->pointsize; dst[3] = screen->maxPointSize; *dest = *dest + 4; @@ -186,6 +187,7 @@ svga_get_clip_plane_constants(const struct svga_context *svga, return count; } + /** * Emit any extra vertex shader constants into the buffer pointed * to by 'dest'. @@ -203,15 +205,16 @@ svga_get_extra_vs_constants(const struct svga_context *svga, float *dest) /* SVGA_NEW_VS_VARIANT */ if (variant->key.vs.need_prescale) { - count += svga_get_prescale_constants(svga, &dest); + count += svga_get_prescale_constants(svga, &dest, + &svga->state.hw_clear.prescale[0]); } if (variant->key.vs.undo_viewport) { /* Used to convert window coords back to NDC coords */ - dest[0] = 1.0f / svga->curr.viewport.scale[0]; - dest[1] = 1.0f / svga->curr.viewport.scale[1]; - dest[2] = -svga->curr.viewport.translate[0]; - dest[3] = -svga->curr.viewport.translate[1]; + dest[0] = 1.0f / svga->curr.viewport[0].scale[0]; + dest[1] = 1.0f / svga->curr.viewport[0].scale[1]; + dest[2] = -svga->curr.viewport[0].translate[0]; + dest[3] = -svga->curr.viewport[0].translate[1]; dest += 4; count += 1; } @@ -250,7 +253,20 @@ svga_get_extra_gs_constants(const struct svga_context *svga, float *dest) } if (variant->key.gs.need_prescale) { - count += svga_get_prescale_constants(svga, &dest); + unsigned i, num_prescale = 1; + + /* If prescale is needed and the geometry shader writes to viewport + * index, then prescale for all viewports will be added to the + * constant buffer. + */ + if (variant->key.gs.writes_viewport_index) + num_prescale = svga->state.hw_clear.num_prescale; + + for (i = 0; i < num_prescale; i++) { + count += + svga_get_prescale_constants(svga, &dest, + &svga->state.hw_clear.prescale[i]); + } } /* SVGA_NEW_CLIP */ @@ -265,6 +281,77 @@ svga_get_extra_gs_constants(const struct svga_context *svga, float *dest) } +/** + * Emit any extra tessellation control shader constants into the + * buffer pointed to by 'dest'. + */ +static unsigned +svga_get_extra_tcs_constants(struct svga_context *svga, float *dest) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.tcs; + unsigned count = 0; + + /* SVGA_NEW_CLIP */ + count += svga_get_clip_plane_constants(svga, variant, &dest); + + /* common constants */ + count += svga_get_extra_constants_common(svga, variant, + PIPE_SHADER_TESS_CTRL, + dest); + + assert(count <= MAX_EXTRA_CONSTS); + return count; +} + + +/** + * Emit any extra tessellation evaluation shader constants into + * the buffer pointed to by 'dest'. 
+ */ +static unsigned +svga_get_extra_tes_constants(struct svga_context *svga, float *dest) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.tes; + unsigned count = 0; + + if (variant->key.tes.need_prescale) { + count += svga_get_prescale_constants(svga, &dest, + &svga->state.hw_clear.prescale[0]); + } + + /* SVGA_NEW_CLIP */ + count += svga_get_clip_plane_constants(svga, variant, &dest); + + /* common constants */ + count += svga_get_extra_constants_common(svga, variant, + PIPE_SHADER_TESS_EVAL, + dest); + + assert(count <= MAX_EXTRA_CONSTS); + return count; +} + + +/** + * Emit any extra compute shader constants into + * the buffer pointed to by 'dest'. + */ +static unsigned +svga_get_extra_cs_constants(struct svga_context *svga, float *dest) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.cs; + unsigned count = 0; + + /* common constants */ + count += svga_get_extra_constants_common(svga, variant, + PIPE_SHADER_COMPUTE, + dest); + + assert(count <= MAX_EXTRA_CONSTS); + return count; +} + + /* * Check and emit a range of shader constant registers, trying to coalesce * successive shader constant updates in a single command in order to save @@ -490,6 +577,15 @@ emit_constbuf_vgpu10(struct svga_context *svga, enum pipe_shader_type shader) const struct svga_shader_variant *variant; unsigned alloc_buf_size; + assert(shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_GEOMETRY || + shader == PIPE_SHADER_FRAGMENT || + shader == PIPE_SHADER_TESS_CTRL || + shader == PIPE_SHADER_TESS_EVAL || + shader == PIPE_SHADER_COMPUTE); + + cbuf = &svga->curr.constbufs[shader][0]; + switch (shader) { case PIPE_SHADER_VERTEX: variant = svga->state.hw_draw.vs; @@ -503,6 +599,18 @@ emit_constbuf_vgpu10(struct svga_context *svga, enum pipe_shader_type shader) variant = svga->state.hw_draw.gs; extra_count = svga_get_extra_gs_constants(svga, (float *) extras); break; + case PIPE_SHADER_TESS_CTRL: + variant = svga->state.hw_draw.tcs; + extra_count = svga_get_extra_tcs_constants(svga, (float *) extras); + break; + case PIPE_SHADER_TESS_EVAL: + variant = svga->state.hw_draw.tes; + extra_count = svga_get_extra_tes_constants(svga, (float *) extras); + break; + case PIPE_SHADER_COMPUTE: + variant = svga->state.hw_draw.cs; + extra_count = svga_get_extra_cs_constants(svga, (float *) extras); + break; default: assert(!"Unexpected shader type"); /* Don't return an error code since we don't want to keep re-trying @@ -706,7 +814,7 @@ emit_consts_vgpu10(struct svga_context *svga, enum pipe_shader_type shader) } static enum pipe_error -emit_fs_consts(struct svga_context *svga, unsigned dirty) +emit_fs_consts(struct svga_context *svga, uint64_t dirty) { const struct svga_shader_variant *variant = svga->state.hw_draw.fs; enum pipe_error ret = PIPE_OK; @@ -741,7 +849,7 @@ struct svga_tracked_state svga_hw_fs_constants = static enum pipe_error -emit_vs_consts(struct svga_context *svga, unsigned dirty) +emit_vs_consts(struct svga_context *svga, uint64_t dirty) { const struct svga_shader_variant *variant = svga->state.hw_draw.vs; enum pipe_error ret = PIPE_OK; @@ -776,7 +884,7 @@ struct svga_tracked_state svga_hw_vs_constants = static enum pipe_error -emit_gs_consts(struct svga_context *svga, unsigned dirty) +emit_gs_consts(struct svga_context *svga, uint64_t dirty) { const struct svga_shader_variant *variant = svga->state.hw_draw.gs; enum pipe_error ret = PIPE_OK; @@ -788,17 +896,17 @@ emit_gs_consts(struct svga_context *svga, unsigned dirty) /* SVGA_NEW_GS_CONST_BUFFER */ - if 
(svga_have_vgpu10(svga)) { - /** - * If only the rasterizer state has changed and the current geometry - * shader does not emit wide points, then there is no reason to - * re-emit the GS constants, so skip it. - */ - if (dirty == SVGA_NEW_RAST && !variant->key.gs.wide_point) - return PIPE_OK; + assert(svga_have_vgpu10(svga)); - ret = emit_consts_vgpu10(svga, PIPE_SHADER_GEOMETRY); - } + /** + * If only the rasterizer state has changed and the current geometry + * shader does not emit wide points, then there is no reason to + * re-emit the GS constants, so skip it. + */ + if (dirty == SVGA_NEW_RAST && !variant->key.gs.wide_point) + return PIPE_OK; + + ret = emit_consts_vgpu10(svga, PIPE_SHADER_GEOMETRY); return ret; } @@ -814,3 +922,66 @@ struct svga_tracked_state svga_hw_gs_constants = SVGA_NEW_TEXTURE_CONSTS), emit_gs_consts }; + + +/** + * Emit constant buffer for tessellation control shader + */ +static enum pipe_error +emit_tcs_consts(struct svga_context *svga, uint64_t dirty) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.tcs; + enum pipe_error ret = PIPE_OK; + + assert(svga_have_sm5(svga)); + + /* SVGA_NEW_TCS_VARIANT */ + if (!variant) + return PIPE_OK; + + /* SVGA_NEW_TCS_CONST_BUFFER */ + + ret = emit_consts_vgpu10(svga, PIPE_SHADER_TESS_CTRL); + + return ret; +} + + +struct svga_tracked_state svga_hw_tcs_constants = +{ + "hw tcs params", + (SVGA_NEW_TCS_CONST_BUFFER | + SVGA_NEW_TCS_VARIANT), + emit_tcs_consts +}; + + +/** + * Emit constant buffer for tessellation evaluation shader + */ +static enum pipe_error +emit_tes_consts(struct svga_context *svga, uint64_t dirty) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.tes; + enum pipe_error ret = PIPE_OK; + + assert(svga_have_sm5(svga)); + + /* SVGA_NEW_TES_VARIANT */ + if (!variant) + return PIPE_OK; + + ret = emit_consts_vgpu10(svga, PIPE_SHADER_TESS_EVAL); + + return ret; +} + + +struct svga_tracked_state svga_hw_tes_constants = +{ + "hw tes params", + (SVGA_NEW_PRESCALE | + SVGA_NEW_TES_CONST_BUFFER | + SVGA_NEW_TES_VARIANT), + emit_tes_consts +}; diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c index 99fede51254..dacf86c4277 100644 --- a/src/gallium/drivers/svga/svga_state_framebuffer.c +++ b/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -212,9 +212,13 @@ emit_fb_vgpu10(struct svga_context *svga) if (curr->cbufs[i]) { struct pipe_surface *s = curr->cbufs[i]; - rtv[i] = svga_validate_surface_view(svga, svga_surface(s)); - if (rtv[i] == NULL) { - return PIPE_ERROR_OUT_OF_MEMORY; + if (curr->cbufs[i] != hw->cbufs[i]) { + rtv[i] = svga_validate_surface_view(svga, svga_surface(s)); + if (rtv[i] == NULL) { + return PIPE_ERROR_OUT_OF_MEMORY; + } + } else { + rtv[i] = svga->state.hw_clear.rtv[i]; } assert(svga_surface(rtv[i])->view_id != SVGA3D_INVALID_ID); @@ -233,9 +237,13 @@ emit_fb_vgpu10(struct svga_context *svga) if (curr->zsbuf) { struct pipe_surface *s = curr->zsbuf; - dsv = svga_validate_surface_view(svga, svga_surface(curr->zsbuf)); - if (!dsv) { - return PIPE_ERROR_OUT_OF_MEMORY; + if (curr->zsbuf != hw->zsbuf) { + dsv = svga_validate_surface_view(svga, svga_surface(curr->zsbuf)); + if (!dsv) { + return PIPE_ERROR_OUT_OF_MEMORY; + } + } else { + dsv = svga->state.hw_clear.dsv; } /* Set the rendered-to flag */ @@ -258,10 +266,6 @@ emit_fb_vgpu10(struct svga_context *svga) /* number of render targets sent to the device, not including trailing * unbound render targets. 
*/ - svga->state.hw_clear.num_rendertargets = last_rtv + 1; - svga->state.hw_clear.dsv = dsv; - memcpy(svga->state.hw_clear.rtv, rtv, num_color * sizeof(rtv[0])); - for (i = 0; i < ss->max_color_buffers; i++) { if (hw->cbufs[i] != curr->cbufs[i]) { /* propagate the backed view surface before unbinding it */ @@ -270,19 +274,32 @@ emit_fb_vgpu10(struct svga_context *svga) &svga_surface(hw->cbufs[i])->backed->base, TRUE); } + else if (svga->state.hw_clear.rtv[i] != hw->cbufs[i] && + svga->state.hw_clear.rtv[i]) { + /* Free the alternate surface view when it is unbound. */ + svga->pipe.surface_destroy(&svga->pipe, svga->state.hw_clear.rtv[i]); + } pipe_surface_reference(&hw->cbufs[i], curr->cbufs[i]); } } + svga->state.hw_clear.num_rendertargets = last_rtv + 1; + memcpy(svga->state.hw_clear.rtv, rtv, num_color * sizeof(rtv[0])); hw->nr_cbufs = curr->nr_cbufs; if (hw->zsbuf != curr->zsbuf) { /* propagate the backed view surface before unbinding it */ if (hw->zsbuf && svga_surface(hw->zsbuf)->backed) { - svga_propagate_surface(svga, &svga_surface(hw->zsbuf)->backed->base, + svga_propagate_surface(svga, + &svga_surface(hw->zsbuf)->backed->base, TRUE); } + else if (svga->state.hw_clear.dsv != hw->zsbuf && svga->state.hw_clear.dsv) { + /* Free the alternate surface view when it is unbound. */ + svga->pipe.surface_destroy(&svga->pipe, svga->state.hw_clear.dsv); + } pipe_surface_reference(&hw->zsbuf, curr->zsbuf); } + svga->state.hw_clear.dsv = dsv; } return ret; @@ -290,7 +307,7 @@ emit_fb_vgpu10(struct svga_context *svga) static enum pipe_error -emit_framebuffer(struct svga_context *svga, unsigned dirty) +emit_framebuffer(struct svga_context *svga, uint64_t dirty) { if (svga_have_vgpu10(svga)) { return emit_fb_vgpu10(svga); @@ -383,13 +400,14 @@ struct svga_tracked_state svga_hw_framebuffer = /*********************************************************************** */ -static enum pipe_error -emit_viewport( struct svga_context *svga, - unsigned dirty ) +static void +get_viewport_prescale(struct svga_context *svga, + struct pipe_viewport_state *viewport, + SVGA3dViewport *vp, + struct svga_prescale *prescale) { - const struct pipe_viewport_state *viewport = &svga->curr.viewport; - struct svga_prescale prescale; SVGA3dRect rect; + /* Not sure if this state is relevant with POSITIONT. Probably * not, but setting to 0,1 avoids some state pingponging. 
*/ @@ -398,7 +416,6 @@ emit_viewport( struct svga_context *svga, float flip = -1.0; boolean degenerate = FALSE; boolean invertY = FALSE; - enum pipe_error ret; float fb_width = (float) svga->curr.framebuffer.width; float fb_height = (float) svga->curr.framebuffer.height; @@ -407,9 +424,8 @@ emit_viewport( struct svga_context *svga, float fy = flip * viewport->scale[1] * -1.0f + viewport->translate[1]; float fw = viewport->scale[0] * 2.0f; float fh = flip * viewport->scale[1] * 2.0f; - boolean emit_vgpu10_viewport = FALSE; - memset( &prescale, 0, sizeof(prescale) ); + memset(prescale, 0, sizeof(*prescale)); /* Examine gallium viewport transformation and produce a screen * rectangle and possibly vertex shader pre-transformation to @@ -423,14 +439,14 @@ emit_viewport( struct svga_context *svga, fw, fh); - prescale.scale[0] = 1.0; - prescale.scale[1] = 1.0; - prescale.scale[2] = 1.0; - prescale.scale[3] = 1.0; - prescale.translate[0] = 0; - prescale.translate[1] = 0; - prescale.translate[2] = 0; - prescale.translate[3] = 0; + prescale->scale[0] = 1.0; + prescale->scale[1] = 1.0; + prescale->scale[2] = 1.0; + prescale->scale[3] = 1.0; + prescale->translate[0] = 0; + prescale->translate[1] = 0; + prescale->translate[2] = 0; + prescale->translate[3] = 0; /* Enable prescale to adjust vertex positions to match VGPU10 convention only if rasterization is enabled. @@ -439,12 +455,12 @@ emit_viewport( struct svga_context *svga, degenerate = TRUE; goto out; } else { - prescale.enabled = TRUE; + prescale->enabled = TRUE; } if (fw < 0) { - prescale.scale[0] *= -1.0f; - prescale.translate[0] += -fw; + prescale->scale[0] *= -1.0f; + prescale->translate[0] += -fw; fw = -fw; fx = viewport->scale[0] * 1.0f + viewport->translate[0]; } @@ -452,54 +468,54 @@ emit_viewport( struct svga_context *svga, if (fh < 0.0) { if (svga_have_vgpu10(svga)) { /* floating point viewport params below */ - prescale.translate[1] = fh + fy * 2.0f; + prescale->translate[1] = fh + fy * 2.0f; } else { /* integer viewport params below */ - prescale.translate[1] = fh - 1.0f + fy * 2.0f; + prescale->translate[1] = fh - 1.0f + fy * 2.0f; } fh = -fh; fy -= fh; - prescale.scale[1] = -1.0f; + prescale->scale[1] = -1.0f; invertY = TRUE; } if (fx < 0) { - prescale.translate[0] += fx; - prescale.scale[0] *= fw / (fw + fx); + prescale->translate[0] += fx; + prescale->scale[0] *= fw / (fw + fx); fw += fx; fx = 0.0f; } if (fy < 0) { if (invertY) { - prescale.translate[1] -= fy; + prescale->translate[1] -= fy; } else { - prescale.translate[1] += fy; + prescale->translate[1] += fy; } - prescale.scale[1] *= fh / (fh + fy); + prescale->scale[1] *= fh / (fh + fy); fh += fy; fy = 0.0f; } if (fx + fw > fb_width) { - prescale.scale[0] *= fw / (fb_width - fx); - prescale.translate[0] -= fx * (fw / (fb_width - fx)); - prescale.translate[0] += fx; + prescale->scale[0] *= fw / (fb_width - fx); + prescale->translate[0] -= fx * (fw / (fb_width - fx)); + prescale->translate[0] += fx; fw = fb_width - fx; } if (fy + fh > fb_height) { - prescale.scale[1] *= fh / (fb_height - fy); + prescale->scale[1] *= fh / (fb_height - fy); if (invertY) { float in = fb_height - fy; /* number of vp pixels inside view */ float out = fy + fh - fb_height; /* number of vp pixels out of view */ - prescale.translate[1] += fy * out / in; + prescale->translate[1] += fy * out / in; } else { - prescale.translate[1] -= fy * (fh / (fb_height - fy)); - prescale.translate[1] += fy; + prescale->translate[1] -= fy * (fh / (fb_height - fy)); + prescale->translate[1] += fy; } fh = fb_height - fy; } 
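Everything computed above lands in the svga_prescale scale/translate pair that svga_get_prescale_constants() copies into the shader constant buffer; the last vertex-processing stage then applies it to the post-projection position before the device viewport transform. A rough, illustrative sketch of that fixup follows (the real work is emitted as shader instructions, so this C only shows the intended math, assuming the usual convention that the translate values are in NDC units and therefore scaled by w):

   /* Illustrative only: apply the prescale correction to a clip-space
    * position.  scale/translate/enabled are the svga_prescale fields
    * filled in by get_viewport_prescale().
    */
   static void
   apply_prescale(float pos[4], const struct svga_prescale *prescale)
   {
      if (!prescale->enabled)
         return;

      pos[0] = pos[0] * prescale->scale[0] + pos[3] * prescale->translate[0];
      pos[1] = pos[1] * prescale->scale[1] + pos[3] * prescale->translate[1];
      pos[2] = pos[2] * prescale->scale[2] + pos[3] * prescale->translate[2];
   }

With SM5's multiple viewports there is now one such prescale per viewport; the num_prescale bookkeeping further below exists so a geometry shader that writes the viewport index can avoid per-viewport selection logic when the prescales turn out to be identical.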
@@ -566,10 +582,10 @@ emit_viewport( struct svga_context *svga, if (invertY) adjust_y = -adjust_y; - prescale.translate[0] += adjust_x; - prescale.translate[1] += adjust_y; - prescale.translate[2] = 0.5; /* D3D clip space */ - prescale.scale[2] = 0.5; /* D3D clip space */ + prescale->translate[0] += adjust_x; + prescale->translate[1] += adjust_y; + prescale->translate[2] = 0.5; /* D3D clip space */ + prescale->scale[2] = 0.5; /* D3D clip space */ } range_min = viewport->scale[2] * -1.0f + viewport->translate[2]; @@ -584,7 +600,7 @@ emit_viewport( struct svga_context *svga, range_tmp = range_min; range_min = range_max; range_max = range_tmp; - prescale.scale[2] = -prescale.scale[2]; + prescale->scale[2] = -prescale->scale[2]; } /* If zmin is less than 0, clamp zmin to 0 and adjust the prescale. @@ -594,21 +610,21 @@ emit_viewport( struct svga_context *svga, if (range_min < 0.0f) { range_min = -0.5f * viewport->scale[2] + 0.5f + viewport->translate[2]; range_max = 0.5f * viewport->scale[2] + 0.5f + viewport->translate[2]; - prescale.scale[2] *= 2.0f; - prescale.translate[2] -= 0.5f; + prescale->scale[2] *= 2.0f; + prescale->translate[2] -= 0.5f; } - if (prescale.enabled) { + if (prescale->enabled) { float H[2]; float J[2]; int i; SVGA_DBG(DEBUG_VIEWPORT, "prescale %f,%f %fx%f\n", - prescale.translate[0], - prescale.translate[1], - prescale.scale[0], - prescale.scale[1]); + prescale->translate[0], + prescale->translate[1], + prescale->scale[0], + prescale->scale[1]); H[0] = (float)rect.w / 2.0f; H[1] = -(float)rect.h / 2.0f; @@ -645,16 +661,16 @@ emit_viewport( struct svga_context *svga, * Overwrite prescale.translate with values for K: */ for (i = 0; i < 2; i++) { - prescale.translate[i] = ((prescale.translate[i] + - (prescale.scale[i] - 1.0f) * J[i]) / H[i]); + prescale->translate[i] = ((prescale->translate[i] + + (prescale->scale[i] - 1.0f) * J[i]) / H[i]); } SVGA_DBG(DEBUG_VIEWPORT, "clipspace %f,%f %fx%f\n", - prescale.translate[0], - prescale.translate[1], - prescale.scale[0], - prescale.scale[1]); + prescale->translate[0], + prescale->translate[1], + prescale->scale[0], + prescale->scale[1]); } out: @@ -663,59 +679,90 @@ out: rect.y = 0; rect.w = 1; rect.h = 1; - prescale.enabled = FALSE; + prescale->enabled = FALSE; } - if (!svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) { - if (svga_have_vgpu10(svga)) { - emit_vgpu10_viewport = TRUE; - } - else { + vp->x = (float) rect.x; + vp->y = (float) rect.y; + vp->width = (float) rect.w; + vp->height = (float) rect.h; + vp->minDepth = range_min; + vp->maxDepth = range_max; +} + + +static enum pipe_error +emit_viewport( struct svga_context *svga, + uint64_t dirty ) +{ + struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); + SVGA3dViewport viewports[SVGA3D_DX_MAX_VIEWPORTS]; + struct svga_prescale prescale[SVGA3D_DX_MAX_VIEWPORTS]; + unsigned i; + enum pipe_error ret; + unsigned max_viewports = svgascreen->max_viewports; + + for (i = 0; i < max_viewports; i++) { + get_viewport_prescale(svga, &svga->curr.viewport[i], + &viewports[i], &prescale[i]); + } + + if (memcmp(viewports, svga->state.hw_clear.viewports, + max_viewports * sizeof viewports[0]) != 0) { + + if (!svga_have_vgpu10(svga)) { + SVGA3dRect rect; + SVGA3dViewport *vp = &viewports[0]; + + rect.x = (uint32)vp->x; + rect.y = (uint32)vp->y; + rect.w = (uint32)vp->width; + rect.h = (uint32)vp->height; + ret = SVGA3D_SetViewport(svga->swc, &rect); if (ret != PIPE_OK) return ret; - svga->state.hw_clear.viewport = rect; - } - } + ret = SVGA3D_SetZRange(svga->swc, 
vp->minDepth, vp->maxDepth); + if (ret != PIPE_OK) + return ret; - if (svga->state.hw_clear.depthrange.zmin != range_min || - svga->state.hw_clear.depthrange.zmax != range_max) - { - if (svga_have_vgpu10(svga)) { - emit_vgpu10_viewport = TRUE; + svga->state.hw_clear.viewport = rect; + svga->state.hw_clear.depthrange.zmin = vp->minDepth; + svga->state.hw_clear.depthrange.zmax = vp->maxDepth; } else { - ret = SVGA3D_SetZRange(svga->swc, range_min, range_max ); + ret = SVGA3D_vgpu10_SetViewports(svga->swc, max_viewports, + viewports); if (ret != PIPE_OK) return ret; - - svga->state.hw_clear.depthrange.zmin = range_min; - svga->state.hw_clear.depthrange.zmax = range_max; } + memcpy(svga->state.hw_clear.viewports, viewports, + max_viewports * sizeof viewports[0]); } - if (emit_vgpu10_viewport) { - SVGA3dViewport vp; - vp.x = (float) rect.x; - vp.y = (float) rect.y; - vp.width = (float) rect.w; - vp.height = (float) rect.h; - vp.minDepth = range_min; - vp.maxDepth = range_max; - ret = SVGA3D_vgpu10_SetViewports(svga->swc, 1, &vp); - if (ret != PIPE_OK) - return ret; - - svga->state.hw_clear.viewport = rect; - - svga->state.hw_clear.depthrange.zmin = range_min; - svga->state.hw_clear.depthrange.zmax = range_max; - } - - if (memcmp(&prescale, &svga->state.hw_clear.prescale, sizeof prescale) != 0) { + if (memcmp(prescale, svga->state.hw_clear.prescale, + max_viewports * sizeof prescale[0]) != 0) { svga->dirty |= SVGA_NEW_PRESCALE; - svga->state.hw_clear.prescale = prescale; + memcpy(svga->state.hw_clear.prescale, prescale, + max_viewports * sizeof prescale[0]); + + /* + * Determine number of unique prescales. This is to minimize the + * if check needed in the geometry shader to identify the prescale + * for the specified viewport. + */ + unsigned last_prescale = SVGA3D_DX_MAX_VIEWPORTS - 1; + unsigned i; + for (i = SVGA3D_DX_MAX_VIEWPORTS-1; i > 0; i--) { + if (memcmp(&svga->state.hw_clear.prescale[i], + &svga->state.hw_clear.prescale[i-1], + sizeof svga->state.hw_clear.prescale[0])) { + break; + } + last_prescale--; + } + svga->state.hw_clear.num_prescale = last_prescale + 1; } return PIPE_OK; @@ -738,32 +785,49 @@ struct svga_tracked_state svga_hw_viewport = */ static enum pipe_error emit_scissor_rect( struct svga_context *svga, - unsigned dirty ) + uint64_t dirty ) { - const struct pipe_scissor_state *scissor = &svga->curr.scissor; + struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); + const struct pipe_scissor_state *scissor = svga->curr.scissor; + unsigned max_viewports = svgascreen->max_viewports; + enum pipe_error ret; - if (svga_have_vgpu10(svga)) { - SVGASignedRect rect; + if (memcmp(&svga->state.hw_clear.scissors[0], scissor, + max_viewports * sizeof *scissor) != 0) { - rect.left = scissor->minx; - rect.top = scissor->miny; - rect.right = scissor->maxx; - rect.bottom = scissor->maxy; + if (svga_have_vgpu10(svga)) { + SVGASignedRect rect[SVGA3D_DX_MAX_VIEWPORTS]; + unsigned i; + + for (i = 0; i < max_viewports; i++) { + rect[i].left = scissor[i].minx; + rect[i].top = scissor[i].miny; + rect[i].right = scissor[i].maxx; + rect[i].bottom = scissor[i].maxy; + } - return SVGA3D_vgpu10_SetScissorRects(svga->swc, 1, &rect); - } - else { - SVGA3dRect rect; + ret = SVGA3D_vgpu10_SetScissorRects(svga->swc, max_viewports, rect); + } + else { + SVGA3dRect rect; - rect.x = scissor->minx; - rect.y = scissor->miny; - rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */ - rect.h = scissor->maxy - scissor->miny; /* + 1 ?? 
*/ + rect.x = scissor[0].minx; + rect.y = scissor[0].miny; + rect.w = scissor[0].maxx - scissor[0].minx; /* + 1 ?? */ + rect.h = scissor[0].maxy - scissor[0].miny; /* + 1 ?? */ - return SVGA3D_SetScissorRect(svga->swc, &rect); + ret = SVGA3D_SetScissorRect(svga->swc, &rect); + } + + if (ret != PIPE_OK) + return ret; + + memcpy(svga->state.hw_clear.scissors, scissor, + max_viewports * sizeof *scissor); } -} + return PIPE_OK; +} struct svga_tracked_state svga_hw_scissor = { @@ -779,7 +843,7 @@ struct svga_tracked_state svga_hw_scissor = static enum pipe_error emit_clip_planes( struct svga_context *svga, - unsigned dirty ) + uint64_t dirty ) { unsigned i; enum pipe_error ret; diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index d55a799d435..675fec96cf8 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -196,8 +196,10 @@ make_fs_key(const struct svga_context *svga, */ if (svga->curr.gs) { key->fs.gs_generic_outputs = svga->curr.gs->generic_outputs; + key->fs.layer_to_zero = !svga->curr.gs->base.info.writes_layer; } else { key->fs.vs_generic_outputs = svga->curr.vs->generic_outputs; + key->fs.layer_to_zero = 1; } /* Only need fragment shader fixup for twoside lighting if doing @@ -276,7 +278,7 @@ make_fs_key(const struct svga_context *svga, * * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER */ - svga_init_shader_key_common(svga, shader, key); + svga_init_shader_key_common(svga, shader, &fs->base, key); for (i = 0; i < svga->curr.num_samplers[shader]; ++i) { struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i]; @@ -317,15 +319,6 @@ make_fs_key(const struct svga_context *svga, debug_warn_once("Unsupported shadow compare function"); } } - else { - /* For other texture formats, just use the compare func/mode - * as-is. Should be no-ops for color textures. For depth - * textures, we do not get automatic depth compare. We have - * to do it ourselves in the shader. And we don't get PCF. - */ - key->tex[i].compare_mode = sampler->compare_mode; - key->tex[i].compare_func = sampler->compare_func; - } } } } @@ -401,22 +394,26 @@ svga_reemit_fs_bindings(struct svga_context *svga) static enum pipe_error -emit_hw_fs(struct svga_context *svga, unsigned dirty) +emit_hw_fs(struct svga_context *svga, uint64_t dirty) { struct svga_shader_variant *variant = NULL; enum pipe_error ret = PIPE_OK; struct svga_fragment_shader *fs = svga->curr.fs; struct svga_compile_key key; + struct svga_shader *prevShader = NULL; /* shader in the previous stage */ SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_EMITFS); + prevShader = svga->curr.gs ? + &svga->curr.gs->base : (svga->curr.tes ? + &svga->curr.tes->base : &svga->curr.vs->base); + /* Disable rasterization if rasterizer_discard flag is set or * vs/gs does not output position. 
*/ svga->disable_rasterizer = svga->curr.rast->templ.rasterizer_discard || - (svga->curr.gs && !svga->curr.gs->base.info.writes_position) || - (!svga->curr.gs && !svga->curr.vs->base.info.writes_position); + !prevShader->info.writes_position; /* Set FS to NULL when rasterization is to be disabled */ if (svga->disable_rasterizer) { diff --git a/src/gallium/drivers/svga/svga_state_gs.c b/src/gallium/drivers/svga/svga_state_gs.c index 1eb4cebc08d..670b757c45f 100644 --- a/src/gallium/drivers/svga/svga_state_gs.c +++ b/src/gallium/drivers/svga/svga_state_gs.c @@ -109,34 +109,45 @@ make_gs_key(struct svga_context *svga, struct svga_compile_key *key) /* * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER */ - svga_init_shader_key_common(svga, PIPE_SHADER_GEOMETRY, key); + svga_init_shader_key_common(svga, PIPE_SHADER_GEOMETRY, &gs->base, key); memcpy(key->generic_remap_table, gs->generic_remap_table, sizeof(gs->generic_remap_table)); key->gs.vs_generic_outputs = svga->curr.vs->generic_outputs; - key->gs.need_prescale = svga->state.hw_clear.prescale.enabled; + key->gs.need_prescale = svga->state.hw_clear.prescale[0].enabled; key->gs.writes_psize = gs->base.info.writes_psize; key->gs.wide_point = gs->wide_point; + key->gs.writes_viewport_index = gs->base.info.writes_viewport_index; + if (key->gs.writes_viewport_index) { + key->gs.num_prescale = svga->state.hw_clear.num_prescale; + } else { + key->gs.num_prescale = 1; + } key->sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable; key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT); /* SVGA_NEW_RAST */ key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable; + + /* Mark this as the last shader in the vertex processing stage */ + key->last_vertex_stage = 1; } static enum pipe_error -emit_hw_gs(struct svga_context *svga, unsigned dirty) +emit_hw_gs(struct svga_context *svga, uint64_t dirty) { struct svga_shader_variant *variant; struct svga_geometry_shader *gs = svga->curr.gs; enum pipe_error ret = PIPE_OK; struct svga_compile_key key; + assert(svga_have_vgpu10(svga)); + SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_EMITGS); /* If there's a user-defined GS, we should have a pointer to a derived diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index f9cea143ac9..5a52c25a4c1 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -33,7 +33,7 @@ static enum pipe_error -update_need_swvfetch(struct svga_context *svga, unsigned dirty) +update_need_swvfetch(struct svga_context *svga, uint64_t dirty) { if (!svga->curr.velems) { /* No vertex elements bound. 
*/ @@ -58,7 +58,7 @@ struct svga_tracked_state svga_update_need_swvfetch = static enum pipe_error -update_need_pipeline(struct svga_context *svga, unsigned dirty) +update_need_pipeline(struct svga_context *svga, uint64_t dirty) { boolean need_pipeline = FALSE; struct svga_vertex_shader *vs = svga->curr.vs; @@ -156,7 +156,7 @@ struct svga_tracked_state svga_update_need_pipeline = static enum pipe_error -update_need_swtnl(struct svga_context *svga, unsigned dirty) +update_need_swtnl(struct svga_context *svga, uint64_t dirty) { boolean need_swtnl; diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c index 3c42b4e8595..3549ce2938d 100644 --- a/src/gallium/drivers/svga/svga_state_rss.c +++ b/src/gallium/drivers/svga/svga_state_rss.c @@ -97,7 +97,7 @@ translate_fill_mode(unsigned fill) * the "to" state. */ static enum pipe_error -emit_rss_vgpu9(struct svga_context *svga, unsigned dirty) +emit_rss_vgpu9(struct svga_context *svga, uint64_t dirty) { struct svga_screen *screen = svga_screen(svga->pipe.screen); struct rs_queue queue; @@ -363,7 +363,7 @@ get_no_depth_stencil_test_state(struct svga_context *svga) static enum pipe_error -emit_rss_vgpu10(struct svga_context *svga, unsigned dirty) +emit_rss_vgpu10(struct svga_context *svga, uint64_t dirty) { enum pipe_error ret = PIPE_OK; @@ -487,7 +487,7 @@ emit_rss_vgpu10(struct svga_context *svga, unsigned dirty) static enum pipe_error -emit_rss(struct svga_context *svga, unsigned dirty) +emit_rss(struct svga_context *svga, uint64_t dirty) { if (svga_have_vgpu10(svga)) { return emit_rss_vgpu10(svga, dirty); diff --git a/src/gallium/drivers/svga/svga_state_sampler.c b/src/gallium/drivers/svga/svga_state_sampler.c index 306c55dbb11..bbfd889e9f4 100644 --- a/src/gallium/drivers/svga/svga_state_sampler.c +++ b/src/gallium/drivers/svga/svga_state_sampler.c @@ -131,7 +131,7 @@ svga_validate_pipe_sampler_view(struct svga_context *svga, if (sv->id == SVGA3D_INVALID_ID) { struct svga_screen *ss = svga_screen(svga->pipe.screen); struct pipe_resource *texture = sv->base.texture; - struct svga_winsys_surface *surface = svga_resource_handle(texture); + struct svga_winsys_surface *surface; SVGA3dSurfaceFormat format; SVGA3dResourceType resourceDim; SVGA3dShaderResourceViewDesc viewDesc; @@ -154,6 +154,7 @@ svga_validate_pipe_sampler_view(struct svga_context *svga, svga_translate_texture_buffer_view_format(viewFormat, &format, &pf_flags); + surface = svga_buffer_handle(svga, texture, PIPE_BIND_SAMPLER_VIEW); } else { format = svga_translate_format(ss, viewFormat, @@ -161,6 +162,8 @@ svga_validate_pipe_sampler_view(struct svga_context *svga, /* Convert the format to a sampler-friendly format, if needed */ format = svga_sampler_format(format); + + surface = svga_texture(texture)->handle; } assert(format != SVGA3D_FORMAT_INVALID); @@ -234,15 +237,14 @@ svga_validate_pipe_sampler_view(struct svga_context *svga, static enum pipe_error -update_sampler_resources(struct svga_context *svga, unsigned dirty) +update_sampler_resources(struct svga_context *svga, uint64_t dirty) { enum pipe_error ret = PIPE_OK; enum pipe_shader_type shader; - if (!svga_have_vgpu10(svga)) - return PIPE_OK; + assert(svga_have_vgpu10(svga)); - for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_TESS_EVAL; shader++) { SVGA3dShaderResourceViewId ids[PIPE_MAX_SAMPLERS]; struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS]; struct pipe_sampler_view 
*sampler_views[PIPE_MAX_SAMPLERS]; @@ -349,7 +351,8 @@ update_sampler_resources(struct svga_context *svga, unsigned dirty) /* Handle polygon stipple sampler view */ if (svga->curr.rast->templ.poly_stipple_enable) { - const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + const unsigned unit = + svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit; struct svga_pipe_sampler_view *sv = svga->polygon_stipple.sampler_view; struct svga_winsys_surface *surface; @@ -385,15 +388,14 @@ struct svga_tracked_state svga_hw_sampler_bindings = { static enum pipe_error -update_samplers(struct svga_context *svga, unsigned dirty ) +update_samplers(struct svga_context *svga, uint64_t dirty ) { enum pipe_error ret = PIPE_OK; enum pipe_shader_type shader; - if (!svga_have_vgpu10(svga)) - return PIPE_OK; + assert(svga_have_vgpu10(svga)); - for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_TESS_EVAL; shader++) { const unsigned count = svga->curr.num_samplers[shader]; SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS]; unsigned i; @@ -404,7 +406,8 @@ update_samplers(struct svga_context *svga, unsigned dirty ) /* _NEW_FS */ if (shader == PIPE_SHADER_FRAGMENT) { - struct svga_shader_variant *fs = svga->state.hw_draw.fs; + struct svga_fs_variant *fs = + svga_fs_variant(svga->state.hw_draw.fs); /* If the fragment shader is doing the shadow comparison * for this texture unit, don't enable shadow compare in * the texture sampler state. @@ -449,7 +452,8 @@ update_samplers(struct svga_context *svga, unsigned dirty ) /* Handle polygon stipple sampler texture */ if (svga->curr.rast->templ.poly_stipple_enable) { - const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + const unsigned unit = + svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit; struct svga_sampler_state *sampler = svga->polygon_stipple.sampler; assert(sampler); diff --git a/src/gallium/drivers/svga/svga_state_tgsi_transform.c b/src/gallium/drivers/svga/svga_state_tgsi_transform.c index 1dcc05cfaf0..e0b054acbcf 100644 --- a/src/gallium/drivers/svga/svga_state_tgsi_transform.c +++ b/src/gallium/drivers/svga/svga_state_tgsi_transform.c @@ -29,7 +29,10 @@ #include "util/u_simple_shaders.h" #include "tgsi/tgsi_ureg.h" #include "tgsi/tgsi_point_sprite.h" +#include "tgsi/tgsi_dynamic_indexing.h" +#include "tgsi/tgsi_vpos.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" #include "svga_context.h" #include "svga_shader.h" @@ -49,6 +52,171 @@ bind_gs_state(struct svga_context *svga, } + +static void +insert_at_head(struct svga_shader *head, struct svga_shader *shader) +{ + shader->parent = head; + shader->next = head->next; + head->next = shader; +} + + +/** + * Bind shader + */ +static void +bind_shader(struct svga_context *svga, + const enum pipe_shader_type shader_type, + struct svga_shader *shader) +{ + switch (shader_type) { + case PIPE_SHADER_VERTEX: + svga->pipe.bind_vs_state(&svga->pipe, shader); + break; + case PIPE_SHADER_FRAGMENT: + /** + * Avoid pipe->bind_fs_state call because it goes through the aapoint + * layer. We lose the linked list of all transformed shaders if aapoint + * is used.
+ */ + svga_bind_fs_state(&svga->pipe, shader); + break; + case PIPE_SHADER_GEOMETRY: + svga->pipe.bind_gs_state(&svga->pipe, shader); + break; + case PIPE_SHADER_TESS_CTRL: + svga->pipe.bind_tcs_state(&svga->pipe, shader); + break; + case PIPE_SHADER_TESS_EVAL: + svga->pipe.bind_tes_state(&svga->pipe, shader); + break; + default: + return; + } +} + + + +/** + * Create shader + */ +static void * +create_shader(struct svga_context *svga, + const enum pipe_shader_type shader_type, + struct pipe_shader_state *state) +{ + switch (shader_type) { + case PIPE_SHADER_VERTEX: + return svga->pipe.create_vs_state(&svga->pipe, state); + case PIPE_SHADER_FRAGMENT: + /** + * Avoid pipe->create_fs_state call because it goes through the aapoint + * layer. We lose the linked list of all transformed shaders if aapoint + * is used. + */ + return svga_create_fs_state(&svga->pipe, state); + case PIPE_SHADER_GEOMETRY: + return svga->pipe.create_gs_state(&svga->pipe, state); + case PIPE_SHADER_TESS_CTRL: + return svga->pipe.create_tcs_state(&svga->pipe, state); + case PIPE_SHADER_TESS_EVAL: + return svga->pipe.create_tes_state(&svga->pipe, state); + default: + return NULL; + } +} + + +static void +write_vpos(struct svga_context *svga, + struct svga_shader *shader) +{ + struct svga_token_key key; + boolean use_existing = FALSE; + struct svga_shader *transform_shader; + const struct tgsi_shader_info *info = &shader->info; + + /* Create a token key */ + memset(&key, 0, sizeof key); + key.vs.write_position = 1; + + if (shader->next) { + transform_shader = svga_search_shader_token_key(shader->next, &key); + if (transform_shader) { + use_existing = TRUE; + } + } + + if (!use_existing) { + struct pipe_shader_state state; + struct tgsi_token *new_tokens = NULL; + + new_tokens = tgsi_write_vpos(shader->tokens, + info->immediate_count); + if (!new_tokens) + return; + + pipe_shader_state_from_tgsi(&state, new_tokens); + + transform_shader = create_shader(svga, info->processor, &state); + insert_at_head(shader, transform_shader); + FREE(new_tokens); + } + transform_shader->token_key = key; + bind_shader(svga, info->processor, transform_shader); +} + + +/** + * transform_dynamic_indexing searches the shader variant list to see if + * we already have a transformed shader for dynamic indexing and reuses/binds it. + * If we don't have one, it will create a new shader from which + * dynamic indexing is removed. The new shader is also added to the shader + * variant list and bound to the current svga state.
+ */ +static void +transform_dynamic_indexing(struct svga_context *svga, + struct svga_shader *shader) +{ + struct svga_token_key key; + boolean use_existing = FALSE; + struct svga_shader *transform_shader; + const struct tgsi_shader_info *info = &shader->info; + + /* Create a token key */ + memset(&key, 0, sizeof key); + key.dynamic_indexing = 1; + + if (shader->next) { + transform_shader = svga_search_shader_token_key(shader->next, &key); + if (transform_shader) { + use_existing = TRUE; + } + } + + struct tgsi_token *new_tokens = NULL; + + if (!use_existing) { + struct pipe_shader_state state; + new_tokens = tgsi_remove_dynamic_indexing(shader->tokens, + info->const_buffers_declared, + info->samplers_declared, + info->immediate_count); + if (!new_tokens) + return; + + pipe_shader_state_from_tgsi(&state, new_tokens); + + transform_shader = create_shader(svga, info->processor, &state); + insert_at_head(shader, transform_shader); + } + transform_shader->token_key = key; + bind_shader(svga, info->processor, transform_shader); + if (new_tokens) + FREE(new_tokens); +} + + /** * emulate_point_sprite searches the shader variants list to see it there is * a shader variant with a token string that matches the emulation @@ -233,18 +401,49 @@ add_point_sprite_shader(struct svga_context *svga) return &new_gs->base; } + +static boolean +has_dynamic_indexing(const struct tgsi_shader_info *info) +{ + return (info->dim_indirect_files & (1u << TGSI_FILE_CONSTANT)) || + (info->indirect_files & (1u << TGSI_FILE_SAMPLER)); +} + + /* update_tgsi_transform provides a hook to transform a shader if needed. */ static enum pipe_error -update_tgsi_transform(struct svga_context *svga, unsigned dirty) +update_tgsi_transform(struct svga_context *svga, uint64_t dirty) { struct svga_geometry_shader *gs = svga->curr.user_gs; /* current gs */ struct svga_vertex_shader *vs = svga->curr.vs; /* currently bound vs */ + struct svga_fragment_shader *fs = svga->curr.fs; /* currently bound fs */ + struct svga_tcs_shader *tcs = svga->curr.tcs; /* currently bound tcs */ + struct svga_tes_shader *tes = svga->curr.tes; /* currently bound tes */ struct svga_shader *orig_gs; /* original gs */ struct svga_shader *new_gs; /* new gs */ - if (!svga_have_vgpu10(svga)) - return PIPE_OK; + assert(svga_have_vgpu10(svga)); + + if (vs->base.info.num_outputs == 0) { + write_vpos(svga, &vs->base); + } + + if (vs && has_dynamic_indexing(&vs->base.info)) { + transform_dynamic_indexing(svga, &vs->base); + } + if (fs && has_dynamic_indexing(&fs->base.info)) { + transform_dynamic_indexing(svga, &fs->base); + } + if (gs && has_dynamic_indexing(&gs->base.info)) { + transform_dynamic_indexing(svga, &gs->base); + } + if (tcs && has_dynamic_indexing(&tcs->base.info)) { + transform_dynamic_indexing(svga, &tcs->base); + } + if (tes && has_dynamic_indexing(&tes->base.info)) { + transform_dynamic_indexing(svga, &tes->base); + } if (svga->curr.reduced_prim == PIPE_PRIM_POINTS) { /* If the current prim type is POINTS and the current geometry shader diff --git a/src/gallium/drivers/svga/svga_state_ts.c b/src/gallium/drivers/svga/svga_state_ts.c new file mode 100644 index 00000000000..890d153c7d6 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_ts.c @@ -0,0 +1,392 @@ +/********************************************************** + * Copyright 2018-2020 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_simple_shaders.h" + +#include "svga_context.h" +#include "svga_cmd.h" +#include "svga_tgsi.h" +#include "svga_shader.h" + + +/** + * Translate TGSI shader into an svga shader variant. + */ +static enum pipe_error +compile_tcs(struct svga_context *svga, + struct svga_tcs_shader *tcs, + const struct svga_compile_key *key, + struct svga_shader_variant **out_variant) +{ + struct svga_shader_variant *variant; + enum pipe_error ret = PIPE_ERROR; + + variant = svga_tgsi_vgpu10_translate(svga, &tcs->base, key, + PIPE_SHADER_TESS_CTRL); + if (!variant) + return PIPE_ERROR; + + ret = svga_define_shader(svga, variant); + if (ret != PIPE_OK) { + svga_destroy_shader_variant(svga, variant); + return ret; + } + + *out_variant = variant; + + return PIPE_OK; +} + + +static void +make_tcs_key(struct svga_context *svga, struct svga_compile_key *key) +{ + struct svga_tcs_shader *tcs = svga->curr.tcs; + + memset(key, 0, sizeof *key); + + /* + * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER + */ + svga_init_shader_key_common(svga, PIPE_SHADER_TESS_CTRL, &tcs->base, key); + + /* SVGA_NEW_TCS_PARAM */ + key->tcs.vertices_per_patch = svga->curr.vertices_per_patch; + + /* The tessellator parameters come from the layout section in the + * tessellation evaluation shader. Get these parameters from the + * current tessellation evaluation shader variant. + * Note: this requires the tessellation evaluation shader to be + * compiled first. 
+ */ + struct svga_tes_variant *tes = svga_tes_variant(svga->state.hw_draw.tes); + key->tcs.prim_mode = tes->prim_mode; + key->tcs.spacing = tes->spacing; + key->tcs.vertices_order_cw = tes->vertices_order_cw; + key->tcs.point_mode = tes->point_mode; + + if (svga->tcs.passthrough) + key->tcs.passthrough = 1; + + key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable; + + /* tcs is always followed by tes */ + key->last_vertex_stage = 0; +} + + +static enum pipe_error +emit_hw_tcs(struct svga_context *svga, uint64_t dirty) +{ + struct svga_shader_variant *variant; + struct svga_tcs_shader *tcs = svga->curr.tcs; + enum pipe_error ret = PIPE_OK; + struct svga_compile_key key; + + assert(svga_have_sm5(svga)); + + SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_EMITTCS); + + if (!tcs) { + /* If there is no active tcs, then there should not be + * active tes either + */ + assert(!svga->curr.tes); + if (svga->state.hw_draw.tcs != NULL) { + + /** The previous tessellation control shader is made inactive. + * Needs to unbind the tessellation control shader. + */ + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_HS, NULL); + if (ret != PIPE_OK) + goto done; + svga->state.hw_draw.tcs = NULL; + } + goto done; + } + + make_tcs_key(svga, &key); + + /* See if we already have a TCS variant that matches the key */ + variant = svga_search_shader_key(&tcs->base, &key); + + if (!variant) { + ret = compile_tcs(svga, tcs, &key, &variant); + if (ret != PIPE_OK) + goto done; + + /* insert the new variant at head of linked list */ + assert(variant); + variant->next = tcs->base.variants; + tcs->base.variants = variant; + } + + if (variant != svga->state.hw_draw.tcs) { + /* Bind the new variant */ + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_HS, variant); + if (ret != PIPE_OK) + goto done; + + svga->rebind.flags.tcs = FALSE; + svga->dirty |= SVGA_NEW_TCS_VARIANT; + svga->state.hw_draw.tcs = variant; + } + +done: + SVGA_STATS_TIME_POP(svga_sws(svga)); + return ret; +} + + +struct svga_tracked_state svga_hw_tcs = +{ + "tessellation control shader (hwtnl)", + (SVGA_NEW_VS | + SVGA_NEW_TCS | + SVGA_NEW_TES | + SVGA_NEW_TEXTURE_BINDING | + SVGA_NEW_SAMPLER | + SVGA_NEW_RAST), + emit_hw_tcs +}; + + +/** + * Translate TGSI shader into an svga shader variant. + */ +static enum pipe_error +compile_tes(struct svga_context *svga, + struct svga_tes_shader *tes, + const struct svga_compile_key *key, + struct svga_shader_variant **out_variant) +{ + struct svga_shader_variant *variant; + enum pipe_error ret = PIPE_ERROR; + + variant = svga_tgsi_vgpu10_translate(svga, &tes->base, key, + PIPE_SHADER_TESS_EVAL); + if (!variant) + return PIPE_ERROR; + + ret = svga_define_shader(svga, variant); + if (ret != PIPE_OK) { + svga_destroy_shader_variant(svga, variant); + return ret; + } + + *out_variant = variant; + + return PIPE_OK; +} + + +static void +make_tes_key(struct svga_context *svga, struct svga_compile_key *key) +{ + struct svga_tes_shader *tes = svga->curr.tes; + + memset(key, 0, sizeof *key); + + /* + * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER + */ + svga_init_shader_key_common(svga, PIPE_SHADER_TESS_EVAL, &tes->base, key); + + assert(svga->curr.tcs); + key->tes.vertices_per_patch = + svga->curr.tcs->base.info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT]; + + key->tes.need_prescale = svga->state.hw_clear.prescale[0].enabled && + (svga->curr.gs == NULL); + + /* tcs emits tessellation factors as extra outputs. 
+ * Since tes depends on them, save the tessFactor output index + * from tcs in the tes compile key, so that if a different + * tcs is bound and if the tessFactor index is different, + * a different tes variant will be generated. + */ + key->tes.tessfactor_index = svga->curr.tcs->base.info.num_outputs; + + key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable; + + /* This is the last vertex stage if there is no geometry shader. */ + key->last_vertex_stage = !svga->curr.gs; + + key->tes.need_tessinner = 0; + key->tes.need_tessouter = 0; + + for (int i = 0; i < svga->curr.tcs->base.info.num_outputs; i++) { + switch (svga->curr.tcs->base.info.output_semantic_name[i]) { + case TGSI_SEMANTIC_TESSOUTER: + key->tes.need_tessouter = 1; + break; + case TGSI_SEMANTIC_TESSINNER: + key->tes.need_tessinner = 1; + break; + default: + break; + } + } + +} + + +static void +get_passthrough_tcs(struct svga_context *svga) +{ + if (svga->tcs.passthrough_tcs && + svga->tcs.vs == svga->curr.vs && + svga->tcs.tes == svga->curr.tes && + svga->tcs.vertices_per_patch == svga->curr.vertices_per_patch) { + svga->pipe.bind_tcs_state(&svga->pipe, + svga->tcs.passthrough_tcs); + } + else { + struct svga_tcs_shader *new_tcs; + + /* delete older passthrough shader*/ + if (svga->tcs.passthrough_tcs) { + svga->pipe.delete_tcs_state(&svga->pipe, + svga->tcs.passthrough_tcs); + } + + new_tcs = (struct svga_tcs_shader *) + util_make_tess_ctrl_passthrough_shader(&svga->pipe, + svga->curr.vs->base.info.num_outputs, + svga->curr.tes->base.info.num_inputs, + svga->curr.vs->base.info.output_semantic_name, + svga->curr.vs->base.info.output_semantic_index, + svga->curr.tes->base.info.input_semantic_name, + svga->curr.tes->base.info.input_semantic_index, + svga->curr.vertices_per_patch); + svga->pipe.bind_tcs_state(&svga->pipe, new_tcs); + svga->tcs.passthrough_tcs = new_tcs; + svga->tcs.vs = svga->curr.vs; + svga->tcs.tes = svga->curr.tes; + svga->tcs.vertices_per_patch = svga->curr.vertices_per_patch; + } + + struct pipe_constant_buffer cb; + + cb.buffer = NULL; + cb.user_buffer = (void *) svga->curr.default_tesslevels; + cb.buffer_offset = 0; + cb.buffer_size = 2 * 4 * sizeof(float); + svga->pipe.set_constant_buffer(&svga->pipe, PIPE_SHADER_TESS_CTRL, 0, &cb); +} + + +static enum pipe_error +emit_hw_tes(struct svga_context *svga, uint64_t dirty) +{ + struct svga_shader_variant *variant; + struct svga_tes_shader *tes = svga->curr.tes; + enum pipe_error ret = PIPE_OK; + struct svga_compile_key key; + + assert(svga_have_sm5(svga)); + + SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_EMITTES); + + if (!tes) { + /* The GL spec implies that TES is optional when there's a TCS, + * but that's apparently a spec error. Assert if we have a TCS + * but no TES. + */ + assert(!svga->curr.tcs); + if (svga->state.hw_draw.tes != NULL) { + + /** The previous tessellation evaluation shader is made inactive. + * Needs to unbind the tessellation evaluation shader. + */ + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_DS, NULL); + if (ret != PIPE_OK) + goto done; + svga->state.hw_draw.tes = NULL; + } + goto done; + } + + if (!svga->curr.tcs) { + /* TES state is processed before the TCS + * shader and that's why we're checking for and creating the + * passthough TCS in the emit_hw_tes() function. 
+ */ + get_passthrough_tcs(svga); + svga->tcs.passthrough = TRUE; + } + else { + svga->tcs.passthrough = FALSE; + } + + make_tes_key(svga, &key); + + /* See if we already have a TES variant that matches the key */ + variant = svga_search_shader_key(&tes->base, &key); + + if (!variant) { + ret = compile_tes(svga, tes, &key, &variant); + if (ret != PIPE_OK) + goto done; + + /* insert the new variant at head of linked list */ + assert(variant); + variant->next = tes->base.variants; + tes->base.variants = variant; + } + + if (variant != svga->state.hw_draw.tes) { + /* Bind the new variant */ + ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_DS, variant); + if (ret != PIPE_OK) + goto done; + + svga->rebind.flags.tes = FALSE; + svga->dirty |= SVGA_NEW_TES_VARIANT; + svga->state.hw_draw.tes = variant; + } + +done: + SVGA_STATS_TIME_POP(svga_sws(svga)); + return ret; +} + + +struct svga_tracked_state svga_hw_tes = +{ + "tessellation evaluation shader (hwtnl)", + /* TBD SVGA_NEW_VS/SVGA_NEW_FS/SVGA_NEW_GS are required or not*/ + (SVGA_NEW_VS | + SVGA_NEW_FS | + SVGA_NEW_GS | + SVGA_NEW_TCS | + SVGA_NEW_TES | + SVGA_NEW_TEXTURE_BINDING | + SVGA_NEW_SAMPLER | + SVGA_NEW_RAST), + emit_hw_tes +}; diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c index 95b1a9e952d..75b0ac60f58 100644 --- a/src/gallium/drivers/svga/svga_state_tss.c +++ b/src/gallium/drivers/svga/svga_state_tss.c @@ -139,7 +139,7 @@ emit_tex_binding_unit(struct svga_context *svga, static enum pipe_error -update_tss_binding(struct svga_context *svga, unsigned dirty) +update_tss_binding(struct svga_context *svga, uint64_t dirty ) { const enum pipe_shader_type shader = PIPE_SHADER_FRAGMENT; boolean reemit = svga->rebind.flags.texture_samplers; @@ -149,8 +149,7 @@ update_tss_binding(struct svga_context *svga, unsigned dirty) struct bind_queue queue; - if (svga_have_vgpu10(svga)) - return PIPE_OK; + assert(!svga_have_vgpu10(svga)); queue.bind_count = 0; @@ -167,7 +166,8 @@ update_tss_binding(struct svga_context *svga, unsigned dirty) /* Polygon stipple */ if (svga->curr.rast->templ.poly_stipple_enable) { - const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + const unsigned unit = + svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit; emit_tex_binding_unit(svga, unit, svga->polygon_stipple.sampler, &svga->polygon_stipple.sampler_view->base, @@ -257,7 +257,8 @@ svga_reemit_tss_bindings(struct svga_context *svga) /* Polygon stipple */ if (svga->curr.rast && svga->curr.rast->templ.poly_stipple_enable) { - const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; + const unsigned unit = + svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit; struct svga_hw_view_state *view = &svga->state.hw_draw.views[unit]; if (view->v) { @@ -380,14 +381,13 @@ emit_tss_unit(struct svga_context *svga, unsigned unit, } static enum pipe_error -update_tss(struct svga_context *svga, unsigned dirty) +update_tss(struct svga_context *svga, uint64_t dirty ) { const enum pipe_shader_type shader = PIPE_SHADER_FRAGMENT; unsigned i; struct ts_queue queue; - if (svga_have_vgpu10(svga)) - return PIPE_OK; + assert(!svga_have_vgpu10(svga)); queue.ts_count = 0; for (i = 0; i < svga->curr.num_samplers[shader]; i++) { @@ -400,7 +400,7 @@ update_tss(struct svga_context *svga, unsigned dirty) /* polygon stipple sampler */ if (svga->curr.rast->templ.poly_stipple_enable) { emit_tss_unit(svga, - svga->state.hw_draw.fs->pstipple_sampler_unit, + 
svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit, svga->polygon_stipple.sampler, &queue); } diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c index fd6a238ef16..a49bcd0a263 100644 --- a/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ -40,7 +40,7 @@ static enum pipe_error -emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty) +emit_hw_vs_vdecl(struct svga_context *svga, uint64_t dirty) { const struct pipe_vertex_element *ve = svga->curr.velems->velem; SVGA3dVertexDecl decls[SVGA3D_INPUTREG_MAX]; @@ -136,7 +136,7 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty) static enum pipe_error -emit_hw_vdecl(struct svga_context *svga, unsigned dirty) +emit_hw_vdecl(struct svga_context *svga, uint64_t dirty) { /* SVGA_NEW_NEED_SWTNL */ diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index d63b52454ca..147b07aaeb1 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -164,7 +164,7 @@ compile_vs(struct svga_context *svga, static void make_vs_key(struct svga_context *svga, struct svga_compile_key *key) { - const enum pipe_shader_type shader = PIPE_SHADER_VERTEX; + struct svga_vertex_shader *vs = svga->curr.vs; memset(key, 0, sizeof *key); @@ -176,7 +176,8 @@ make_vs_key(struct svga_context *svga, struct svga_compile_key *key) } /* SVGA_NEW_PRESCALE */ - key->vs.need_prescale = svga->state.hw_clear.prescale.enabled && + key->vs.need_prescale = svga->state.hw_clear.prescale[0].enabled && + (svga->curr.tes == NULL) && (svga->curr.gs == NULL); /* SVGA_NEW_RAST */ @@ -199,10 +200,16 @@ make_vs_key(struct svga_context *svga, struct svga_compile_key *key) key->vs.attrib_puint_to_sscaled = svga->curr.velems->attrib_puint_to_sscaled; /* SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER */ - svga_init_shader_key_common(svga, shader, key); + svga_init_shader_key_common(svga, PIPE_SHADER_VERTEX, &vs->base, key); /* SVGA_NEW_RAST */ key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable; + + /* Determine if this shader is the last shader in the vertex + * processing stage. 
+ */ + key->last_vertex_stage = !(svga->curr.gs || + svga->curr.tcs || svga->curr.tes); } @@ -338,7 +345,7 @@ compile_passthrough_vs(struct svga_context *svga, static enum pipe_error -emit_hw_vs(struct svga_context *svga, unsigned dirty) +emit_hw_vs(struct svga_context *svga, uint64_t dirty) { struct svga_shader_variant *variant; struct svga_vertex_shader *vs = svga->curr.vs; diff --git a/src/gallium/drivers/svga/svga_streamout.h b/src/gallium/drivers/svga/svga_streamout.h index 1daa1ad5352..5e6db247b53 100644 --- a/src/gallium/drivers/svga/svga_streamout.h +++ b/src/gallium/drivers/svga/svga_streamout.h @@ -32,6 +32,9 @@ struct svga_stream_output { struct pipe_stream_output_info info; unsigned pos_out_index; // position output index unsigned id; + unsigned streammask; // bitmask to specify which streams are enabled + unsigned buffer_stream; + struct svga_winsys_buffer *declBuf; }; struct svga_stream_output * @@ -50,4 +53,20 @@ svga_delete_stream_output(struct svga_context *svga, enum pipe_error svga_rebind_stream_output_targets(struct svga_context *svga); +void +svga_create_stream_output_queries(struct svga_context *svga); + +void +svga_destroy_stream_output_queries(struct svga_context *svga); + +void +svga_begin_stream_output_queries(struct svga_context *svga, unsigned mask); + +void +svga_end_stream_output_queries(struct svga_context *svga, unsigned mask); + +unsigned +svga_get_primcount_from_stream_output(struct svga_context *svga, + unsigned stream); + #endif /* SVGA_STREAMOUT_H */ diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c index 2c48a66186f..d3dd23d2d81 100644 --- a/src/gallium/drivers/svga/svga_surface.c +++ b/src/gallium/drivers/svga/svga_surface.c @@ -578,6 +578,16 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s) } } + /** + * Create an alternate surface view for the specified context if the + * view was created for another context. 
+ */ + if (s && s->base.context != &svga->pipe) { + struct pipe_surface *surf; + surf = svga_create_surface_view(&svga->pipe, s->base.texture, &s->base, FALSE); + s = svga_surface(surf); + } + if (s && s->view_id == SVGA3D_INVALID_ID) { SVGA3dResourceType resType; SVGA3dRenderTargetViewDesc desc; diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h index 587632d0eb6..1413e3a4b52 100644 --- a/src/gallium/drivers/svga/svga_surface.h +++ b/src/gallium/drivers/svga/svga_surface.h @@ -146,6 +146,8 @@ static inline SVGA3dResourceType svga_resource_type(enum pipe_texture_target target) { switch (target) { + case PIPE_BUFFER: + return SVGA3D_RESOURCE_BUFFER; case PIPE_TEXTURE_1D: case PIPE_TEXTURE_1D_ARRAY: return SVGA3D_RESOURCE_TEXTURE1D; diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c index b6fd07fe346..3e8c90d8e1e 100644 --- a/src/gallium/drivers/svga/svga_swtnl_backend.c +++ b/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -90,11 +90,12 @@ svga_vbuf_render_allocate_vertices(struct vbuf_render *render, if (!svga_render->vbuf) { svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size); - svga_render->vbuf = pipe_buffer_create(screen, - PIPE_BIND_VERTEX_BUFFER, - PIPE_USAGE_STREAM, - svga_render->vbuf_size); + svga_render->vbuf = SVGA_TRY_PTR(pipe_buffer_create + (screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, + svga_render->vbuf_size)); if (!svga_render->vbuf) { + svga_retry_enter(svga); svga_context_flush(svga, NULL); assert(!svga_render->vbuf); svga_render->vbuf = pipe_buffer_create(screen, @@ -104,6 +105,7 @@ svga_vbuf_render_allocate_vertices(struct vbuf_render *render, /* The buffer allocation may fail if we run out of memory. * The draw module's vbuf code should handle that without crashing. 
*/ + svga_retry_exit(svga); } svga->swtnl.new_vdecl = TRUE; @@ -267,7 +269,7 @@ svga_vbuf_submit_state(struct svga_vbuf_render *svga_render) else { svga_hwtnl_set_flatshade(svga->hwtnl, svga->curr.rast->templ.flatshade || - svga->state.hw_draw.fs->uses_flat_interp, + svga_is_using_flat_shading(svga), svga->curr.rast->templ.flatshade_first); svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode); @@ -286,10 +288,10 @@ svga_vbuf_render_draw_arrays(struct vbuf_render *render, struct svga_context *svga = svga_render->svga; unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size; - enum pipe_error ret = PIPE_OK; /* instancing will already have been resolved at this point by 'draw' */ const unsigned start_instance = 0; const unsigned instance_count = 1; + boolean retried; SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_VBUFDRAWARRAYS); @@ -301,17 +303,13 @@ svga_vbuf_render_draw_arrays(struct vbuf_render *render, * redbook/polys.c */ svga_update_state_retry(svga, SVGA_STATE_HW_DRAW); - - ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, - nr, start_instance, instance_count); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, - start + bias, nr, - start_instance, instance_count); + SVGA_RETRY_CHECK(svga, svga_hwtnl_draw_arrays + (svga->hwtnl, svga_render->prim, start + bias, + nr, start_instance, instance_count, 0), retried); + if (retried) { svga->swtnl.new_vbuf = TRUE; - assert(ret == PIPE_OK); } + SVGA_STATS_TIME_POP(svga_sws(svga)); } @@ -325,7 +323,7 @@ svga_vbuf_render_draw_elements(struct vbuf_render *render, struct svga_context *svga = svga_render->svga; int bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size; - boolean ret; + boolean retried; /* instancing will already have been resolved at this point by 'draw' */ const struct pipe_draw_info info = { .index_size = 2, @@ -354,13 +352,12 @@ svga_vbuf_render_draw_elements(struct vbuf_render *render, * redbook/polys.c */ svga_update_state_retry(svga, SVGA_STATE_HW_DRAW); - ret = svga_hwtnl_draw_range_elements(svga->hwtnl, &info, nr_indices); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = svga_hwtnl_draw_range_elements(svga->hwtnl, &info, nr_indices); + SVGA_RETRY_CHECK(svga, svga_hwtnl_draw_range_elements(svga->hwtnl, &info, + nr_indices), retried); + if (retried) { svga->swtnl.new_vbuf = TRUE; - assert(ret == PIPE_OK); } + SVGA_STATS_TIME_POP(svga_sws(svga)); } diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c index 816fef1c4ea..789ed23e88b 100644 --- a/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -51,7 +51,7 @@ static void set_draw_viewport(struct svga_context *svga) { - struct pipe_viewport_state vp = svga->curr.viewport; + struct pipe_viewport_state vp = svga->curr.viewport[0]; float adjx = 0.0f; float adjy = 0.0f; @@ -98,7 +98,7 @@ set_draw_viewport(struct svga_context *svga) } static enum pipe_error -update_swtnl_draw(struct svga_context *svga, unsigned dirty) +update_swtnl_draw(struct svga_context *svga, uint64_t dirty) { SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_SWTNLUPDATEDRAW); @@ -191,7 +191,6 @@ svga_vdecl_to_input_element(struct svga_context *svga, { SVGA3dElementLayoutId id; SVGA3dInputElementDesc elements[PIPE_MAX_ATTRIBS]; - enum pipe_error ret; unsigned i; assert(num_decls <= PIPE_MAX_ATTRIBS); @@ -208,13 
+207,8 @@ svga_vdecl_to_input_element(struct svga_context *svga, id = util_bitmask_add(svga->input_element_object_id_bm); - ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, id, elements); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, - id, elements); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, id, + elements)); return id; } @@ -306,22 +300,14 @@ svga_swtnl_update_vdecl(struct svga_context *svga) any_change = memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)); if (svga_have_vgpu10(svga)) { - enum pipe_error ret; - if (!any_change && svga_render->layout_id != SVGA3D_INVALID_ID) { goto done; } if (svga_render->layout_id != SVGA3D_INVALID_ID) { /* destroy old */ - ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, - svga_render->layout_id); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, - svga_render->layout_id); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyElementLayout + (svga->swc, svga_render->layout_id)); /** * reset current layout id state after the element layout is @@ -340,14 +326,8 @@ svga_swtnl_update_vdecl(struct svga_context *svga) /* bind new */ if (svga->state.hw_draw.layout_id != svga_render->layout_id) { - ret = SVGA3D_vgpu10_SetInputLayout(svga->swc, svga_render->layout_id); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_SetInputLayout(svga->swc, - svga_render->layout_id); - assert(ret == PIPE_OK); - } - + SVGA_RETRY(svga, SVGA3D_vgpu10_SetInputLayout(svga->swc, + svga_render->layout_id)); svga->state.hw_draw.layout_id = svga_render->layout_id; } } @@ -366,7 +346,7 @@ done: static enum pipe_error -update_swtnl_vdecl(struct svga_context *svga, unsigned dirty) +update_swtnl_vdecl(struct svga_context *svga, uint64_t dirty) { return svga_swtnl_update_vdecl(svga); } diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index 5c3afee3845..0f7597f6157 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -238,14 +238,18 @@ svga_tgsi_vgpu9_translate(struct svga_context *svga, memcpy(&variant->key, key, sizeof(*key)); variant->id = UTIL_BITMASK_INVALID_INDEX; - variant->pstipple_sampler_unit = emit.pstipple_sampler_unit; - - /* If there was exactly one write to a fragment shader output register - * and it came from a constant buffer, we know all fragments will have - * the same color (except for blending). - */ - variant->constant_color_output = - emit.constant_color_output && emit.num_output_writes == 1; + if (unit == PIPE_SHADER_FRAGMENT) { + struct svga_fs_variant *fs_variant = svga_fs_variant(variant); + + fs_variant->pstipple_sampler_unit = emit.pstipple_sampler_unit; + + /* If there was exactly one write to a fragment shader output register + * and it came from a constant buffer, we know all fragments will have + * the same color (except for blending). 
+ */ + fs_variant->constant_color_output = + emit.constant_color_output && emit.num_output_writes == 1; + } #if 0 if (!svga_shader_verify(variant->tokens, variant->nr_tokens) || diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h index e98601127f4..9c467cc7814 100644 --- a/src/gallium/drivers/svga/svga_tgsi.h +++ b/src/gallium/drivers/svga/svga_tgsi.h @@ -30,7 +30,7 @@ #include "svga3d_reg.h" -#define MAX_VGPU10_ADDR_REGS 2 +#define MAX_VGPU10_ADDR_REGS 4 struct svga_compile_key; struct svga_context; diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index 099ede6017d..6e607cd0616 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -40,6 +40,7 @@ #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_strings.h" #include "tgsi/tgsi_two_side.h" #include "tgsi/tgsi_aa_point.h" #include "tgsi/tgsi_util.h" @@ -87,6 +88,100 @@ enum clipping_mode }; +/* Shader signature info */ +struct svga_shader_signature +{ + SVGA3dDXShaderSignatureHeader header; + SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS]; + SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS]; + SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS]; +}; + +static inline void +set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e, + unsigned index, + SVGA3dDXSignatureSemanticName sgnName, + unsigned mask, + SVGA3dDXSignatureRegisterComponentType compType, + SVGA3dDXSignatureMinPrecision minPrecision) +{ + e->registerIndex = index; + e->semanticName = sgnName; + e->mask = mask; + e->componentType = compType; + e->minPrecision = minPrecision; +}; + +static const SVGA3dDXSignatureSemanticName +tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = { + SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID, + SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID, + SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX, + SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX, + SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID, + SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + 
SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED +}; + + +/** + * Map tgsi semantic name to SVGA signature semantic name + */ +static inline SVGA3dDXSignatureSemanticName +map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name) +{ + assert(name < TGSI_SEMANTIC_COUNT); + + /* Do a few asserts here to spot check the mapping */ + assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] == + SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); + assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] == + SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX); + assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] == + SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID); + + return tgsi_semantic_to_sgn_name[name]; +} + + struct svga_shader_emitter_v10 { /* The token output buffer */ @@ -100,12 +195,16 @@ struct svga_shader_emitter_v10 unsigned unit; unsigned version; /**< Either 40 or 41 at this time */ + unsigned cur_tgsi_token; /**< current tgsi token position */ unsigned inst_start_token; boolean discard_instruction; /**< throw away current instruction? */ + boolean reemit_instruction; /**< reemit current instruction */ + boolean skip_instruction; /**< skip current instruction */ union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4]; + double (*immediates_dbl)[2]; unsigned num_immediates; /**< Number of immediates emitted */ - unsigned common_immediate_pos[8]; /**< literals for common immediates */ + unsigned common_immediate_pos[10]; /**< literals for common immediates */ unsigned num_common_immediates; boolean immediates_emitted; @@ -126,8 +225,11 @@ struct svga_shader_emitter_v10 /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */ struct { unsigned arrayId, index; + boolean initialized; } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */ + unsigned initialize_temp_index; + /** Number of constants used by original shader for each constant buffer. * The size should probably always match with that of svga_state.constbufs. 
*/ @@ -139,6 +241,16 @@ struct svga_shader_emitter_v10 ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */ ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */ + /* Index Range declaration */ + struct { + unsigned start_index; + unsigned count; + boolean required; + unsigned operandType; + unsigned size; + unsigned dim; + } index_range; + /* Address regs (really implemented with temps) */ unsigned num_address_regs; unsigned address_reg_index[MAX_VGPU10_ADDR_REGS]; @@ -154,8 +266,12 @@ struct svga_shader_emitter_v10 unsigned out_index; /**< the real position output reg */ unsigned tmp_index; /**< the fake/temp position output reg */ unsigned so_index; /**< the non-adjusted position output reg */ + unsigned prescale_cbuf_index; /* index to the const buf for prescale */ unsigned prescale_scale_index, prescale_trans_index; - boolean need_prescale; + unsigned num_prescale; /* number of prescale factor in const buf */ + unsigned viewport_index; + unsigned need_prescale:1; + unsigned have_prescale:1; } vposition; /* For vertex shaders only */ @@ -183,13 +299,20 @@ struct svga_shader_emitter_v10 unsigned fragcoord_input_index; /**< real fragment position input reg */ unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */ - /** Which texture units are doing shadow comparison in the FS code */ - unsigned shadow_compare_units; - unsigned sample_id_sys_index; /**< TGSI index of sample id sys value */ unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */ unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */ + + /** TGSI index of sample mask input sys value */ + unsigned sample_mask_in_sys_index; + + /** Which texture units are doing shadow comparison in the FS code */ + unsigned shadow_compare_units; + + /* layer */ + unsigned layer_input_index; /**< TGSI index of layer */ + unsigned layer_imm_index; /**< immediate for default layer 0 */ } fs; /* For geometry shaders only */ @@ -199,8 +322,63 @@ struct svga_shader_emitter_v10 unsigned input_size; /**< size of input arrays */ unsigned prim_id_index; /**< primitive id register index */ unsigned max_out_vertices; /**< maximum number of output vertices */ + unsigned invocations; + unsigned invocation_id_sys_index; + + unsigned viewport_index_out_index; + unsigned viewport_index_tmp_index; } gs; + /* For tessellation control shaders only */ + struct { + unsigned vertices_per_patch_index; /**< vertices_per_patch system value index */ + unsigned imm_index; /**< immediate for tcs */ + unsigned vertices_out; + unsigned invocation_id_sys_index; /**< invocation id */ + unsigned invocation_id_tmp_index; + unsigned instruction_token_pos; /* token pos for the first instruction */ + unsigned control_point_input_index; /* control point input register index */ + unsigned control_point_addr_index; /* control point input address register */ + unsigned control_point_out_index; /* control point output register index */ + unsigned control_point_tmp_index; /* control point temporary register */ + unsigned control_point_out_count; /* control point output count */ + boolean control_point_phase; /* true if in control point phase */ + unsigned patch_generic_out_count; /* per-patch generic output count */ + unsigned patch_generic_out_index; /* per-patch generic output register index*/ + unsigned patch_generic_tmp_index; /* per-patch generic temporary register index*/ + unsigned prim_id_index; /* primitive id */ + struct { + unsigned out_index; /* real tessinner output register */ + unsigned 
temp_index; /* tessinner temp register */ + unsigned tgsi_index; /* tgsi tessinner output register */ + } inner; + struct { + unsigned out_index; /* real tessouter output register */ + unsigned temp_index; /* tessouter temp register */ + unsigned tgsi_index; /* tgsi tessouter output register */ + } outer; + } tcs; + + /* For tessellation evaluation shaders only */ + struct { + enum pipe_prim_type prim_mode; + enum pipe_tess_spacing spacing; + boolean vertices_order_cw; + boolean point_mode; + unsigned tesscoord_sys_index; + unsigned prim_id_index; /* primitive id */ + struct { + unsigned in_index; /* real tessinner input register */ + unsigned temp_index; /* tessinner temp register */ + unsigned tgsi_index; /* tgsi tessinner input register */ + } inner; + struct { + unsigned in_index; /* real tessouter input register */ + unsigned temp_index; /* tessouter temp register */ + unsigned tgsi_index; /* tgsi tessouter input register */ + } outer; + } tes; + /* For vertex or geometry shaders */ enum clipping_mode clip_mode; unsigned clip_dist_out_index; /**< clip distance output register index */ @@ -219,19 +397,41 @@ struct svga_shader_emitter_v10 boolean uses_flat_interp; + unsigned reserved_token; /* index to the reserved token */ + boolean uses_precise_qualifier; + /* For all shaders: const reg index for RECT coord scaling */ unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS]; /* For all shaders: const reg index for texture buffer size */ unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS]; - /* VS/GS/FS Linkage info */ + /* VS/TCS/TES/GS/FS Linkage info */ struct shader_linkage linkage; + /* Shader signature */ + struct svga_shader_signature signature; + bool register_overflow; /**< Set if we exceed a VGPU10 register limit */ + + /* For pipe_debug_message */ + struct pipe_debug_callback svga_debug_callback; + + /* current loop depth in shader */ + unsigned current_loop_depth; }; +static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit); +static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit); +static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit); +static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit); +static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit); +static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit); +static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit); +static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit); +static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit); + static boolean emit_post_helpers(struct svga_shader_emitter_v10 *emit); @@ -239,6 +439,26 @@ static boolean emit_vertex(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst); +static boolean +emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, + unsigned inst_number, + const struct tgsi_full_instruction *inst); + +static void +emit_input_declaration(struct svga_shader_emitter_v10 *emit, + unsigned opcodeType, unsigned operandType, + unsigned dim, unsigned index, unsigned size, + unsigned name, unsigned numComp, + unsigned selMode, unsigned usageMask, + unsigned interpMode, + boolean addSignature, + SVGA3dDXSignatureSemanticName sgnName); + +static void +create_temp_array(struct svga_shader_emitter_v10 *emit, + unsigned arrayID, unsigned first, unsigned count, + unsigned startIndex); + static char err_buf[128]; static boolean @@ -381,7 +601,11 @@ 
check_register_index(struct svga_shader_emitter_v10 *emit, (emit->unit == PIPE_SHADER_GEOMETRY && index >= VGPU10_MAX_GS_INPUTS) || (emit->unit == PIPE_SHADER_FRAGMENT && - index >= VGPU10_MAX_FS_INPUTS)) { + index >= VGPU10_MAX_FS_INPUTS) || + (emit->unit == PIPE_SHADER_TESS_CTRL && + index >= VGPU11_MAX_HS_INPUTS) || + (emit->unit == PIPE_SHADER_TESS_EVAL && + index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) { emit->register_overflow = TRUE; } break; @@ -389,12 +613,22 @@ check_register_index(struct svga_shader_emitter_v10 *emit, case VGPU10_OPCODE_DCL_OUTPUT: case VGPU10_OPCODE_DCL_OUTPUT_SGV: case VGPU10_OPCODE_DCL_OUTPUT_SIV: + /* Note: we are skipping two output indices in tcs for + * tessinner/outer levels. The implementation will not exceed + * the declared output count, but it allows the index to go beyond + * VGPU11_MAX_HS_OUTPUTS. + * The index will never be >= VGPU11_MAX_HS_OUTPUTS + 2. + */ if ((emit->unit == PIPE_SHADER_VERTEX && index >= VGPU10_MAX_VS_OUTPUTS) || (emit->unit == PIPE_SHADER_GEOMETRY && index >= VGPU10_MAX_GS_OUTPUTS) || (emit->unit == PIPE_SHADER_FRAGMENT && - index >= VGPU10_MAX_FS_OUTPUTS)) { + index >= VGPU10_MAX_FS_OUTPUTS) || + (emit->unit == PIPE_SHADER_TESS_CTRL && + index >= VGPU11_MAX_HS_OUTPUTS + 2) || + (emit->unit == PIPE_SHADER_TESS_EVAL && + index >= VGPU11_MAX_DS_OUTPUTS)) { emit->register_overflow = TRUE; } break; @@ -436,13 +670,33 @@ check_register_index(struct svga_shader_emitter_v10 *emit, static void determine_clipping_mode(struct svga_shader_emitter_v10 *emit) { + /* num_written_clipdistance in the shader info for the tessellation + * control shader is always 0 because TGSI_PROPERTY_NUM_CLIPDIST_ENABLED + * is not defined for this shader. So we go through all the output declarations + * to set num_written_clipdistance. This is just to determine the + * clipping mode. + */ + if (emit->unit == PIPE_SHADER_TESS_CTRL) { + unsigned i; + for (i = 0; i < emit->info.num_outputs; i++) { + if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) { + emit->info.num_written_clipdistance = + 4 * (emit->info.output_semantic_index[i] + 1); + } + } + } + if (emit->info.num_written_clipdistance > 0) { emit->clip_mode = CLIP_DISTANCE; } else if (emit->info.writes_clipvertex) { emit->clip_mode = CLIP_VERTEX; } - else if (emit->key.clip_plane_enable) { + else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) { + /* + * Only the last shader in the vertex processing stage needs to + * handle the legacy clip mode.
+ */ emit->clip_mode = CLIP_LEGACY; } else { @@ -497,6 +751,12 @@ translate_shader_type(unsigned type) return VGPU10_GEOMETRY_SHADER; case PIPE_SHADER_FRAGMENT: return VGPU10_PIXEL_SHADER; + case PIPE_SHADER_TESS_CTRL: + return VGPU10_HULL_SHADER; + case PIPE_SHADER_TESS_EVAL: + return VGPU10_DOMAIN_SHADER; + case PIPE_SHADER_COMPUTE: + return VGPU10_COMPUTE_SHADER; default: assert(!"Unexpected shader type"); return VGPU10_VERTEX_SHADER; @@ -550,7 +810,7 @@ translate_opcode(enum tgsi_opcode opcode) case TGSI_OPCODE_DIV: return VGPU10_OPCODE_DIV; case TGSI_OPCODE_IDIV: - return VGPU10_OPCODE_IDIV; + return VGPU10_OPCODE_VMWARE; case TGSI_OPCODE_DP2: return VGPU10_OPCODE_DP2; case TGSI_OPCODE_BRK: @@ -652,6 +912,64 @@ translate_opcode(enum tgsi_opcode opcode) return VGPU10_OPCODE_LT; case TGSI_OPCODE_ROUND: return VGPU10_OPCODE_ROUND_NE; + /* Begin SM5 opcodes */ + case TGSI_OPCODE_F2D: + return VGPU10_OPCODE_FTOD; + case TGSI_OPCODE_D2F: + return VGPU10_OPCODE_DTOF; + case TGSI_OPCODE_DMUL: + return VGPU10_OPCODE_DMUL; + case TGSI_OPCODE_DADD: + return VGPU10_OPCODE_DADD; + case TGSI_OPCODE_DMAX: + return VGPU10_OPCODE_DMAX; + case TGSI_OPCODE_DMIN: + return VGPU10_OPCODE_DMIN; + case TGSI_OPCODE_DSEQ: + return VGPU10_OPCODE_DEQ; + case TGSI_OPCODE_DSGE: + return VGPU10_OPCODE_DGE; + case TGSI_OPCODE_DSLT: + return VGPU10_OPCODE_DLT; + case TGSI_OPCODE_DSNE: + return VGPU10_OPCODE_DNE; + case TGSI_OPCODE_IBFE: + return VGPU10_OPCODE_IBFE; + case TGSI_OPCODE_UBFE: + return VGPU10_OPCODE_UBFE; + case TGSI_OPCODE_BFI: + return VGPU10_OPCODE_BFI; + case TGSI_OPCODE_BREV: + return VGPU10_OPCODE_BFREV; + case TGSI_OPCODE_POPC: + return VGPU10_OPCODE_COUNTBITS; + case TGSI_OPCODE_LSB: + return VGPU10_OPCODE_FIRSTBIT_LO; + case TGSI_OPCODE_IMSB: + return VGPU10_OPCODE_FIRSTBIT_SHI; + case TGSI_OPCODE_UMSB: + return VGPU10_OPCODE_FIRSTBIT_HI; + case TGSI_OPCODE_INTERP_CENTROID: + return VGPU10_OPCODE_EVAL_CENTROID; + case TGSI_OPCODE_INTERP_SAMPLE: + return VGPU10_OPCODE_EVAL_SAMPLE_INDEX; + case TGSI_OPCODE_BARRIER: + return VGPU10_OPCODE_SYNC; + + /* DX11.1 Opcodes */ + case TGSI_OPCODE_DDIV: + return VGPU10_OPCODE_DDIV; + case TGSI_OPCODE_DRCP: + return VGPU10_OPCODE_DRCP; + case TGSI_OPCODE_D2I: + return VGPU10_OPCODE_DTOI; + case TGSI_OPCODE_D2U: + return VGPU10_OPCODE_DTOU; + case TGSI_OPCODE_I2D: + return VGPU10_OPCODE_ITOD; + case TGSI_OPCODE_U2D: + return VGPU10_OPCODE_UTOD; + case TGSI_OPCODE_SAMPLE_POS: /* Note: we never actually get this opcode because there's no GLSL * function to query multisample resource sample positions. There's @@ -761,66 +1079,60 @@ remap_temp_index(const struct svga_shader_emitter_v10 *emit, /** * Setup the operand0 fields related to indexing (1D, 2D, relative, etc). * Note: the operandType field must already be initialized. + * \param file the register file being accessed + * \param indirect using indirect addressing of the register file? 
+ * \param index2D if true, 2-D indexing is being used (const or temp registers) + * \param indirect2D if true, 2-D indirect indexing being used (for const buf) */ static VGPU10OperandToken0 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit, VGPU10OperandToken0 operand0, enum tgsi_file_type file, - boolean indirect, boolean index2D, - unsigned tempArrayID) + boolean indirect, + boolean index2D, bool indirect2D) { - unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep; + VGPU10_OPERAND_INDEX_DIMENSION indexDim; /* * Compute index dimensions */ if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 || - operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { + operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID || + operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID || + operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID || + operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP || + operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) { /* there's no swizzle for in-line immediates */ indexDim = VGPU10_OPERAND_INDEX_0D; assert(operand0.selectionMode == 0); } + else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) { + indexDim = VGPU10_OPERAND_INDEX_0D; + } else { - if (index2D || - tempArrayID > 0 || - operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { - indexDim = VGPU10_OPERAND_INDEX_2D; - } - else { - indexDim = VGPU10_OPERAND_INDEX_1D; - } + indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D; } /* - * Compute index representations (immediate, relative, etc). + * Compute index representation(s) (immediate vs relative). */ - if (tempArrayID > 0) { - assert(file == TGSI_FILE_TEMPORARY); - /* First index is the array ID, second index is the array element */ - index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; - if (indirect) { - index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; - } - else { - index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; - } + if (indexDim == VGPU10_OPERAND_INDEX_2D) { + index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE + : VGPU10_OPERAND_INDEX_IMMEDIATE32; + + index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE + : VGPU10_OPERAND_INDEX_IMMEDIATE32; } - else if (indirect) { - if (file == TGSI_FILE_CONSTANT) { - /* index[0] indicates which constant buffer while index[1] indicates - * the position in the constant buffer. - */ - index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; - index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; - } - else { - /* All other register files are 1-dimensional */ - index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; - } + else if (indexDim == VGPU10_OPERAND_INDEX_1D) { + index0Rep = indirect ? 
VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE + : VGPU10_OPERAND_INDEX_IMMEDIATE32; + + index1Rep = 0; } else { - index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; - index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + index0Rep = 0; + index1Rep = 0; } operand0.indexDimension = indexDim; @@ -879,13 +1191,18 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, const unsigned sem_index = emit->info.output_semantic_index[index]; unsigned writemask = reg->Register.WriteMask; const boolean indirect = reg->Register.Indirect; - const unsigned tempArrayId = get_temp_array_id(emit, file, index); - const boolean index2d = reg->Register.Dimension; + unsigned tempArrayId = get_temp_array_id(emit, file, index); + boolean index2d = reg->Register.Dimension || tempArrayId > 0; VGPU10OperandToken0 operand0; + if (file == TGSI_FILE_TEMPORARY) { + emit->temp_map[index].initialized = TRUE; + } + if (file == TGSI_FILE_OUTPUT) { if (emit->unit == PIPE_SHADER_VERTEX || - emit->unit == PIPE_SHADER_GEOMETRY) { + emit->unit == PIPE_SHADER_GEOMETRY || + emit->unit == PIPE_SHADER_TESS_EVAL) { if (index == emit->vposition.out_index && emit->vposition.tmp_index != INVALID_INDEX) { /* replace OUTPUT[POS] with TEMP[POS]. We need to store the @@ -913,6 +1230,21 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, file = TGSI_FILE_TEMPORARY; index = emit->clip_vertex_tmp_index; } + else if (sem_name == TGSI_SEMANTIC_COLOR && + emit->key.clamp_vertex_color) { + + /* set the saturate modifier of the instruction + * to clamp the vertex color. + */ + VGPU10OpcodeToken0 *token = + (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token; + token->saturate = TRUE; + } + else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX && + emit->gs.viewport_index_out_index != INVALID_INDEX) { + file = TGSI_FILE_TEMPORARY; + index = emit->gs.viewport_index_tmp_index; + } } else if (emit->unit == PIPE_SHADER_FRAGMENT) { if (sem_name == TGSI_SEMANTIC_POSITION) { @@ -955,6 +1287,116 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, emit->num_output_writes++; } } + else if (emit->unit == PIPE_SHADER_TESS_CTRL) { + if (index == emit->tcs.inner.tgsi_index) { + /* replace OUTPUT[TESSLEVEL] with temp. We are storing it + * in temporary for now so that will be store into appropriate + * registers in post_helper() in patch constant phase. + */ + if (emit->tcs.control_point_phase) { + /* Discard writing into tessfactor in control point phase */ + emit->discard_instruction = TRUE; + } + else { + file = TGSI_FILE_TEMPORARY; + index = emit->tcs.inner.temp_index; + } + } + else if (index == emit->tcs.outer.tgsi_index) { + /* replace OUTPUT[TESSLEVEL] with temp. We are storing it + * in temporary for now so that will be store into appropriate + * registers in post_helper(). + */ + if (emit->tcs.control_point_phase) { + /* Discard writing into tessfactor in control point phase */ + emit->discard_instruction = TRUE; + } + else { + file = TGSI_FILE_TEMPORARY; + index = emit->tcs.outer.temp_index; + } + } + else if (index >= emit->tcs.patch_generic_out_index && + index < (emit->tcs.patch_generic_out_index + + emit->tcs.patch_generic_out_count)) { + if (emit->tcs.control_point_phase) { + /* Discard writing into generic patch constant outputs in + control point phase */ + emit->discard_instruction = TRUE; + } + else { + if (emit->reemit_instruction) { + /* Store results of reemitted instruction in temporary register. 
*/ + file = TGSI_FILE_TEMPORARY; + index = emit->tcs.patch_generic_tmp_index + + (index - emit->tcs.patch_generic_out_index); + /** + * Temporaries for patch constant data can be done + * as indexable temporaries. + */ + tempArrayId = get_temp_array_id(emit, file, index); + index2d = tempArrayId > 0; + + emit->reemit_instruction = FALSE; + } + else { + /* If per-patch outputs is been read in shader, we + * reemit instruction and store results in temporaries in + * patch constant phase. */ + if (emit->info.reads_perpatch_outputs) { + emit->reemit_instruction = TRUE; + } + } + } + } + else if (reg->Register.Dimension) { + /* Only control point outputs are declared 2D in tgsi */ + if (emit->tcs.control_point_phase) { + if (emit->reemit_instruction) { + /* Store results of reemitted instruction in temporary register. */ + index2d = FALSE; + file = TGSI_FILE_TEMPORARY; + index = emit->tcs.control_point_tmp_index + + (index - emit->tcs.control_point_out_index); + emit->reemit_instruction = FALSE; + } + else { + /* The mapped control point outputs are 1-D */ + index2d = FALSE; + if (emit->info.reads_pervertex_outputs) { + /* If per-vertex outputs is been read in shader, we + * reemit instruction and store results in temporaries + * control point phase. */ + emit->reemit_instruction = TRUE; + } + } + + if (sem_name == TGSI_SEMANTIC_CLIPDIST && + emit->clip_dist_tmp_index != INVALID_INDEX) { + /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST]. + * We store the clip distance in a temporary first, then + * we'll copy it to the shadow copy and to CLIPDIST with the + * enabled planes mask in emit_clip_distance_instructions(). + */ + file = TGSI_FILE_TEMPORARY; + index = emit->clip_dist_tmp_index + sem_index; + } + else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX && + emit->clip_vertex_tmp_index != INVALID_INDEX) { + /* replace the CLIPVERTEX output register with a temporary */ + assert(emit->clip_mode == CLIP_VERTEX); + assert(sem_index == 0); + file = TGSI_FILE_TEMPORARY; + index = emit->clip_vertex_tmp_index; + } + } + else { + /* Discard writing into control point outputs in + patch constant phase */ + emit->discard_instruction = TRUE; + } + } + } } /* init operand tokens to all zero */ @@ -977,7 +1419,7 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, check_register_index(emit, operand0.operandType, index); operand0 = setup_operand0_indexing(emit, operand0, file, indirect, - index2d, tempArrayId); + index2d, FALSE); /* Emit tokens */ emit_dword(emit, operand0.value); @@ -993,6 +1435,28 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, } +/** + * Check if temporary register needs to be initialize when + * shader is not using indirect addressing for temporary and uninitialized + * temporary is not used in loop. In these two scenarios, we cannot + * determine if temporary is initialized or not. + */ +static boolean +need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit, + unsigned index) +{ + if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY)) + && emit->current_loop_depth == 0) { + if (!emit->temp_map[index].initialized && + emit->temp_map[index].index < emit->num_shader_temps) { + return TRUE; + } + } + + return FALSE; +} + + /** * Translate a src register of a TGSI instruction and emit VGPU10 tokens. * In quite a few cases, we do register substitution. 
For example, if @@ -1006,19 +1470,23 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, enum tgsi_file_type file = reg->Register.File; unsigned index = reg->Register.Index; const boolean indirect = reg->Register.Indirect; - const unsigned tempArrayId = get_temp_array_id(emit, file, index); - const boolean index2d = reg->Register.Dimension; - const unsigned swizzleX = reg->Register.SwizzleX; - const unsigned swizzleY = reg->Register.SwizzleY; - const unsigned swizzleZ = reg->Register.SwizzleZ; - const unsigned swizzleW = reg->Register.SwizzleW; + unsigned tempArrayId = get_temp_array_id(emit, file, index); + boolean index2d = (reg->Register.Dimension || + tempArrayId > 0 || + file == TGSI_FILE_CONSTANT); + unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index; + boolean indirect2d = reg->Dimension.Indirect; + unsigned swizzleX = reg->Register.SwizzleX; + unsigned swizzleY = reg->Register.SwizzleY; + unsigned swizzleZ = reg->Register.SwizzleZ; + unsigned swizzleW = reg->Register.SwizzleW; const boolean absolute = reg->Register.Absolute; const boolean negate = reg->Register.Negate; - bool is_prim_id = FALSE; - VGPU10OperandToken0 operand0; VGPU10OperandToken1 operand1; + operand0.value = operand1.value = 0; + if (emit->unit == PIPE_SHADER_FRAGMENT){ if (file == TGSI_FILE_INPUT) { if (index == emit->fs.face_input_index) { @@ -1031,6 +1499,12 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, file = TGSI_FILE_TEMPORARY; index = emit->fs.fragcoord_tmp_index; } + else if (index == emit->fs.layer_input_index) { + /* Replace INPUT[LAYER] with zero.x */ + file = TGSI_FILE_IMMEDIATE; + index = emit->fs.layer_imm_index; + swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X; + } else { /* We remap fragment shader inputs to that FS input indexes * match up with VS/GS output indexes. @@ -1045,6 +1519,23 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, file = TGSI_FILE_TEMPORARY; index = emit->fs.sample_pos_tmp_index; } + else if (index == emit->fs.sample_mask_in_sys_index) { + /* Emitted as vCoverage0.x */ + /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32) + * elements where s is the maximum number of color samples supported + * by the implementation. With current implementation, we should not + * have more than one element. 
So assert if Index != 0 + */ + assert((!reg->Register.Indirect && reg->Register.Index == 0) || + reg->Register.Indirect); + operand0.value = 0; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; + emit_dword(emit, operand0.value); + return; + } else { /* Map the TGSI system value to a VGPU10 input register */ assert(index < ARRAY_SIZE(emit->system_value_indexes)); @@ -1055,9 +1546,19 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, } else if (emit->unit == PIPE_SHADER_GEOMETRY) { if (file == TGSI_FILE_INPUT) { - is_prim_id = (index == emit->gs.prim_id_index); + if (index == emit->gs.prim_id_index) { + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; + } index = emit->linkage.input_map[index]; } + else if (file == TGSI_FILE_SYSTEM_VALUE && + index == emit->gs.invocation_id_sys_index) { + /* Emitted as vGSInstanceID0.x */ + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID; + index = 0; + } } else if (emit->unit == PIPE_SHADER_VERTEX) { if (file == TGSI_FILE_INPUT) { @@ -1080,23 +1581,178 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, index = emit->system_value_indexes[index]; } } + else if (emit->unit == PIPE_SHADER_TESS_CTRL) { - operand0.value = operand1.value = 0; + if (file == TGSI_FILE_SYSTEM_VALUE) { + if (index == emit->tcs.vertices_per_patch_index) { + /** + * if source register is the system value for vertices_per_patch, + * replace it with the immediate. + */ + file = TGSI_FILE_IMMEDIATE; + index = emit->tcs.imm_index; + swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X; + } + else if (index == emit->tcs.invocation_id_sys_index) { + if (emit->tcs.control_point_phase) { + /** + * Emitted as vOutputControlPointID.x + */ + operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID; + index = 0; + } + else { + /* There is no control point ID input declaration in + * the patch constant phase in hull shader. + * Since for now we are emitting all instructions in + * the patch constant phase, we are replacing the + * control point ID reference with the immediate 0. 
+ */ + file = TGSI_FILE_IMMEDIATE; + index = emit->tcs.imm_index; + swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W; + } + } + else if (index == emit->tcs.prim_id_index) { + /** + * Emitted as vPrim.x + */ + operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; + index = 0; + } + } + else if (file == TGSI_FILE_INPUT) { + index = emit->linkage.input_map[index]; + if (!emit->tcs.control_point_phase) { + /* Emitted as vicp */ + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; + assert(reg->Register.Dimension); + } + } + else if (file == TGSI_FILE_OUTPUT) { + if ((index >= emit->tcs.patch_generic_out_index && + index < (emit->tcs.patch_generic_out_index + + emit->tcs.patch_generic_out_count)) || + index == emit->tcs.inner.tgsi_index || + index == emit->tcs.outer.tgsi_index) { + if (emit->tcs.control_point_phase) { + emit->discard_instruction = TRUE; + } + else { + /* Device doesn't allow reading from output so + * use corresponding temporary register as source */ + file = TGSI_FILE_TEMPORARY; + if (index == emit->tcs.inner.tgsi_index) { + index = emit->tcs.inner.temp_index; + } + else if (index == emit->tcs.outer.tgsi_index) { + index = emit->tcs.outer.temp_index; + } + else { + index = emit->tcs.patch_generic_tmp_index + + (index - emit->tcs.patch_generic_out_index); + } - if (is_prim_id) { - /* NOTE: we should be using VGPU10_OPERAND_1_COMPONENT here, but - * our virtual GPU accepts this as-is. - */ - operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; - operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; + /** + * Temporaries for patch constant data can be done + * as indexable temporaries. + */ + tempArrayId = get_temp_array_id(emit, file, index); + index2d = tempArrayId > 0; + index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index; + } + } + else if (index2d) { + if (emit->tcs.control_point_phase) { + /* Device doesn't allow reading from output so + * use corresponding temporary register as source */ + file = TGSI_FILE_TEMPORARY; + index2d = FALSE; + index = emit->tcs.control_point_tmp_index + + (index - emit->tcs.control_point_out_index); + } + else { + emit->discard_instruction = TRUE; + } + } + } } - else { + else if (emit->unit == PIPE_SHADER_TESS_EVAL) { + if (file == TGSI_FILE_SYSTEM_VALUE) { + if (index == emit->tes.tesscoord_sys_index) { + /** + * Emitted as vDomain + */ + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT; + index = 0; + } + else if (index == emit->tes.inner.tgsi_index) { + file = TGSI_FILE_TEMPORARY; + index = emit->tes.inner.temp_index; + } + else if (index == emit->tes.outer.tgsi_index) { + file = TGSI_FILE_TEMPORARY; + index = emit->tes.outer.temp_index; + } + else if (index == emit->tes.prim_id_index) { + /** + * Emitted as vPrim.x + */ + operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; + index = 0; + } + + } + else if (file == TGSI_FILE_INPUT) { + if (index2d) { + /* 2D input is emitted as vcp (input control point). */ + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + + /* index specifies the element index and is remapped + * to align with the tcs output index. 
+ */ + index = emit->linkage.input_map[index]; + } + else { + if (index < emit->key.tes.tessfactor_index) + /* index specifies the generic patch index. + * Remapped to match up with the tcs output index. + */ + index = emit->linkage.input_map[index]; + + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + } + } + } + + if (file == TGSI_FILE_ADDRESS) { + index = emit->address_reg_index[index]; + file = TGSI_FILE_TEMPORARY; + } + + if (file == TGSI_FILE_TEMPORARY) { + if (need_temp_reg_initialization(emit, index)) { + emit->initialize_temp_index = index; + emit->discard_instruction = TRUE; + } + } + + if (operand0.value == 0) { + /* if operand0 was not set above for a special case, do the general + * case now. + */ operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; operand0.operandType = translate_register_file(file, tempArrayId > 0); } - operand0 = setup_operand0_indexing(emit, operand0, file, indirect, - index2d, tempArrayId); + index2d, indirect2d); if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 && operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { @@ -1149,13 +1805,12 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, } else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) { /* Emit the register index(es) */ - if (index2d || - operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { - emit_dword(emit, reg->Dimension.Index); - } + if (index2d) { + emit_dword(emit, index2); - if (tempArrayId > 0) { - emit_dword(emit, tempArrayId); + if (indirect2d) { + emit_indirect_register(emit, reg->DimIndirect.Index); + } } emit_dword(emit, remap_temp_index(emit, file, index)); @@ -1271,12 +1926,34 @@ emit_rasterizer_register(struct svga_shader_emitter_v10 *emit) /** - * Emit the token for a VGPU10 opcode. + * Emit tokens for the "stream" register used by the + * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions. + */ +static void +emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index) +{ + VGPU10OperandToken0 operand0; + + /* init */ + operand0.value = 0; + + /* No register index for rasterizer index (there's only one) */ + operand0.operandType = VGPU10_OPERAND_TYPE_STREAM; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + + emit_dword(emit, operand0.value); + emit_dword(emit, index); +} + + +/** + * Emit the token for a VGPU10 opcode, with precise parameter. * \param saturate clamp result to [0,1]? */ static void -emit_opcode(struct svga_shader_emitter_v10 *emit, - VGPU10_OPCODE_TYPE vgpu10_opcode, boolean saturate) +emit_opcode_precise(struct svga_shader_emitter_v10 *emit, + unsigned vgpu10_opcode, boolean saturate, boolean precise) { VGPU10OpcodeToken0 token0; @@ -1285,7 +1962,26 @@ emit_opcode(struct svga_shader_emitter_v10 *emit, token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ token0.saturate = saturate; + /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for + * 'invariant' declarations. Only set preciseValues=1 if we have SM5. + */ + token0.preciseValues = precise && emit->version >= 50; + emit_dword(emit, token0.value); + + emit->uses_precise_qualifier |= token0.preciseValues; +} + + +/** + * Emit the token for a VGPU10 opcode. + * \param saturate clamp result to [0,1]? 
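+ * This is a convenience wrapper around emit_opcode_precise() with
+ * precise set to FALSE.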
+ */ +static void +emit_opcode(struct svga_shader_emitter_v10 *emit, + unsigned vgpu10_opcode, boolean saturate) +{ + emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE); } @@ -1694,6 +2390,32 @@ find_immediate(struct svga_shader_emitter_v10 *emit, } +/** + * As above, but search for a double[2] pair. + */ +static int +find_immediate_dbl(struct svga_shader_emitter_v10 *emit, + double x, double y) +{ + const unsigned endIndex = emit->num_immediates; + unsigned i; + + assert(emit->immediates_emitted); + + /* Search immediates for x, y, z, w */ + for (i = 0; i < endIndex; i++) { + if (x == emit->immediates_dbl[i][0] && + y == emit->immediates_dbl[i][1]) { + return i; + } + } + /* Should never try to use an immediate value that wasn't pre-declared */ + assert(!"find_immediate_dbl() failed!"); + return -1; +} + + + /** * Return a tgsi_full_src_register for an immediate/literal * union tgsi_immediate_data[4] value. @@ -1831,21 +2553,41 @@ make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value) } -/** - * Allocate space for a union tgsi_immediate_data[4] immediate. - * \return the index/position of the immediate. - */ -static unsigned -alloc_immediate_4(struct svga_shader_emitter_v10 *emit, - const union tgsi_immediate_data imm[4]) +static struct tgsi_full_src_register +make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value) { - unsigned n = emit->num_immediates++; - assert(!emit->immediates_emitted); - assert(n < ARRAY_SIZE(emit->immediates)); - emit->immediates[n][0] = imm[0]; - emit->immediates[n][1] = imm[1]; - emit->immediates[n][2] = imm[2]; - emit->immediates[n][3] = imm[3]; + struct tgsi_full_src_register reg; + int immpos = find_immediate_dbl(emit, value, value); + + assert(immpos >= 0); + + memset(®, 0, sizeof(reg)); + reg.Register.File = TGSI_FILE_IMMEDIATE; + reg.Register.Index = immpos; + reg.Register.SwizzleX = TGSI_SWIZZLE_X; + reg.Register.SwizzleY = TGSI_SWIZZLE_Y; + reg.Register.SwizzleZ = TGSI_SWIZZLE_Z; + reg.Register.SwizzleW = TGSI_SWIZZLE_W; + + return reg; +} + + +/** + * Allocate space for a union tgsi_immediate_data[4] immediate. + * \return the index/position of the immediate. + */ +static unsigned +alloc_immediate_4(struct svga_shader_emitter_v10 *emit, + const union tgsi_immediate_data imm[4]) +{ + unsigned n = emit->num_immediates++; + assert(!emit->immediates_emitted); + assert(n < ARRAY_SIZE(emit->immediates)); + emit->immediates[n][0] = imm[0]; + emit->immediates[n][1] = imm[1]; + emit->immediates[n][2] = imm[2]; + emit->immediates[n][3] = imm[3]; return n; } @@ -1884,6 +2626,20 @@ alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, } +static unsigned +alloc_immediate_double2(struct svga_shader_emitter_v10 *emit, + double x, double y) +{ + unsigned n = emit->num_immediates++; + assert(!emit->immediates_emitted); + assert(n < ARRAY_SIZE(emit->immediates)); + emit->immediates_dbl[n][0] = x; + emit->immediates_dbl[n][1] = y; + return n; + +} + + /** * Allocate a shader input to store a system value. 
*/ @@ -2057,8 +2813,39 @@ emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, emit->gs.max_out_vertices = prop->u[0].Data; break; - default: + case TGSI_PROPERTY_GS_INVOCATIONS: + emit->gs.invocations = prop->u[0].Data; + break; + + case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: + case TGSI_PROPERTY_NEXT_SHADER: + case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: + /* no-op */ + break; + + case TGSI_PROPERTY_TCS_VERTICES_OUT: + emit->tcs.vertices_out = prop->u[0].Data; + break; + + case TGSI_PROPERTY_TES_PRIM_MODE: + emit->tes.prim_mode = prop->u[0].Data; + break; + + case TGSI_PROPERTY_TES_SPACING: + emit->tes.spacing = prop->u[0].Data; + break; + + case TGSI_PROPERTY_TES_VERTEX_ORDER_CW: + emit->tes.vertices_order_cw = prop->u[0].Data; + break; + + case TGSI_PROPERTY_TES_POINT_MODE: + emit->tes.point_mode = prop->u[0].Data; break; + + default: + debug_printf("Unexpected TGSI property %s\n", + tgsi_property_names[prop->Property.PropertyName]); } return TRUE; @@ -2094,573 +2881,434 @@ emit_property_instructions(struct svga_shader_emitter_v10 *emit) opcode0.primitive = emit->gs.prim_type; emit_property_instruction(emit, opcode0, 0, 0); - /* emit output primitive topology declaration */ - opcode0.value = 0; - opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY; - opcode0.primitiveTopology = emit->gs.prim_topology; - emit_property_instruction(emit, opcode0, 0, 0); - /* emit max output vertices */ opcode0.value = 0; opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT; emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices); + + if (emit->version >= 50 && emit->gs.invocations > 0) { + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT; + emit_property_instruction(emit, opcode0, 1, emit->gs.invocations); + } } /** - * Emit a vgpu10 declaration "instruction". - * \param index the register index - * \param size array size of the operand. In most cases, it is 1, - * but for inputs to geometry shader, the array size varies - * depending on the primitive type. + * A helper function to declare tessellator domain in a hull shader or + * in the domain shader. 
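+ * Quad and (iso)line domains both map to VGPU10_TESSELLATOR_DOMAIN_QUAD;
+ * the triangle domain maps to VGPU10_TESSELLATOR_DOMAIN_TRI.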
*/ static void -emit_decl_instruction(struct svga_shader_emitter_v10 *emit, - VGPU10OpcodeToken0 opcode0, - VGPU10OperandToken0 operand0, - VGPU10NameToken name_token, - unsigned index, unsigned size) +emit_tessellator_domain(struct svga_shader_emitter_v10 *emit, + enum pipe_prim_type prim_mode) { - assert(opcode0.opcodeType); - assert(operand0.mask || - (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) || - (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK)); + VGPU10OpcodeToken0 opcode0; + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN; + switch (prim_mode) { + case PIPE_PRIM_QUADS: + case PIPE_PRIM_LINES: + opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD; + break; + case PIPE_PRIM_TRIANGLES: + opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI; + break; + default: + debug_printf("Invalid tessellator prim mode %d\n", prim_mode); + opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED; + } begin_emit_instruction(emit); emit_dword(emit, opcode0.value); - - emit_dword(emit, operand0.value); - - if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) { - /* Next token is the index of the register to declare */ - emit_dword(emit, index); - } - else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) { - /* Next token is the size of the register */ - emit_dword(emit, size); - - /* Followed by the index of the register */ - emit_dword(emit, index); - } - - if (name_token.value) { - emit_dword(emit, name_token.value); - } - end_emit_instruction(emit); } /** - * Emit the declaration for a shader input. - * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx - * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x - * \param dim index dimension - * \param index the input register index - * \param size array size of the operand. In most cases, it is 1, - * but for inputs to geometry shader, the array size varies - * depending on the primitive type. - * \param name one of VGPU10_NAME_x - * \parma numComp number of components - * \param selMode component selection mode - * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values - * \param interpMode interpolation mode + * Emit domain shader declarations. 
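+ * This emits the input control point count (taken from the TES shader
+ * key) followed by the tessellator domain declaration.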
*/ static void -emit_input_declaration(struct svga_shader_emitter_v10 *emit, - VGPU10_OPCODE_TYPE opcodeType, - VGPU10_OPERAND_TYPE operandType, - VGPU10_OPERAND_INDEX_DIMENSION dim, - unsigned index, unsigned size, - VGPU10_SYSTEM_NAME name, - VGPU10_OPERAND_NUM_COMPONENTS numComp, - VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode, - unsigned usageMask, - VGPU10_INTERPOLATION_MODE interpMode) +emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit) { VGPU10OpcodeToken0 opcode0; - VGPU10OperandToken0 operand0; - VGPU10NameToken name_token; - - assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); - assert(opcodeType == VGPU10_OPCODE_DCL_INPUT || - opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV || - opcodeType == VGPU10_OPCODE_DCL_INPUT_PS || - opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV || - opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV); - assert(operandType == VGPU10_OPERAND_TYPE_INPUT || - operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID); - assert(numComp <= VGPU10_OPERAND_4_COMPONENT); - assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE); - assert(dim <= VGPU10_OPERAND_INDEX_3D); - assert(name == VGPU10_NAME_UNDEFINED || - name == VGPU10_NAME_POSITION || - name == VGPU10_NAME_INSTANCE_ID || - name == VGPU10_NAME_VERTEX_ID || - name == VGPU10_NAME_PRIMITIVE_ID || - name == VGPU10_NAME_IS_FRONT_FACE || - name == VGPU10_NAME_SAMPLE_INDEX); - - assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED || - interpMode == VGPU10_INTERPOLATION_CONSTANT || - interpMode == VGPU10_INTERPOLATION_LINEAR || - interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID || - interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE || - interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID || - interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE || - interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE); - - check_register_index(emit, opcodeType, index); - opcode0.value = operand0.value = name_token.value = 0; - - opcode0.opcodeType = opcodeType; - opcode0.interpolationMode = interpMode; + assert(emit->unit == PIPE_SHADER_TESS_EVAL); - operand0.operandType = operandType; - operand0.numComponents = numComp; - operand0.selectionMode = selMode; - operand0.mask = usageMask; - operand0.indexDimension = dim; - operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; - if (dim == VGPU10_OPERAND_INDEX_2D) - operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + /* Emit the input control point count */ + assert(emit->key.tes.vertices_per_patch > 0 && + emit->key.tes.vertices_per_patch <= 32); - name_token.name = name; + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT; + opcode0.controlPointCount = emit->key.tes.vertices_per_patch; + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + end_emit_instruction(emit); - emit_decl_instruction(emit, opcode0, operand0, name_token, index, size); + emit_tessellator_domain(emit, emit->tes.prim_mode); } /** - * Emit the declaration for a shader output. - * \param type one of VGPU10_OPCODE_DCL_OUTPUTx - * \param index the output register index - * \param name one of VGPU10_NAME_x - * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values + * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed + * to implement some instructions. We pre-allocate those values here + * in the immediate constant buffer. 
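+ * Which values get allocated depends on the opcodes used by the shader
+ * (e.g. LIT, IMSB/UMSB, the bitfield ops, the double ops) and on shader
+ * key flags such as the vertex attribute conversion modes.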
*/ static void -emit_output_declaration(struct svga_shader_emitter_v10 *emit, - VGPU10_OPCODE_TYPE type, unsigned index, - VGPU10_SYSTEM_NAME name, - unsigned usageMask) +alloc_common_immediates(struct svga_shader_emitter_v10 *emit) { - VGPU10OpcodeToken0 opcode0; - VGPU10OperandToken0 operand0; - VGPU10NameToken name_token; + unsigned n = 0; - assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); - assert(type == VGPU10_OPCODE_DCL_OUTPUT || - type == VGPU10_OPCODE_DCL_OUTPUT_SGV || - type == VGPU10_OPCODE_DCL_OUTPUT_SIV); - assert(name == VGPU10_NAME_UNDEFINED || - name == VGPU10_NAME_POSITION || - name == VGPU10_NAME_PRIMITIVE_ID || - name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || - name == VGPU10_NAME_CLIP_DISTANCE); + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f); - check_register_index(emit, type, index); + if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) { + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f); + } - opcode0.value = operand0.value = name_token.value = 0; + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, 0, 1, 0, -1); - opcode0.opcodeType = type; - operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT; - operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; - operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; - operand0.mask = usageMask; - operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; - operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 || + emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) { + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, 31, 0, 0, 0); + } - name_token.name = name; + if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 || + emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 || + emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) { + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, 32, 0, 0, 0); + } - emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); -} + if (emit->key.vs.attrib_puint_to_snorm) { + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f); + } + if (emit->key.vs.attrib_puint_to_uscaled) { + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f); + } -/** - * Emit the declaration for the fragment depth output. 
- */ -static void -emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) -{ - VGPU10OpcodeToken0 opcode0; - VGPU10OperandToken0 operand0; - VGPU10NameToken name_token; + if (emit->key.vs.attrib_puint_to_sscaled) { + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, 22, 12, 2, 0); - assert(emit->unit == PIPE_SHADER_FRAGMENT); + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, 22, 30, 0, 0); + } - opcode0.value = operand0.value = name_token.value = 0; + if (emit->vposition.num_prescale > 1) { + unsigned i; + for (i = 0; i < emit->vposition.num_prescale; i+=4) { + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, i, i+1, i+2, i+3); + } + } - opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; - operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; - operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; - operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; - operand0.mask = 0; + emit->immediates_dbl = (double (*)[2]) emit->immediates; - emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); -} + if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) { + emit->common_immediate_pos[n++] = + alloc_immediate_double2(emit, -1.0, -1.0); + } + if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) { + emit->common_immediate_pos[n++] = + alloc_immediate_double2(emit, 0.0, 0.0); + emit->common_immediate_pos[n++] = + alloc_immediate_double2(emit, 1.0, 1.0); + } -/** - * Emit the declaration for the fragment sample mask/coverage output. - */ -static void -emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit) -{ - VGPU10OpcodeToken0 opcode0; - VGPU10OperandToken0 operand0; - VGPU10NameToken name_token; + if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) { + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0); + } - assert(emit->unit == PIPE_SHADER_FRAGMENT); - assert(emit->version >= 41); + assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); - opcode0.value = operand0.value = name_token.value = 0; + unsigned i; - opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; - operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK; - operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; - operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; - operand0.mask = 0; + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (emit->key.tex[i].texel_bias) { + /* Replace 0.0f if more immediate float value is needed */ + emit->common_immediate_pos[n++] = + alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f); + break; + } + } - emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); + assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); + emit->num_common_immediates = n; } /** - * Emit the declaration for a system value input/output. - */ + * Emit hull shader declarations. 
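+ * This emits the input/output control point counts, the tessellator
+ * domain, output primitive and partitioning, followed by the constant,
+ * sampler and resource declarations and the immediate constant block.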
+*/ static void -emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, - enum tgsi_semantic semantic_name, unsigned index) +emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit) { - switch (semantic_name) { - case TGSI_SEMANTIC_INSTANCEID: - index = alloc_system_value_index(emit, index); - emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, - VGPU10_OPERAND_TYPE_INPUT, - VGPU10_OPERAND_INDEX_1D, - index, 1, - VGPU10_NAME_INSTANCE_ID, - VGPU10_OPERAND_4_COMPONENT, - VGPU10_OPERAND_4_COMPONENT_MASK_MODE, - VGPU10_OPERAND_4_COMPONENT_MASK_X, - VGPU10_INTERPOLATION_UNDEFINED); - break; - case TGSI_SEMANTIC_VERTEXID: - index = alloc_system_value_index(emit, index); - emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, - VGPU10_OPERAND_TYPE_INPUT, - VGPU10_OPERAND_INDEX_1D, - index, 1, - VGPU10_NAME_VERTEX_ID, - VGPU10_OPERAND_4_COMPONENT, - VGPU10_OPERAND_4_COMPONENT_MASK_MODE, - VGPU10_OPERAND_4_COMPONENT_MASK_X, - VGPU10_INTERPOLATION_UNDEFINED); + VGPU10OpcodeToken0 opcode0; + + /* Emit the input control point count */ + assert(emit->key.tcs.vertices_per_patch > 0 && + emit->key.tcs.vertices_per_patch <= 32); + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT; + opcode0.controlPointCount = emit->key.tcs.vertices_per_patch; + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + end_emit_instruction(emit); + + /* Emit the output control point count */ + assert(emit->tcs.vertices_out >= 0 && emit->tcs.vertices_out <= 32); + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT; + opcode0.controlPointCount = emit->tcs.vertices_out; + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + end_emit_instruction(emit); + + /* Emit tessellator domain */ + emit_tessellator_domain(emit, emit->key.tcs.prim_mode); + + /* Emit tessellator output primitive */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE; + if (emit->key.tcs.point_mode) { + opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT; + } + else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) { + opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE; + } + else { + assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS || + emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES); + + if (emit->key.tcs.vertices_order_cw) + opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW; + else + opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW; + } + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + end_emit_instruction(emit); + + /* Emit tessellator partitioning */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING; + switch (emit->key.tcs.spacing) { + case PIPE_TESS_SPACING_FRACTIONAL_ODD: + opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD; break; - case TGSI_SEMANTIC_SAMPLEID: - assert(emit->unit == PIPE_SHADER_FRAGMENT); - emit->fs.sample_id_sys_index = index; - index = alloc_system_value_index(emit, index); - emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV, - VGPU10_OPERAND_TYPE_INPUT, - VGPU10_OPERAND_INDEX_1D, - index, 1, - VGPU10_NAME_SAMPLE_INDEX, - VGPU10_OPERAND_4_COMPONENT, - VGPU10_OPERAND_4_COMPONENT_MASK_MODE, - VGPU10_OPERAND_4_COMPONENT_MASK_X, - VGPU10_INTERPOLATION_CONSTANT); + case PIPE_TESS_SPACING_FRACTIONAL_EVEN: + opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN; break; - case TGSI_SEMANTIC_SAMPLEPOS: - /* This 
system value contains the position of the current sample - * when using per-sample shading. We implement this by calling - * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample - * index as the argument. See emit_sample_position_instructions(). - */ - assert(emit->version >= 41); - emit->fs.sample_pos_sys_index = index; - index = alloc_system_value_index(emit, index); + case PIPE_TESS_SPACING_EQUAL: + opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER; break; default: - debug_printf("unexpected sytem value semantic index %u\n", - semantic_name); + debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing); + opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED; } -} + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + end_emit_instruction(emit); -/** - * Translate a TGSI declaration to VGPU10. - */ -static boolean -emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_declaration *decl) -{ - switch (decl->Declaration.File) { - case TGSI_FILE_INPUT: - /* do nothing - see emit_input_declarations() */ - return TRUE; + /* Declare constant registers */ + emit_constant_declaration(emit); - case TGSI_FILE_OUTPUT: - assert(decl->Range.First == decl->Range.Last); - emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; - return TRUE; + /* Declare samplers and resources */ + emit_sampler_declarations(emit); + emit_resource_declarations(emit); - case TGSI_FILE_TEMPORARY: - /* Don't declare the temps here. Just keep track of how many - * and emit the declaration later. - */ - if (decl->Declaration.Array) { - /* Indexed temporary array. Save the start index of the array - * and the size of the array. - */ - const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS); - unsigned i; + alloc_common_immediates(emit); - assert(arrayID < ARRAY_SIZE(emit->temp_arrays)); + int nVertices = emit->key.tcs.vertices_per_patch; + emit->tcs.imm_index = + alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0); - /* Save this array so we can emit the declaration for it later */ - emit->temp_arrays[arrayID].start = decl->Range.First; - emit->temp_arrays[arrayID].size = - decl->Range.Last - decl->Range.First + 1; - - emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1); - assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS); - emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS); - - /* Fill in the temp_map entries for this array */ - for (i = decl->Range.First; i <= decl->Range.Last; i++) { - emit->temp_map[i].arrayId = arrayID; - emit->temp_map[i].index = i - decl->Range.First; - } - } + /* Now, emit the constant block containing all the immediates + * declared by shader, as well as the extra ones seen above. + */ + emit_vgpu10_immediates_block(emit); - /* for all temps, indexed or not, keep track of highest index */ - emit->num_shader_temps = MAX2(emit->num_shader_temps, - decl->Range.Last + 1); - return TRUE; +} - case TGSI_FILE_CONSTANT: - /* Don't declare constants here. Just keep track and emit later. */ - { - unsigned constbuf = 0, num_consts; - if (decl->Declaration.Dimension) { - constbuf = decl->Dim.Index2D; - } - /* We throw an assertion here when, in fact, the shader should never - * have linked due to constbuf index out of bounds, so we shouldn't - * have reached here. 
- */ - assert(constbuf < ARRAY_SIZE(emit->num_shader_consts)); - num_consts = MAX2(emit->num_shader_consts[constbuf], - decl->Range.Last + 1); +/** + * A helper function to determine if control point phase is needed. + * Returns TRUE if there is control point output. + */ +static boolean +needs_control_point_phase(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; - if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { - debug_printf("Warning: constant buffer is declared to size [%u]" - " but [%u] is the limit.\n", - num_consts, - VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); - } - /* The linker doesn't enforce the max UBO size so we clamp here */ - emit->num_shader_consts[constbuf] = - MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); - } + assert(emit->unit == PIPE_SHADER_TESS_CTRL); + + /* If output control point count does not match the input count, + * we need a control point phase to explicitly set the output control + * points. + */ + if (emit->key.tcs.vertices_per_patch != emit->tcs.vertices_out) return TRUE; - case TGSI_FILE_IMMEDIATE: - assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); - return FALSE; + for (i = 0; i < emit->info.num_outputs; i++) { + switch (emit->info.output_semantic_name[i]) { + case TGSI_SEMANTIC_PATCH: + case TGSI_SEMANTIC_TESSOUTER: + case TGSI_SEMANTIC_TESSINNER: + break; + default: + return TRUE; + } + } + return FALSE; +} - case TGSI_FILE_SYSTEM_VALUE: - emit_system_value_declaration(emit, decl->Semantic.Name, - decl->Range.First); - return TRUE; - case TGSI_FILE_SAMPLER: - /* Don't declare samplers here. Just keep track and emit later. */ - emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); - return TRUE; +/** + * Start the hull shader control point phase + */ +static boolean +emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; -#if 0 - case TGSI_FILE_RESOURCE: - /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ - /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ - assert(!"TGSI_FILE_RESOURCE not handled yet"); + /* If there is no control point output, skip the control point phase. 
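+ * needs_control_point_phase() returns TRUE when the shader writes any
+ * per-control-point outputs or when the output patch size differs from
+ * the input patch size.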
*/ + if (!needs_control_point_phase(emit)) return FALSE; -#endif - - case TGSI_FILE_ADDRESS: - emit->num_address_regs = MAX2(emit->num_address_regs, - decl->Range.Last + 1); - return TRUE; - case TGSI_FILE_SAMPLER_VIEW: - { - unsigned unit = decl->Range.First; - assert(decl->Range.First == decl->Range.Last); - emit->sampler_target[unit] = decl->SamplerView.Resource; - /* Note: we can ignore YZW return types for now */ - emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX; - emit->sampler_view[unit] = TRUE; - } - return TRUE; + /* Start the control point phase in the hull shader */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE; + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + end_emit_instruction(emit); - default: - assert(!"Unexpected type of declaration"); - return FALSE; + /* Declare the output control point ID */ + if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) { + /* Add invocation id declaration if it does not exist */ + emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1; + } + + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID, + VGPU10_OPERAND_INDEX_0D, + 0, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_0_COMPONENT, 0, + 0, + VGPU10_INTERPOLATION_CONSTANT, TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); + + if (emit->tcs.prim_id_index != INVALID_INDEX) { + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID, + VGPU10_OPERAND_INDEX_0D, + 0, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_0_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + 0, + VGPU10_INTERPOLATION_UNDEFINED, TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); } -} + return TRUE; +} /** - * Emit all input declarations. + * Start the hull shader patch constant phase and + * do the second pass of the tcs translation and emit + * the relevant declarations and instructions for this phase. 
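+ * The TGSI token stream is rewound to the first instruction token and
+ * every instruction is translated again, this time with
+ * control_point_phase set to FALSE.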
*/ static boolean -emit_input_declarations(struct svga_shader_emitter_v10 *emit) +emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit, + struct tgsi_parse_context *parse) { - unsigned i; + unsigned inst_number = 0; + boolean ret = TRUE; + VGPU10OpcodeToken0 opcode0; - if (emit->unit == PIPE_SHADER_FRAGMENT) { + emit->skip_instruction = FALSE; - for (i = 0; i < emit->linkage.num_inputs; i++) { - enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; - unsigned usage_mask = emit->info.input_usage_mask[i]; - unsigned index = emit->linkage.input_map[i]; - VGPU10_OPCODE_TYPE type; - VGPU10_INTERPOLATION_MODE interpolationMode; - VGPU10_SYSTEM_NAME name; + /* Start the patch constant phase */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE; + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + end_emit_instruction(emit); - if (usage_mask == 0) - continue; /* register is not actually used */ + /* Set the current phase to patch constant phase */ + emit->tcs.control_point_phase = FALSE; - if (semantic_name == TGSI_SEMANTIC_POSITION) { - /* fragment position input */ - type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; - interpolationMode = VGPU10_INTERPOLATION_LINEAR; - name = VGPU10_NAME_POSITION; - if (usage_mask & TGSI_WRITEMASK_W) { - /* we need to replace use of 'w' with '1/w' */ - emit->fs.fragcoord_input_index = i; - } - } - else if (semantic_name == TGSI_SEMANTIC_FACE) { - /* fragment front-facing input */ - type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; - interpolationMode = VGPU10_INTERPOLATION_CONSTANT; - name = VGPU10_NAME_IS_FRONT_FACE; - emit->fs.face_input_index = i; - } - else if (semantic_name == TGSI_SEMANTIC_PRIMID) { - /* primitive ID */ - type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; - interpolationMode = VGPU10_INTERPOLATION_CONSTANT; - name = VGPU10_NAME_PRIMITIVE_ID; - } - else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) { - /* sample index / ID */ - type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; - interpolationMode = VGPU10_INTERPOLATION_CONSTANT; - name = VGPU10_NAME_SAMPLE_INDEX; - } - else { - /* general fragment input */ - type = VGPU10_OPCODE_DCL_INPUT_PS; - interpolationMode = - translate_interpolation(emit, - emit->info.input_interpolate[i], - emit->info.input_interpolate_loc[i]); - - /* keeps track if flat interpolation mode is being used */ - emit->uses_flat_interp |= - (interpolationMode == VGPU10_INTERPOLATION_CONSTANT); + if (emit->tcs.prim_id_index != INVALID_INDEX) { + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID, + VGPU10_OPERAND_INDEX_0D, + 0, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_0_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + 0, + VGPU10_INTERPOLATION_UNDEFINED, TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); + } - name = VGPU10_NAME_UNDEFINED; - } + /* Emit declarations for this phase */ + emit->index_range.required = + emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? 
TRUE : FALSE; + emit_tcs_input_declarations(emit); - emit_input_declaration(emit, type, - VGPU10_OPERAND_TYPE_INPUT, - VGPU10_OPERAND_INDEX_1D, index, 1, - name, - VGPU10_OPERAND_4_COMPONENT, - VGPU10_OPERAND_4_COMPONENT_MASK_MODE, - VGPU10_OPERAND_4_COMPONENT_MASK_ALL, - interpolationMode); - } + if (emit->index_range.start_index != INVALID_INDEX) { + emit_index_range_declaration(emit); } - else if (emit->unit == PIPE_SHADER_GEOMETRY) { - for (i = 0; i < emit->info.num_inputs; i++) { - enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; - unsigned usage_mask = emit->info.input_usage_mask[i]; - unsigned index = emit->linkage.input_map[i]; - VGPU10_OPCODE_TYPE opcodeType, operandType; - VGPU10_OPERAND_NUM_COMPONENTS numComp; - VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode; - VGPU10_SYSTEM_NAME name; - VGPU10_OPERAND_INDEX_DIMENSION dim; - - if (usage_mask == 0) - continue; /* register is not actually used */ - - opcodeType = VGPU10_OPCODE_DCL_INPUT; - operandType = VGPU10_OPERAND_TYPE_INPUT; - numComp = VGPU10_OPERAND_4_COMPONENT; - selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; - name = VGPU10_NAME_UNDEFINED; - - /* all geometry shader inputs are two dimensional except - * gl_PrimitiveID - */ - dim = VGPU10_OPERAND_INDEX_2D; + emit->index_range.required = + emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE; + emit_tcs_output_declarations(emit); - if (semantic_name == TGSI_SEMANTIC_PRIMID) { - /* Primitive ID */ - operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; - dim = VGPU10_OPERAND_INDEX_0D; - numComp = VGPU10_OPERAND_0_COMPONENT; - selMode = 0; + if (emit->index_range.start_index != INVALID_INDEX) { + emit_index_range_declaration(emit); + } + emit->index_range.required = FALSE; - /* also save the register index so we can check for - * primitive id when emit src register. We need to modify the - * operand type, index dimension when emit primitive id src reg. - */ - emit->gs.prim_id_index = i; - } - else if (semantic_name == TGSI_SEMANTIC_POSITION) { - /* vertex position input */ - opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV; - name = VGPU10_NAME_POSITION; - } + emit_temporaries_declaration(emit); - emit_input_declaration(emit, opcodeType, operandType, - dim, index, - emit->gs.input_size, - name, - numComp, selMode, - VGPU10_OPERAND_4_COMPONENT_MASK_ALL, - VGPU10_INTERPOLATION_UNDEFINED); - } - } - else { - assert(emit->unit == PIPE_SHADER_VERTEX); + /* Reset the token position to the first instruction token + * in preparation for the second pass of the shader + */ + parse->Position = emit->tcs.instruction_token_pos; - for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) { - unsigned usage_mask = emit->info.input_usage_mask[i]; - unsigned index = i; + while (!tgsi_parse_end_of_tokens(parse)) { + tgsi_parse_token(parse); - if (usage_mask == 0) - continue; /* register is not actually used */ + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION); + ret = emit_vgpu10_instruction(emit, inst_number++, + &parse->FullToken.FullInstruction); - emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, - VGPU10_OPERAND_TYPE_INPUT, - VGPU10_OPERAND_INDEX_1D, index, 1, - VGPU10_NAME_UNDEFINED, - VGPU10_OPERAND_4_COMPONENT, - VGPU10_OPERAND_4_COMPONENT_MASK_MODE, - VGPU10_OPERAND_4_COMPONENT_MASK_ALL, - VGPU10_INTERPOLATION_UNDEFINED); + /* Usually this applies to TCS only. 
If shader is reading output of + * patch constant in fork phase, we should reemit all instructions + * which are writting into ouput of patch constant in fork phase + * to store results into temporaries. + */ + if (emit->reemit_instruction) { + assert(emit->unit == PIPE_SHADER_TESS_CTRL); + ret = emit_vgpu10_instruction(emit, inst_number, + &parse->FullToken.FullInstruction); } + + if (!ret) + return FALSE; } return TRUE; @@ -2668,211 +3316,1889 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit) /** - * Emit all output declarations. + * Emit index range declaration. */ static boolean -emit_output_declarations(struct svga_shader_emitter_v10 *emit) +emit_index_range_declaration(struct svga_shader_emitter_v10 *emit) { - unsigned i; - - for (i = 0; i < emit->info.num_outputs; i++) { - /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/ - const enum tgsi_semantic semantic_name = - emit->info.output_semantic_name[i]; - const unsigned semantic_index = emit->info.output_semantic_index[i]; - unsigned index = i; + if (emit->version < 50) + return TRUE; - if (emit->unit == PIPE_SHADER_FRAGMENT) { - if (semantic_name == TGSI_SEMANTIC_COLOR) { - assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index)); + assert(emit->index_range.start_index != INVALID_INDEX); + assert(emit->index_range.count != 0); + assert(emit->index_range.required); + assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS); + assert(emit->index_range.dim != 0); + assert(emit->index_range.size != 0); - emit->fs.color_out_index[semantic_index] = index; + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; - emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs, - index + 1); + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE; - /* The semantic index is the shader's color output/buffer index */ - emit_output_declaration(emit, - VGPU10_OPCODE_DCL_OUTPUT, semantic_index, - VGPU10_NAME_UNDEFINED, - VGPU10_OPERAND_4_COMPONENT_MASK_ALL); + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.indexDimension = emit->index_range.dim; + operand0.operandType = emit->index_range.operandType; + operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; - if (semantic_index == 0) { - if (emit->key.fs.write_color0_to_n_cbufs > 1) { - /* Emit declarations for the additional color outputs - * for broadcasting. 
- */ - unsigned j; - for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) { - /* Allocate a new output index */ - unsigned idx = emit->info.num_outputs + j - 1; - emit->fs.color_out_index[j] = idx; - emit_output_declaration(emit, - VGPU10_OPCODE_DCL_OUTPUT, idx, - VGPU10_NAME_UNDEFINED, - VGPU10_OPERAND_4_COMPONENT_MASK_ALL); - emit->info.output_semantic_index[idx] = j; - } + if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) + operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; - emit->fs.num_color_outputs = - emit->key.fs.write_color0_to_n_cbufs; - } - } - else { - assert(!emit->key.fs.write_color0_to_n_cbufs); - } - } - else if (semantic_name == TGSI_SEMANTIC_POSITION) { - /* Fragment depth output */ - emit_fragdepth_output_declaration(emit); - } - else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) { - /* Fragment depth output */ - emit_samplemask_output_declaration(emit); - } - else { - assert(!"Bad output semantic name"); - } - } - else { - /* VS or GS */ - VGPU10_COMPONENT_NAME name; - VGPU10_OPCODE_TYPE type; - unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; - - switch (semantic_name) { - case TGSI_SEMANTIC_POSITION: - assert(emit->unit != PIPE_SHADER_FRAGMENT); - type = VGPU10_OPCODE_DCL_OUTPUT_SIV; - name = VGPU10_NAME_POSITION; - /* Save the index of the vertex position output register */ - emit->vposition.out_index = index; - break; - case TGSI_SEMANTIC_CLIPDIST: - type = VGPU10_OPCODE_DCL_OUTPUT_SIV; - name = VGPU10_NAME_CLIP_DISTANCE; - /* save the starting index of the clip distance output register */ - if (semantic_index == 0) - emit->clip_dist_out_index = index; - writemask = emit->output_usage_mask[index]; - writemask = apply_clip_plane_mask(emit, writemask, semantic_index); - if (writemask == 0x0) { - continue; /* discard this do-nothing declaration */ - } - break; - case TGSI_SEMANTIC_PRIMID: - assert(emit->unit == PIPE_SHADER_GEOMETRY); - type = VGPU10_OPCODE_DCL_OUTPUT_SGV; - name = VGPU10_NAME_PRIMITIVE_ID; - break; - case TGSI_SEMANTIC_LAYER: - assert(emit->unit == PIPE_SHADER_GEOMETRY); - type = VGPU10_OPCODE_DCL_OUTPUT_SGV; - name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX; - break; - case TGSI_SEMANTIC_CLIPVERTEX: - type = VGPU10_OPCODE_DCL_OUTPUT; - name = VGPU10_NAME_UNDEFINED; - emit->clip_vertex_out_index = index; - break; - default: - /* generic output */ - type = VGPU10_OPCODE_DCL_OUTPUT; - name = VGPU10_NAME_UNDEFINED; - } + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); - emit_output_declaration(emit, type, index, name, writemask); - } + if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) { + emit_dword(emit, emit->index_range.size); + emit_dword(emit, emit->index_range.start_index); + emit_dword(emit, emit->index_range.count); + } + else { + emit_dword(emit, emit->index_range.start_index); + emit_dword(emit, emit->index_range.count); } - if (emit->vposition.so_index != INVALID_INDEX && - emit->vposition.out_index != INVALID_INDEX) { + end_emit_instruction(emit); - assert(emit->unit != PIPE_SHADER_FRAGMENT); + /* Reset fields in emit->index_range struct except + * emit->index_range.required which will be reset afterwards + */ + emit->index_range.count = 0; + emit->index_range.operandType = VGPU10_NUM_OPERANDS; + emit->index_range.start_index = INVALID_INDEX; + emit->index_range.size = 0; + emit->index_range.dim = 0; - /* Emit the declaration for the non-adjusted vertex position - * for stream output purpose - */ - emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, - 
emit->vposition.so_index, - VGPU10_NAME_UNDEFINED, - VGPU10_OPERAND_4_COMPONENT_MASK_ALL); - } + return TRUE; +} - if (emit->clip_dist_so_index != INVALID_INDEX && - emit->clip_dist_out_index != INVALID_INDEX) { - assert(emit->unit != PIPE_SHADER_FRAGMENT); +/** + * Emit a vgpu10 declaration "instruction". + * \param index the register index + * \param size array size of the operand. In most cases, it is 1, + * but for inputs to geometry shader, the array size varies + * depending on the primitive type. + */ +static void +emit_decl_instruction(struct svga_shader_emitter_v10 *emit, + VGPU10OpcodeToken0 opcode0, + VGPU10OperandToken0 operand0, + VGPU10NameToken name_token, + unsigned index, unsigned size) +{ + assert(opcode0.opcodeType); + assert(operand0.mask || + (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) || + (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) || + (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) || + (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) || + (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) || + (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) || + (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) || + (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM)); - /* Emit the declaration for the clip distance shadow copy which - * will be used for stream output purpose and for clip distance - * varying variable - */ - emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, - emit->clip_dist_so_index, - VGPU10_NAME_UNDEFINED, - emit->output_usage_mask[emit->clip_dist_out_index]); + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); - if (emit->info.num_written_clipdistance > 4) { - /* for the second clip distance register, each handles 4 planes */ - emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, - emit->clip_dist_so_index + 1, - VGPU10_NAME_UNDEFINED, - emit->output_usage_mask[emit->clip_dist_out_index+1]); - } + emit_dword(emit, operand0.value); + + if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) { + /* Next token is the index of the register to declare */ + emit_dword(emit, index); + } + else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) { + /* Next token is the size of the register */ + emit_dword(emit, size); + + /* Followed by the index of the register */ + emit_dword(emit, index); } - return TRUE; + if (name_token.value) { + emit_dword(emit, name_token.value); + } + + end_emit_instruction(emit); } /** - * Emit the declaration for the temporary registers. + * Emit the declaration for a shader input. + * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx + * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x + * \param dim index dimension + * \param index the input register index + * \param size array size of the operand. In most cases, it is 1, + * but for inputs to geometry shader, the array size varies + * depending on the primitive type. For tessellation control + * shader, the array size is the vertex count per patch. 
+ * \param name one of VGPU10_NAME_x + * \parma numComp number of components + * \param selMode component selection mode + * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values + * \param interpMode interpolation mode */ -static boolean -emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) +static void +emit_input_declaration(struct svga_shader_emitter_v10 *emit, + VGPU10_OPCODE_TYPE opcodeType, + VGPU10_OPERAND_TYPE operandType, + VGPU10_OPERAND_INDEX_DIMENSION dim, + unsigned index, unsigned size, + VGPU10_SYSTEM_NAME name, + VGPU10_OPERAND_NUM_COMPONENTS numComp, + VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode, + unsigned usageMask, + VGPU10_INTERPOLATION_MODE interpMode, + boolean addSignature, + SVGA3dDXSignatureSemanticName sgnName) { - unsigned total_temps, reg, i; + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10NameToken name_token; - total_temps = emit->num_shader_temps; + assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); + assert(opcodeType == VGPU10_OPCODE_DCL_INPUT || + opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV || + opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV || + opcodeType == VGPU10_OPCODE_DCL_INPUT_PS || + opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV || + opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV); + assert(operandType == VGPU10_OPERAND_TYPE_INPUT || + operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID || + operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK || + operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID || + operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID || + operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT || + operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT || + operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT || + operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID || + operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID || + operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP); - /* If there is indirect access to non-indexable temps in the shader, - * convert those temps to indexable temps. This works around a bug - * in the GLSL->TGSI translator exposed in piglit test - * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test. - * Internal temps added by the driver remain as non-indexable temps. 
- */ - if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) && - emit->num_temp_arrays == 0) { - unsigned arrayID; + assert(numComp <= VGPU10_OPERAND_4_COMPONENT); + assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE); + assert(dim <= VGPU10_OPERAND_INDEX_3D); + assert(name == VGPU10_NAME_UNDEFINED || + name == VGPU10_NAME_POSITION || + name == VGPU10_NAME_INSTANCE_ID || + name == VGPU10_NAME_VERTEX_ID || + name == VGPU10_NAME_PRIMITIVE_ID || + name == VGPU10_NAME_IS_FRONT_FACE || + name == VGPU10_NAME_SAMPLE_INDEX || + name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || + name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX); + + assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED || + interpMode == VGPU10_INTERPOLATION_CONSTANT || + interpMode == VGPU10_INTERPOLATION_LINEAR || + interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID || + interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE || + interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID || + interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE || + interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE); - arrayID = 1; - emit->num_temp_arrays = arrayID + 1; - emit->temp_arrays[arrayID].start = 0; - emit->temp_arrays[arrayID].size = total_temps; + check_register_index(emit, opcodeType, index); - /* Fill in the temp_map entries for this temp array */ - for (i = 0; i < total_temps; i++) { - emit->temp_map[i].arrayId = arrayID; - emit->temp_map[i].index = i; + opcode0.value = operand0.value = name_token.value = 0; + + opcode0.opcodeType = opcodeType; + opcode0.interpolationMode = interpMode; + + operand0.operandType = operandType; + operand0.numComponents = numComp; + operand0.selectionMode = selMode; + operand0.mask = usageMask; + operand0.indexDimension = dim; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + if (dim == VGPU10_OPERAND_INDEX_2D) + operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + name_token.name = name; + + emit_decl_instruction(emit, opcode0, operand0, name_token, index, size); + + if (addSignature) { + struct svga_shader_signature *sgn = &emit->signature; + if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) { + /* Set patch constant signature */ + SVGA3dDXShaderSignatureEntry *sgnEntry = + &sgn->patchConstants[sgn->header.numPatchConstantSignatures++]; + set_shader_signature_entry(sgnEntry, index, + sgnName, usageMask, + SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, + SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); + + } else if (operandType == VGPU10_OPERAND_TYPE_INPUT || + operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) { + /* Set input signature */ + SVGA3dDXShaderSignatureEntry *sgnEntry = + &sgn->inputs[sgn->header.numInputSignatures++]; + set_shader_signature_entry(sgnEntry, index, + sgnName, usageMask, + SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, + SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); } } - /* Allocate extra temps for specially-implemented instructions, - * such as LIT. - */ - total_temps += MAX_INTERNAL_TEMPS; + if (emit->index_range.required) { + /* Here, index_range declaration is only applicable for opcodeType + * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and + * for operandType VGPU10_OPERAND_TYPE_INPUT, + * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and + * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT. 
+ */ + if ((opcodeType != VGPU10_OPCODE_DCL_INPUT && + opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) || + (operandType != VGPU10_OPERAND_TYPE_INPUT && + operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT && + operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) { + if (emit->index_range.start_index != INVALID_INDEX) { + emit_index_range_declaration(emit); + } + return; + } - if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) { - if (emit->vposition.need_prescale || emit->key.vs.undo_viewport || - emit->key.clip_plane_enable || - emit->vposition.so_index != INVALID_INDEX) { - emit->vposition.tmp_index = total_temps; - total_temps += 1; + if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) { + /* Need record new index_range */ + emit->index_range.count = 1; + emit->index_range.operandType = operandType; + emit->index_range.start_index = index; + emit->index_range.size = size; + emit->index_range.dim = dim; } + else if (index != + (emit->index_range.start_index + emit->index_range.count) || + emit->index_range.operandType != operandType) { + /* Input index is not contiguous with index range or operandType is + * different from index range's operandType. We need to emit current + * index_range first and then start recording next index range. + */ + emit_index_range_declaration(emit); - if (emit->unit == PIPE_SHADER_VERTEX) { - unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 | + emit->index_range.count = 1; + emit->index_range.operandType = operandType; + emit->index_range.start_index = index; + emit->index_range.size = size; + emit->index_range.dim = dim; + } + else if (emit->index_range.operandType == operandType) { + /* Since input index is contiguous with index range and operandType + * is same as index range's operandType, increment index range count. + */ + emit->index_range.count++; + } + } +} + + +/** + * Emit the declaration for a shader output. 
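For reference, the index-range bookkeeping in emit_input_declaration() above coalesces contiguous register declarations of the same operand type into a single VGPU10_OPCODE_DCL_INDEX_RANGE declaration; a gap in the indices or a change of operand type flushes the pending range. A minimal standalone sketch of that pattern (the struct and helper names here are illustrative, not from the driver):

#include <stdio.h>

struct range { int start, count, type; };   /* type < 0 means no open range */

static void flush_range(struct range *r)
{
   if (r->type >= 0)
      printf("dcl_indexRange start=%d count=%d operandType=%d\n",
             r->start, r->count, r->type);
   r->type = -1;
}

static void declare_reg(struct range *r, int index, int type)
{
   if (r->type < 0) {
      /* no open range: start one at this register */
      r->start = index; r->count = 1; r->type = type;
   } else if (index == r->start + r->count && type == r->type) {
      /* contiguous and same operand type: extend the open range */
      r->count++;
   } else {
      /* gap or different operand type: flush, then start a new range */
      flush_range(r);
      r->start = index; r->count = 1; r->type = type;
   }
}

int main(void)
{
   struct range r = { 0, 0, -1 };
   declare_reg(&r, 4, 0); declare_reg(&r, 5, 0); declare_reg(&r, 6, 0);
   declare_reg(&r, 9, 0);     /* gap: emits the range 4..6 first */
   flush_range(&r);           /* emits the range 9..9 */
   return 0;
}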
+ * \param type one of VGPU10_OPCODE_DCL_OUTPUTx + * \param index the output register index + * \param name one of VGPU10_NAME_x + * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values + */ +static void +emit_output_declaration(struct svga_shader_emitter_v10 *emit, + VGPU10_OPCODE_TYPE type, unsigned index, + VGPU10_SYSTEM_NAME name, + unsigned writemask, + boolean addSignature, + SVGA3dDXSignatureSemanticName sgnName) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10NameToken name_token; + + assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); + assert(type == VGPU10_OPCODE_DCL_OUTPUT || + type == VGPU10_OPCODE_DCL_OUTPUT_SGV || + type == VGPU10_OPCODE_DCL_OUTPUT_SIV); + assert(name == VGPU10_NAME_UNDEFINED || + name == VGPU10_NAME_POSITION || + name == VGPU10_NAME_PRIMITIVE_ID || + name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || + name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX || + name == VGPU10_NAME_CLIP_DISTANCE); + + check_register_index(emit, type, index); + + opcode0.value = operand0.value = name_token.value = 0; + + opcode0.opcodeType = type; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + operand0.mask = writemask; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + name_token.name = name; + + emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); + + /* Capture output signature */ + if (addSignature) { + struct svga_shader_signature *sgn = &emit->signature; + SVGA3dDXShaderSignatureEntry *sgnEntry = + &sgn->outputs[sgn->header.numOutputSignatures++]; + set_shader_signature_entry(sgnEntry, index, + sgnName, writemask, + SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, + SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); + } + + if (emit->index_range.required) { + /* Here, index_range declaration is only applicable for opcodeType + * VGPU10_OPCODE_DCL_OUTPUT and for operandType + * VGPU10_OPERAND_TYPE_OUTPUT. + */ + if (type != VGPU10_OPCODE_DCL_OUTPUT) { + if (emit->index_range.start_index != INVALID_INDEX) { + emit_index_range_declaration(emit); + } + return; + } + + if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) { + /* Need record new index_range */ + emit->index_range.count = 1; + emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT; + emit->index_range.start_index = index; + emit->index_range.size = 1; + emit->index_range.dim = VGPU10_OPERAND_INDEX_1D; + } + else if (index != + (emit->index_range.start_index + emit->index_range.count)) { + /* Output index is not contiguous with index range. We need to + * emit current index_range first and then start recording next + * index range. + */ + emit_index_range_declaration(emit); + + emit->index_range.count = 1; + emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT; + emit->index_range.start_index = index; + emit->index_range.size = 1; + emit->index_range.dim = VGPU10_OPERAND_INDEX_1D; + } + else { + /* Since output index is contiguous with index range, increment + * index range count. + */ + emit->index_range.count++; + } + } +} + + +/** + * Emit the declaration for the fragment depth output. 
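The addSignature path above is what builds the shader signature table alongside the VGPU10 bytecode. A rough sketch of the information each entry carries, using an illustrative struct rather than the real SVGA3dDXShaderSignatureEntry layout:

struct sig_entry_sketch {
   unsigned register_index;    /* VGPU10 register holding the value */
   unsigned semantic_name;     /* an SVGADX_SIGNATURE_SEMANTIC_NAME_* value */
   unsigned mask;              /* components written (outputs) or read (inputs) */
   unsigned component_type;    /* left as "unknown" by the emitter */
   unsigned min_precision;     /* left as "default" by the emitter */
};

static void
fill_entry(struct sig_entry_sketch *e, unsigned reg,
           unsigned sem_name, unsigned mask)
{
   e->register_index = reg;
   e->semantic_name  = sem_name;
   e->mask           = mask;
   e->component_type = 0;   /* stands in for SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN */
   e->min_precision  = 0;   /* stands in for SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT */
}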
+ */ +static void +emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10NameToken name_token; + + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + opcode0.value = operand0.value = name_token.value = 0; + + opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; + operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.mask = 0; + + emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); +} + + +/** + * Emit the declaration for the fragment sample mask/coverage output. + */ +static void +emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10NameToken name_token; + + assert(emit->unit == PIPE_SHADER_FRAGMENT); + assert(emit->version >= 41); + + opcode0.value = operand0.value = name_token.value = 0; + + opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.mask = 0; + + emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); +} + + +/** + * Emit output declarations for fragment shader. + */ +static void +emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned int i; + + for (i = 0; i < emit->info.num_outputs; i++) { + /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/ + const enum tgsi_semantic semantic_name = + emit->info.output_semantic_name[i]; + const unsigned semantic_index = emit->info.output_semantic_index[i]; + unsigned index = i; + + if (semantic_name == TGSI_SEMANTIC_COLOR) { + assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index)); + + emit->fs.color_out_index[semantic_index] = index; + + emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs, + index + 1); + + /* The semantic index is the shader's color output/buffer index */ + emit_output_declaration(emit, + VGPU10_OPCODE_DCL_OUTPUT, semantic_index, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + TRUE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + + if (semantic_index == 0) { + if (emit->key.fs.write_color0_to_n_cbufs > 1) { + /* Emit declarations for the additional color outputs + * for broadcasting. + */ + unsigned j; + for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) { + /* Allocate a new output index */ + unsigned idx = emit->info.num_outputs + j - 1; + emit->fs.color_out_index[j] = idx; + emit_output_declaration(emit, + VGPU10_OPCODE_DCL_OUTPUT, idx, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + TRUE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + emit->info.output_semantic_index[idx] = j; + } + + emit->fs.num_color_outputs = + emit->key.fs.write_color0_to_n_cbufs; + } + } + else { + assert(!emit->key.fs.write_color0_to_n_cbufs); + } + } + else if (semantic_name == TGSI_SEMANTIC_POSITION) { + /* Fragment depth output */ + emit_fragdepth_output_declaration(emit); + } + else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) { + /* Sample mask output */ + emit_samplemask_output_declaration(emit); + } + else { + assert(!"Bad output semantic name"); + } + } +} + + +/** + * Emit common output declaration for vertex processing. 
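One detail worth calling out from the fragment-shader output path above: when write_color0_to_n_cbufs asks for broadcasting, extra color output registers are declared beyond the shader's own outputs. A small helper showing the register arithmetic, assuming a single declared color output at register 0 as in the common case (the helper itself is hypothetical, not part of the driver):

static unsigned
broadcast_color_output_reg(unsigned num_shader_outputs, unsigned cbuf)
{
   /* color buffer 0 keeps the original output register; every further
    * color buffer gets a register appended after the shader's outputs */
   return (cbuf == 0) ? 0 : num_shader_outputs + cbuf - 1;
}

/* Example: one declared output, write_color0_to_n_cbufs == 4
 *   cbuf 0 -> register 0, cbuf 1 -> register 1,
 *   cbuf 2 -> register 2, cbuf 3 -> register 3
 */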
+ */ +static void +emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit, + unsigned index, unsigned writemask, + boolean addSignature) +{ + const enum tgsi_semantic semantic_name = + emit->info.output_semantic_name[index]; + const unsigned semantic_index = emit->info.output_semantic_index[index]; + unsigned name, type; + unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + + assert(emit->unit != PIPE_SHADER_FRAGMENT && + emit->unit != PIPE_SHADER_COMPUTE); + + switch (semantic_name) { + case TGSI_SEMANTIC_POSITION: + if (emit->unit == PIPE_SHADER_TESS_CTRL) { + /* position will be declared in control point only */ + assert(emit->tcs.control_point_phase); + type = VGPU10_OPCODE_DCL_OUTPUT; + name = VGPU10_NAME_UNDEFINED; + emit_output_declaration(emit, type, index, name, final_mask, TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); + return; + } + else { + type = VGPU10_OPCODE_DCL_OUTPUT_SIV; + name = VGPU10_NAME_POSITION; + } + /* Save the index of the vertex position output register */ + emit->vposition.out_index = index; + break; + case TGSI_SEMANTIC_CLIPDIST: + type = VGPU10_OPCODE_DCL_OUTPUT_SIV; + name = VGPU10_NAME_CLIP_DISTANCE; + /* save the starting index of the clip distance output register */ + if (semantic_index == 0) + emit->clip_dist_out_index = index; + final_mask = apply_clip_plane_mask(emit, writemask, semantic_index); + if (final_mask == 0x0) + return; /* discard this do-nothing declaration */ + break; + case TGSI_SEMANTIC_CLIPVERTEX: + type = VGPU10_OPCODE_DCL_OUTPUT; + name = VGPU10_NAME_UNDEFINED; + emit->clip_vertex_out_index = index; + break; + default: + /* generic output */ + type = VGPU10_OPCODE_DCL_OUTPUT; + name = VGPU10_NAME_UNDEFINED; + } + + emit_output_declaration(emit, type, index, name, final_mask, addSignature, + map_tgsi_semantic_to_sgn_name(semantic_name)); +} + + +/** + * Emit declaration for outputs in vertex shader. + */ +static void +emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + for (i = 0; i < emit->info.num_outputs; i++) { + emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE); + } +} + + +/** + * A helper function to determine the writemask for an output + * for the specified stream. + */ +static unsigned +output_writemask_for_stream(unsigned stream, ubyte output_streams, + ubyte output_usagemask) +{ + unsigned i; + unsigned writemask = 0; + + for (i = 0; i < 4; i++) { + if ((output_streams & 0x3) == stream) + writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i); + output_streams >>= 2; + } + return writemask & output_usagemask; +} + + +/** + * Emit declaration for outputs in geometry shader. + */ +static void +emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + VGPU10OpcodeToken0 opcode0; + unsigned numStreamsSupported = 1; + int s; + + if (emit->version >= 50) { + numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components); + } + + /** + * Start emitting from the last stream first, so we end with + * stream 0, so any of the auxiliary output declarations will + * go to stream 0. 
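output_writemask_for_stream() above decodes the per-component stream assignment that TGSI packs two bits at a time into output_streams. A self-contained version with a worked example (component x lives in bits 1:0, y in bits 3:2, and so on):

#include <stdio.h>

static unsigned
writemask_for_stream(unsigned stream, unsigned char output_streams,
                     unsigned char usage_mask)
{
   unsigned i, writemask = 0;
   for (i = 0; i < 4; i++) {
      if ((output_streams & 0x3) == stream)
         writemask |= 1u << i;      /* same role as MASK_X << i above */
      output_streams >>= 2;
   }
   return writemask & usage_mask;
}

int main(void)
{
   /* x,y tagged for stream 0; z,w tagged for stream 1 */
   unsigned char streams = 0x50, usage = 0xf;
   printf("stream 0 -> 0x%x, stream 1 -> 0x%x\n",
          writemask_for_stream(0, streams, usage),    /* 0x3: X|Y */
          writemask_for_stream(1, streams, usage));   /* 0xc: Z|W */
   return 0;
}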
+ */ + for (s = numStreamsSupported-1; s >= 0; s--) { + + if (emit->info.num_stream_output_components[s] == 0) + continue; + + if (emit->version >= 50) { + /* DCL_STREAM stream */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE); + emit_stream_register(emit, s); + end_emit_instruction(emit); + } + + /* emit output primitive topology declaration */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY; + opcode0.primitiveTopology = emit->gs.prim_topology; + emit_property_instruction(emit, opcode0, 0, 0); + + for (i = 0; i < emit->info.num_outputs; i++) { + unsigned writemask; + + /* find out the writemask for this stream */ + writemask = output_writemask_for_stream(s, emit->info.output_streams[i], + emit->output_usage_mask[i]); + + if (writemask) { + enum tgsi_semantic semantic_name = + emit->info.output_semantic_name[i]; + + /* TODO: Still need to take care of a special case where a + * single varying spans across multiple output registers. + */ + switch(semantic_name) { + case TGSI_SEMANTIC_PRIMID: + emit_output_declaration(emit, + VGPU10_OPCODE_DCL_OUTPUT_SGV, i, + VGPU10_NAME_PRIMITIVE_ID, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + FALSE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + break; + case TGSI_SEMANTIC_LAYER: + emit_output_declaration(emit, + VGPU10_OPCODE_DCL_OUTPUT_SIV, i, + VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX, + VGPU10_OPERAND_4_COMPONENT_MASK_X, + FALSE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + break; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + emit_output_declaration(emit, + VGPU10_OPCODE_DCL_OUTPUT_SIV, i, + VGPU10_NAME_VIEWPORT_ARRAY_INDEX, + VGPU10_OPERAND_4_COMPONENT_MASK_X, + FALSE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + emit->gs.viewport_index_out_index = i; + break; + default: + emit_vertex_output_declaration(emit, i, writemask, FALSE); + } + } + } + } + + /* For geometry shader outputs, it is possible the same register is + * declared multiple times for different streams. So to avoid + * redundant signature entries, geometry shader output signature is done + * outside of the declaration. + */ + struct svga_shader_signature *sgn = &emit->signature; + SVGA3dDXShaderSignatureEntry *sgnEntry; + + for (i = 0; i < emit->info.num_outputs; i++) { + if (emit->output_usage_mask[i]) { + enum tgsi_semantic sem_name = emit->info.output_semantic_name[i]; + + sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++]; + set_shader_signature_entry(sgnEntry, i, + map_tgsi_semantic_to_sgn_name(sem_name), + emit->output_usage_mask[i], + SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, + SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); + } + } +} + + +/** + * Emit the declaration for the tess inner/outer output. 
+ * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV + * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT + * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value + */ +static void +emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit, + unsigned index, unsigned opcodeType, + unsigned operandType, VGPU10_SYSTEM_NAME name, + SVGA3dDXSignatureSemanticName sgnName) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10NameToken name_token; + + assert(emit->version >= 50); + assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR || + (emit->key.tcs.prim_mode == PIPE_PRIM_LINES && + name == VGPU10_NAME_UNDEFINED)); + assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR); + + assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT || + operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT); + + opcode0.value = operand0.value = name_token.value = 0; + + opcode0.opcodeType = opcodeType; + operand0.operandType = operandType; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + name_token.name = name; + emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); + + /* Capture patch constant signature */ + struct svga_shader_signature *sgn = &emit->signature; + SVGA3dDXShaderSignatureEntry *sgnEntry = + &sgn->patchConstants[sgn->header.numPatchConstantSignatures++]; + set_shader_signature_entry(sgnEntry, index, + sgnName, SVGA3DWRITEMASK_0, + SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, + SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); +} + + +/** + * Emit output declarations for tessellation control shader. + */ +static void +emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned int i; + unsigned outputIndex = emit->num_outputs; + struct svga_shader_signature *sgn = &emit->signature; + + /** + * Initialize patch_generic_out_count so it won't be counted twice + * since this function is called twice, one for control point phase + * and another time for patch constant phase. 
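Each tessellation factor is declared as its own register with only the .x component and one of the VGPU10_NAME_FINAL_* system names, so the number of tess-factor registers depends only on the domain. A small summary of the counts that the declarations which follow add up to (illustrative enum, not the gallium PIPE_PRIM_* values):

enum domain_sketch { DOMAIN_QUAD, DOMAIN_TRI, DOMAIN_ISOLINE };

static unsigned
num_tessfactor_regs(enum domain_sketch d)
{
   switch (d) {
   case DOMAIN_QUAD:    return 4 + 2;   /* 4 edge factors + 2 inside factors */
   case DOMAIN_TRI:     return 3 + 1;   /* 3 edge factors + 1 inside factor  */
   case DOMAIN_ISOLINE: return 2;       /* line detail + line density        */
   default:             return 0;
   }
}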
+ */ + emit->tcs.patch_generic_out_count = 0; + + for (i = 0; i < emit->info.num_outputs; i++) { + unsigned index = i; + const enum tgsi_semantic semantic_name = + emit->info.output_semantic_name[i]; + + switch (semantic_name) { + case TGSI_SEMANTIC_TESSINNER: + emit->tcs.inner.tgsi_index = i; + + /* skip per-patch output declarations in control point phase */ + if (emit->tcs.control_point_phase) + break; + + emit->tcs.inner.out_index = outputIndex; + switch (emit->key.tcs.prim_mode) { + case PIPE_PRIM_QUADS: + emit_tesslevel_declaration(emit, outputIndex++, + VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, + VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR); + + emit_tesslevel_declaration(emit, outputIndex++, + VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, + VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); + break; + case PIPE_PRIM_TRIANGLES: + emit_tesslevel_declaration(emit, outputIndex++, + VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, + VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); + break; + case PIPE_PRIM_LINES: + break; + default: + debug_printf("Unsupported primitive type"); + } + break; + + case TGSI_SEMANTIC_TESSOUTER: + emit->tcs.outer.tgsi_index = i; + + /* skip per-patch output declarations in control point phase */ + if (emit->tcs.control_point_phase) + break; + + emit->tcs.outer.out_index = outputIndex; + switch (emit->key.tcs.prim_mode) { + case PIPE_PRIM_QUADS: + for (int j = 0; j < 4; j++) { + emit_tesslevel_declaration(emit, outputIndex++, + VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, + VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j); + } + break; + case PIPE_PRIM_TRIANGLES: + for (int j = 0; j < 3; j++) { + emit_tesslevel_declaration(emit, outputIndex++, + VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, + VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j); + } + break; + case PIPE_PRIM_LINES: + for (int j = 0; j < 2; j++) { + emit_tesslevel_declaration(emit, outputIndex++, + VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, + VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j); + } + break; + default: + debug_printf("Unsupported primitive type"); + } + break; + + case TGSI_SEMANTIC_PATCH: + if (emit->tcs.patch_generic_out_index == INVALID_INDEX) + emit->tcs.patch_generic_out_index= i; + emit->tcs.patch_generic_out_count++; + + /* skip per-patch output declarations in control point phase */ + if (emit->tcs.control_point_phase) + break; + + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + FALSE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + + SVGA3dDXShaderSignatureEntry *sgnEntry = + &sgn->patchConstants[sgn->header.numPatchConstantSignatures++]; + set_shader_signature_entry(sgnEntry, index, + map_tgsi_semantic_to_sgn_name(semantic_name), + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, + SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); + + break; + + default: + /* save the starting index of control point outputs */ + if (emit->tcs.control_point_out_index == INVALID_INDEX) + emit->tcs.control_point_out_index = i; + 
emit->tcs.control_point_out_count++; + + /* skip control point output declarations in patch constant phase */ + if (!emit->tcs.control_point_phase) + break; + + emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], + TRUE); + + } + } + + if (emit->tcs.control_point_phase) { + /** + * Add missing control point output in control point phase. + */ + if (emit->tcs.control_point_out_index == INVALID_INDEX) { + /* use register index after tessellation factors */ + switch (emit->key.tcs.prim_mode) { + case PIPE_PRIM_QUADS: + emit->tcs.control_point_out_index = outputIndex + 6; + break; + case PIPE_PRIM_TRIANGLES: + emit->tcs.control_point_out_index = outputIndex + 4; + break; + default: + emit->tcs.control_point_out_index = outputIndex + 2; + break; + } + emit->tcs.control_point_out_count++; + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, + emit->tcs.control_point_out_index, + VGPU10_NAME_POSITION, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); + + /* If tcs does not output any control point output, + * we can end the hull shader control point phase here + * after emitting the default control point output. + */ + emit->skip_instruction = TRUE; + } + } + else { + if (emit->tcs.outer.out_index == INVALID_INDEX) { + /* since the TCS did not declare out outer tess level output register, + * we declare it here for patch constant phase only. + */ + emit->tcs.outer.out_index = outputIndex; + if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { + for (int i = 0; i < 4; i++) { + emit_tesslevel_declaration(emit, outputIndex++, + VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, + VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i); + } + } + else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { + for (int i = 0; i < 3; i++) { + emit_tesslevel_declaration(emit, outputIndex++, + VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, + VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i); + } + } + } + + if (emit->tcs.inner.out_index == INVALID_INDEX) { + /* since the TCS did not declare out inner tess level output register, + * we declare it here + */ + emit->tcs.inner.out_index = outputIndex; + if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { + emit_tesslevel_declaration(emit, outputIndex++, + VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, + VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR); + emit_tesslevel_declaration(emit, outputIndex++, + VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, + VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); + } + else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { + emit_tesslevel_declaration(emit, outputIndex++, + VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, + VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); + } + } + } + emit->num_outputs = outputIndex; +} + + +/** + * Emit output declarations for tessellation evaluation shader. + */ +static void +emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned int i; + + for (i = 0; i < emit->info.num_outputs; i++) { + emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE); + } +} + + +/** + * Emit the declaration for a system value input/output. 
+ */ +static void +emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, + enum tgsi_semantic semantic_name, unsigned index) +{ + switch (semantic_name) { + case TGSI_SEMANTIC_INSTANCEID: + index = alloc_system_value_index(emit, index); + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, + index, 1, + VGPU10_NAME_INSTANCE_ID, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_X, + VGPU10_INTERPOLATION_UNDEFINED, TRUE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + break; + case TGSI_SEMANTIC_VERTEXID: + index = alloc_system_value_index(emit, index); + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, + index, 1, + VGPU10_NAME_VERTEX_ID, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_X, + VGPU10_INTERPOLATION_UNDEFINED, TRUE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + break; + case TGSI_SEMANTIC_SAMPLEID: + assert(emit->unit == PIPE_SHADER_FRAGMENT); + emit->fs.sample_id_sys_index = index; + index = alloc_system_value_index(emit, index); + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, + index, 1, + VGPU10_NAME_SAMPLE_INDEX, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_X, + VGPU10_INTERPOLATION_CONSTANT, TRUE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + break; + case TGSI_SEMANTIC_SAMPLEPOS: + /* This system value contains the position of the current sample + * when using per-sample shading. We implement this by calling + * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample + * index as the argument. See emit_sample_position_instructions(). + */ + assert(emit->version >= 41); + emit->fs.sample_pos_sys_index = index; + index = alloc_system_value_index(emit, index); + break; + case TGSI_SEMANTIC_INVOCATIONID: + /* Note: invocation id input is mapped to different register depending + * on the shader type. In GS, it will be mapped to vGSInstanceID#. + * In TCS, it will be mapped to vOutputControlPointID#. + * Since in both cases, the mapped name is unique rather than + * just a generic input name ("v#"), so there is no need to remap + * the index value. + */ + assert(emit->unit == PIPE_SHADER_GEOMETRY || + emit->unit == PIPE_SHADER_TESS_CTRL); + assert(emit->version >= 50); + + if (emit->unit == PIPE_SHADER_GEOMETRY) { + emit->gs.invocation_id_sys_index = index; + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID, + VGPU10_OPERAND_INDEX_0D, + index, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_0_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + 0, + VGPU10_INTERPOLATION_UNDEFINED, TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); + } else if (emit->unit == PIPE_SHADER_TESS_CTRL) { + /* The emission of the control point id will be done + * in the control point phase in emit_hull_shader_control_point_phase(). + */ + emit->tcs.invocation_id_sys_index = index; + } + break; + case TGSI_SEMANTIC_SAMPLEMASK: + /* Note: the PS sample mask input has a unique name ("vCoverage#") + * rather than just a generic input name ("v#") so no need to remap the + * index value. 
+ */ + assert(emit->unit == PIPE_SHADER_FRAGMENT); + assert(emit->version >= 50); + emit->fs.sample_mask_in_sys_index = index; + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK, + VGPU10_OPERAND_INDEX_0D, + index, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_1_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + 0, + VGPU10_INTERPOLATION_CONSTANT, TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); + break; + case TGSI_SEMANTIC_TESSCOORD: + assert(emit->version >= 50); + + unsigned usageMask = 0; + + if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) { + usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ; + } + else if (emit->tes.prim_mode == PIPE_PRIM_LINES || + emit->tes.prim_mode == PIPE_PRIM_QUADS) { + usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY; + } + + emit->tes.tesscoord_sys_index = index; + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT, + VGPU10_OPERAND_INDEX_0D, + index, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + usageMask, + VGPU10_INTERPOLATION_UNDEFINED, TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); + break; + case TGSI_SEMANTIC_TESSINNER: + assert(emit->version >= 50); + emit->tes.inner.tgsi_index = index; + break; + case TGSI_SEMANTIC_TESSOUTER: + assert(emit->version >= 50); + emit->tes.outer.tgsi_index = index; + break; + case TGSI_SEMANTIC_VERTICESIN: + assert(emit->unit == PIPE_SHADER_TESS_CTRL); + assert(emit->version >= 50); + + /* save the system value index */ + emit->tcs.vertices_per_patch_index = index; + break; + case TGSI_SEMANTIC_PRIMID: + assert(emit->version >= 50); + if (emit->unit == PIPE_SHADER_TESS_CTRL) { + emit->tcs.prim_id_index = index; + } + else if (emit->unit == PIPE_SHADER_TESS_EVAL) { + emit->tes.prim_id_index = index; + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID, + VGPU10_OPERAND_INDEX_0D, + index, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_0_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + 0, + VGPU10_INTERPOLATION_UNDEFINED, TRUE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + } + break; + default: + debug_printf("unexpected system value semantic index %u / %s\n", + semantic_name, tgsi_semantic_names[semantic_name]); + } +} + +/** + * Translate a TGSI declaration to VGPU10. + */ +static boolean +emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_declaration *decl) +{ + switch (decl->Declaration.File) { + case TGSI_FILE_INPUT: + /* do nothing - see emit_input_declarations() */ + return TRUE; + + case TGSI_FILE_OUTPUT: + assert(decl->Range.First == decl->Range.Last); + emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; + return TRUE; + + case TGSI_FILE_TEMPORARY: + /* Don't declare the temps here. Just keep track of how many + * and emit the declaration later. + */ + if (decl->Declaration.Array) { + /* Indexed temporary array. Save the start index of the array + * and the size of the array. 
+ */ + const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS); + assert(arrayID < ARRAY_SIZE(emit->temp_arrays)); + + /* Save this array so we can emit the declaration for it later */ + create_temp_array(emit, arrayID, decl->Range.First, + decl->Range.Last - decl->Range.First + 1, + decl->Range.First); + } + + /* for all temps, indexed or not, keep track of highest index */ + emit->num_shader_temps = MAX2(emit->num_shader_temps, + decl->Range.Last + 1); + return TRUE; + + case TGSI_FILE_CONSTANT: + /* Don't declare constants here. Just keep track and emit later. */ + { + unsigned constbuf = 0, num_consts; + if (decl->Declaration.Dimension) { + constbuf = decl->Dim.Index2D; + } + /* We throw an assertion here when, in fact, the shader should never + * have linked due to constbuf index out of bounds, so we shouldn't + * have reached here. + */ + assert(constbuf < ARRAY_SIZE(emit->num_shader_consts)); + + num_consts = MAX2(emit->num_shader_consts[constbuf], + decl->Range.Last + 1); + + if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { + debug_printf("Warning: constant buffer is declared to size [%u]" + " but [%u] is the limit.\n", + num_consts, + VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); + } + /* The linker doesn't enforce the max UBO size so we clamp here */ + emit->num_shader_consts[constbuf] = + MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); + } + return TRUE; + + case TGSI_FILE_IMMEDIATE: + assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); + return FALSE; + + case TGSI_FILE_SYSTEM_VALUE: + emit_system_value_declaration(emit, decl->Semantic.Name, + decl->Range.First); + return TRUE; + + case TGSI_FILE_SAMPLER: + /* Don't declare samplers here. Just keep track and emit later. */ + emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); + return TRUE; + +#if 0 + case TGSI_FILE_RESOURCE: + /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ + /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ + assert(!"TGSI_FILE_RESOURCE not handled yet"); + return FALSE; +#endif + + case TGSI_FILE_ADDRESS: + emit->num_address_regs = MAX2(emit->num_address_regs, + decl->Range.Last + 1); + return TRUE; + + case TGSI_FILE_SAMPLER_VIEW: + { + unsigned unit = decl->Range.First; + assert(decl->Range.First == decl->Range.Last); + emit->sampler_target[unit] = decl->SamplerView.Resource; + /* Note: we can ignore YZW return types for now */ + emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX; + emit->sampler_view[unit] = TRUE; + } + return TRUE; + + default: + assert(!"Unexpected type of declaration"); + return FALSE; + } +} + + + +/** + * Emit input declarations for fragment shader. 
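Constant declarations are only counted at this point; the actual constant buffer declarations are emitted later, and the per-buffer element count is clamped to the device limit. A minimal sketch of that bookkeeping (4096 is only a stand-in here; the real limit is VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT from the headers):

#define MAX_CONST_ELEMENTS 4096   /* stand-in for the real limit macro */

static unsigned
track_constbuf_size(unsigned current_size, unsigned last_declared_index)
{
   unsigned n = last_declared_index + 1;
   if (n < current_size)
      n = current_size;            /* keep the running maximum */
   if (n > MAX_CONST_ELEMENTS)
      n = MAX_CONST_ELEMENTS;      /* clamp to the device limit */
   return n;
}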
+ */ +static void +emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + for (i = 0; i < emit->linkage.num_inputs; i++) { + enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; + unsigned usage_mask = emit->info.input_usage_mask[i]; + unsigned index = emit->linkage.input_map[i]; + unsigned type, interpolationMode, name; + unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + + if (usage_mask == 0) + continue; /* register is not actually used */ + + if (semantic_name == TGSI_SEMANTIC_POSITION) { + /* fragment position input */ + type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; + interpolationMode = VGPU10_INTERPOLATION_LINEAR; + name = VGPU10_NAME_POSITION; + if (usage_mask & TGSI_WRITEMASK_W) { + /* we need to replace use of 'w' with '1/w' */ + emit->fs.fragcoord_input_index = i; + } + } + else if (semantic_name == TGSI_SEMANTIC_FACE) { + /* fragment front-facing input */ + type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; + interpolationMode = VGPU10_INTERPOLATION_CONSTANT; + name = VGPU10_NAME_IS_FRONT_FACE; + emit->fs.face_input_index = i; + } + else if (semantic_name == TGSI_SEMANTIC_PRIMID) { + /* primitive ID */ + type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; + interpolationMode = VGPU10_INTERPOLATION_CONSTANT; + name = VGPU10_NAME_PRIMITIVE_ID; + } + else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) { + /* sample index / ID */ + type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; + interpolationMode = VGPU10_INTERPOLATION_CONSTANT; + name = VGPU10_NAME_SAMPLE_INDEX; + } + else if (semantic_name == TGSI_SEMANTIC_LAYER) { + /* render target array index */ + if (emit->key.fs.layer_to_zero) { + /** + * The shader from the previous stage does not write to layer, + * so reading the layer index in fragment shader should return 0. + */ + emit->fs.layer_input_index = i; + continue; + } else { + type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; + interpolationMode = VGPU10_INTERPOLATION_CONSTANT; + name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX; + mask = VGPU10_OPERAND_4_COMPONENT_MASK_X; + } + } + else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) { + /* viewport index */ + type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; + interpolationMode = VGPU10_INTERPOLATION_CONSTANT; + name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX; + mask = VGPU10_OPERAND_4_COMPONENT_MASK_X; + } + else { + /* general fragment input */ + type = VGPU10_OPCODE_DCL_INPUT_PS; + interpolationMode = + translate_interpolation(emit, + emit->info.input_interpolate[i], + emit->info.input_interpolate_loc[i]); + + /* keeps track if flat interpolation mode is being used */ + emit->uses_flat_interp = emit->uses_flat_interp || + (interpolationMode == VGPU10_INTERPOLATION_CONSTANT); + + name = VGPU10_NAME_UNDEFINED; + } + + emit_input_declaration(emit, type, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, index, 1, + name, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + mask, + interpolationMode, TRUE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + } +} + + +/** + * Emit input declarations for vertex shader. 
+ */ +static void +emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) { + unsigned usage_mask = emit->info.input_usage_mask[i]; + unsigned index = i; + + if (usage_mask == 0) + continue; /* register is not actually used */ + + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, index, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED, TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); + } +} + + +/** + * Emit input declarations for geometry shader. + */ +static void +emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + for (i = 0; i < emit->info.num_inputs; i++) { + enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; + unsigned usage_mask = emit->info.input_usage_mask[i]; + unsigned index = emit->linkage.input_map[i]; + unsigned opcodeType, operandType; + unsigned numComp, selMode; + unsigned name; + unsigned dim; + + if (usage_mask == 0) + continue; /* register is not actually used */ + + opcodeType = VGPU10_OPCODE_DCL_INPUT; + operandType = VGPU10_OPERAND_TYPE_INPUT; + numComp = VGPU10_OPERAND_4_COMPONENT; + selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + name = VGPU10_NAME_UNDEFINED; + + /* all geometry shader inputs are two dimensional except + * gl_PrimitiveID + */ + dim = VGPU10_OPERAND_INDEX_2D; + + if (semantic_name == TGSI_SEMANTIC_PRIMID) { + /* Primitive ID */ + operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; + dim = VGPU10_OPERAND_INDEX_0D; + numComp = VGPU10_OPERAND_0_COMPONENT; + selMode = 0; + + /* also save the register index so we can check for + * primitive id when emit src register. We need to modify the + * operand type, index dimension when emit primitive id src reg. + */ + emit->gs.prim_id_index = i; + } + else if (semantic_name == TGSI_SEMANTIC_POSITION) { + /* vertex position input */ + opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV; + name = VGPU10_NAME_POSITION; + } + + emit_input_declaration(emit, opcodeType, operandType, + dim, index, + emit->gs.input_size, + name, + numComp, selMode, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED, TRUE, + map_tgsi_semantic_to_sgn_name(semantic_name)); + } +} + + +/** + * Emit input declarations for tessellation control shader. + */ +static void +emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + unsigned size = emit->key.tcs.vertices_per_patch; + unsigned indicesMask = 0; + + for (i = 0; i < emit->info.num_inputs; i++) { + unsigned usage_mask = emit->info.input_usage_mask[i]; + unsigned index = emit->linkage.input_map[i]; + enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; + VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED; + VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT; + boolean addSignature = TRUE; + + /* indices that are declared */ + indicesMask |= 1 << index; + + if (semantic_name == TGSI_SEMANTIC_POSITION || + index == emit->linkage.position_index) { + /* save the input control point index for later use */ + emit->tcs.control_point_input_index = i; + } + else if (usage_mask == 0) { + continue; /* register is not actually used */ + } + + /* input control points in the patch constant phase are emitted in the + * vicp register rather than the v register. 
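Geometry and tessellation-control shader inputs are declared with a two-dimensional index (vertex number by register number), so emit_decl_instruction() writes an extra array-size dword before the register index. Roughly, the dword sequence looks like the sketch below; the first two values are placeholders for the packed opcode and operand tokens whose bitfields are defined in VGPU10ShaderTokens.h, and a trailing name token is appended only when a non-zero system name is used:

#include <stdint.h>

static const uint32_t gs_input_decl_sketch[] = {
   0xAu,   /* opcode token: dcl_input (placeholder encoding)                 */
   0xBu,   /* operand token: 2-D index, immediate32 representations (ditto)  */
   3u,     /* array size, e.g. 3 vertices for triangle input, or the patch
            * vertex count for a tessellation control shader                 */
   4u,     /* register index: the declaration covers v[vertex][4]            */
};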
+ */ + if (!emit->tcs.control_point_phase) { + operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; + addSignature = emit->tcs.control_point_out_count == 0; + } + + /* Tessellation control shader inputs are two dimensional. + * The array size is determined by the patch vertex count. + */ + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + operandType, + VGPU10_OPERAND_INDEX_2D, + index, size, name, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED, + addSignature, + map_tgsi_semantic_to_sgn_name(semantic_name)); + + } + + if (emit->tcs.control_point_phase) { + if (emit->tcs.control_point_input_index == INVALID_INDEX) { + + /* Add input control point declaration if it does not exist */ + if ((indicesMask & (1 << emit->linkage.position_index)) == 0) { + emit->linkage.input_map[emit->linkage.num_inputs] = + emit->linkage.position_index; + emit->tcs.control_point_input_index = emit->linkage.num_inputs++; + + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_2D, + emit->linkage.position_index, + emit->key.tcs.vertices_per_patch, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED, TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); + } + } + + /* Also add an address register for the indirection to the + * input control points + */ + emit->tcs.control_point_addr_index = emit->num_address_regs++; + } +} + + +static void +emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit) +{ + + /* In tcs, tess factors are emitted as extra outputs. + * The starting register index for the tess factors is captured + * in the compile key. 
+ */ + unsigned inputIndex = emit->key.tes.tessfactor_index; + + if (emit->tes.prim_mode == PIPE_PRIM_QUADS) { + if (emit->key.tes.need_tessouter) { + emit->tes.outer.in_index = inputIndex; + for (int i = 0; i < 4; i++) { + emit_tesslevel_declaration(emit, inputIndex++, + VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, + VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i); + } + } + + if (emit->key.tes.need_tessinner) { + emit->tes.inner.in_index = inputIndex; + emit_tesslevel_declaration(emit, inputIndex++, + VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, + VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR); + + emit_tesslevel_declaration(emit, inputIndex++, + VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, + VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); + } + } + else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) { + if (emit->key.tes.need_tessouter) { + emit->tes.outer.in_index = inputIndex; + for (int i = 0; i < 3; i++) { + emit_tesslevel_declaration(emit, inputIndex++, + VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, + VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i); + } + } + + if (emit->key.tes.need_tessinner) { + emit->tes.inner.in_index = inputIndex; + emit_tesslevel_declaration(emit, inputIndex++, + VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, + VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); + } + } + else if (emit->tes.prim_mode == PIPE_PRIM_LINES) { + if (emit->key.tes.need_tessouter) { + emit->tes.outer.in_index = inputIndex; + emit_tesslevel_declaration(emit, inputIndex++, + VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, + VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR); + + emit_tesslevel_declaration(emit, inputIndex++, + VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, + VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR, + SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR); + } + } +} + + +/** + * Emit input declarations for tessellation evaluation shader. 
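In the evaluation shader the tessellation factors show up as patch-constant inputs, declared above starting at key.tes.tessfactor_index, outer factors first and inner factors right after them, and only when the shader actually reads them. A tiny illustration of where the inner factors land when both sets are declared (hypothetical helper, quads and triangles only; isolines have no inner factors):

/* 'base' is key.tes.tessfactor_index; num_outer is 4 for quads or
 * 3 for triangles, matching the declarations above. */
static unsigned
first_inner_tessfactor_reg(unsigned base, unsigned num_outer)
{
   return base + num_outer;   /* inner factors follow the outer ones */
}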
+ */ +static void +emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + for (i = 0; i < emit->info.num_inputs; i++) { + unsigned usage_mask = emit->info.input_usage_mask[i]; + unsigned index = emit->linkage.input_map[i]; + unsigned size; + const enum tgsi_semantic semantic_name = + emit->info.input_semantic_name[i]; + SVGA3dDXSignatureSemanticName sgn_name; + VGPU10_OPERAND_TYPE operandType; + VGPU10_OPERAND_INDEX_DIMENSION dim; + + if (usage_mask == 0) + usage_mask = 1; /* at least set usage mask to one */ + + if (semantic_name == TGSI_SEMANTIC_PATCH) { + operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT; + dim = VGPU10_OPERAND_INDEX_1D; + size = 1; + sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name); + } + else { + operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; + dim = VGPU10_OPERAND_INDEX_2D; + size = emit->key.tes.vertices_per_patch; + sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED; + } + + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType, + dim, index, size, VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED, + TRUE, sgn_name); + } + + emit_tessfactor_input_declarations(emit); +} + + +/** + * Emit all input declarations. + */ +static boolean +emit_input_declarations(struct svga_shader_emitter_v10 *emit) +{ + emit->index_range.required = + emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE; + + switch (emit->unit) { + case PIPE_SHADER_FRAGMENT: + emit_fs_input_declarations(emit); + break; + case PIPE_SHADER_GEOMETRY: + emit_gs_input_declarations(emit); + break; + case PIPE_SHADER_VERTEX: + emit_vs_input_declarations(emit); + break; + case PIPE_SHADER_TESS_CTRL: + emit_tcs_input_declarations(emit); + break; + case PIPE_SHADER_TESS_EVAL: + emit_tes_input_declarations(emit); + break; + case PIPE_SHADER_COMPUTE: + //XXX emit_cs_input_declarations(emit); + break; + default: + assert(0); + } + + if (emit->index_range.start_index != INVALID_INDEX) { + emit_index_range_declaration(emit); + } + emit->index_range.required = FALSE; + return TRUE; +} + + +/** + * Emit all output declarations. + */ +static boolean +emit_output_declarations(struct svga_shader_emitter_v10 *emit) +{ + emit->index_range.required = + emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? 
TRUE : FALSE; + + switch (emit->unit) { + case PIPE_SHADER_FRAGMENT: + emit_fs_output_declarations(emit); + break; + case PIPE_SHADER_GEOMETRY: + emit_gs_output_declarations(emit); + break; + case PIPE_SHADER_VERTEX: + emit_vs_output_declarations(emit); + break; + case PIPE_SHADER_TESS_CTRL: + emit_tcs_output_declarations(emit); + break; + case PIPE_SHADER_TESS_EVAL: + emit_tes_output_declarations(emit); + break; + case PIPE_SHADER_COMPUTE: + //XXX emit_cs_output_declarations(emit); + break; + default: + assert(0); + } + + if (emit->vposition.so_index != INVALID_INDEX && + emit->vposition.out_index != INVALID_INDEX) { + + assert(emit->unit != PIPE_SHADER_FRAGMENT); + + /* Emit the declaration for the non-adjusted vertex position + * for stream output purpose + */ + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, + emit->vposition.so_index, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); + } + + if (emit->clip_dist_so_index != INVALID_INDEX && + emit->clip_dist_out_index != INVALID_INDEX) { + + assert(emit->unit != PIPE_SHADER_FRAGMENT); + + /* Emit the declaration for the clip distance shadow copy which + * will be used for stream output purpose and for clip distance + * varying variable + */ + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, + emit->clip_dist_so_index, + VGPU10_NAME_UNDEFINED, + emit->output_usage_mask[emit->clip_dist_out_index], + TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); + + if (emit->info.num_written_clipdistance > 4) { + /* for the second clip distance register, each handles 4 planes */ + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, + emit->clip_dist_so_index + 1, + VGPU10_NAME_UNDEFINED, + emit->output_usage_mask[emit->clip_dist_out_index+1], + TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); + } + } + + if (emit->index_range.start_index != INVALID_INDEX) { + emit_index_range_declaration(emit); + } + emit->index_range.required = FALSE; + return TRUE; +} + + +/** + * A helper function to create a temporary indexable array + * and initialize the corresponding entries in the temp_map array. + */ +static void +create_temp_array(struct svga_shader_emitter_v10 *emit, + unsigned arrayID, unsigned first, unsigned count, + unsigned startIndex) +{ + unsigned i, tempIndex = startIndex; + + emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1); + assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS); + emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS); + + emit->temp_arrays[arrayID].start = first; + emit->temp_arrays[arrayID].size = count; + + /* Fill in the temp_map entries for this temp array */ + for (i = 0; i < count; i++, tempIndex++) { + emit->temp_map[tempIndex].arrayId = arrayID; + emit->temp_map[tempIndex].index = i; + } +} + + +/** + * Emit the declaration for the temporary registers. + */ +static boolean +emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) +{ + unsigned total_temps, reg, i; + + total_temps = emit->num_shader_temps; + + /* If there is indirect access to non-indexable temps in the shader, + * convert those temps to indexable temps. This works around a bug + * in the GLSL->TGSI translator exposed in piglit test + * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test. + * Internal temps added by the driver remain as non-indexable temps. 
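+ * In that case all shader temps are folded into indexable array 1,
+ * starting at temp index 0 (see create_temp_array() above).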
+ */ + if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) && + emit->num_temp_arrays == 0) { + create_temp_array(emit, 1, 0, total_temps, 0); + } + + /* Allocate extra temps for specially-implemented instructions, + * such as LIT. + */ + total_temps += MAX_INTERNAL_TEMPS; + + /* Allocate extra temps for clip distance or clip vertex. + */ + if (emit->clip_mode == CLIP_DISTANCE) { + /* We need to write the clip distance to a temporary register + * first. Then it will be copied to the shadow copy for + * the clip distance varying variable and stream output purpose. + * It will also be copied to the actual CLIPDIST register + * according to the enabled clip planes + */ + emit->clip_dist_tmp_index = total_temps++; + if (emit->info.num_written_clipdistance > 4) + total_temps++; /* second clip register */ + } + else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) { + /* If the current shader is in the last vertex processing stage, + * We need to convert the TGSI CLIPVERTEX output to one or more + * clip distances. Allocate a temp reg for the clipvertex here. + */ + assert(emit->info.writes_clipvertex > 0); + emit->clip_vertex_tmp_index = total_temps; + total_temps++; + } + + if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) { + if (emit->vposition.need_prescale || emit->key.vs.undo_viewport || + emit->key.clip_plane_enable || + emit->vposition.so_index != INVALID_INDEX) { + emit->vposition.tmp_index = total_temps; + total_temps += 1; + } + + if (emit->vposition.need_prescale) { + emit->vposition.prescale_scale_index = total_temps++; + emit->vposition.prescale_trans_index = total_temps++; + } + + if (emit->unit == PIPE_SHADER_VERTEX) { + unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 | emit->key.vs.adjust_attrib_itof | emit->key.vs.adjust_attrib_utof | emit->key.vs.attrib_is_bgra | @@ -2884,25 +5210,9 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) emit->vs.adjusted_input[index] = total_temps++; } } - - if (emit->clip_mode == CLIP_DISTANCE) { - /* We need to write the clip distance to a temporary register - * first. Then it will be copied to the shadow copy for - * the clip distance varying variable and stream output purpose. - * It will also be copied to the actual CLIPDIST register - * according to the enabled clip planes - */ - emit->clip_dist_tmp_index = total_temps++; - if (emit->info.num_written_clipdistance > 4) - total_temps++; /* second clip register */ - } - else if (emit->clip_mode == CLIP_VERTEX) { - /* We need to convert the TGSI CLIPVERTEX output to one or more - * clip distances. Allocate a temp reg for the clipvertex here. 
- */ - assert(emit->info.writes_clipvertex > 0); - emit->clip_vertex_tmp_index = total_temps; - total_temps++; + else if (emit->unit == PIPE_SHADER_GEOMETRY) { + if (emit->key.gs.writes_viewport_index) + emit->gs.viewport_index_tmp_index = total_temps++; } } else if (emit->unit == PIPE_SHADER_FRAGMENT) { @@ -2930,6 +5240,63 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) emit->fs.sample_pos_tmp_index = total_temps++; } } + else if (emit->unit == PIPE_SHADER_TESS_EVAL) { + if (emit->vposition.need_prescale) { + emit->vposition.tmp_index = total_temps++; + emit->vposition.prescale_scale_index = total_temps++; + emit->vposition.prescale_trans_index = total_temps++; + } + + if (emit->tes.inner.tgsi_index) { + emit->tes.inner.temp_index = total_temps; + total_temps += 1; + } + + if (emit->tes.outer.tgsi_index) { + emit->tes.outer.temp_index = total_temps; + total_temps += 1; + } + } + else if (emit->unit == PIPE_SHADER_TESS_CTRL) { + if (emit->tcs.inner.tgsi_index != INVALID_INDEX) { + if (!emit->tcs.control_point_phase) { + emit->tcs.inner.temp_index = total_temps; + total_temps += 1; + } + } + if (emit->tcs.outer.tgsi_index != INVALID_INDEX) { + if (!emit->tcs.control_point_phase) { + emit->tcs.outer.temp_index = total_temps; + total_temps += 1; + } + } + + if (emit->tcs.control_point_phase && + emit->info.reads_pervertex_outputs) { + emit->tcs.control_point_tmp_index = total_temps; + total_temps += emit->tcs.control_point_out_count; + } + else if (!emit->tcs.control_point_phase && + emit->info.reads_perpatch_outputs) { + + /* If there is indirect access to the patch constant outputs + * in the control point phase, then an indexable temporary array + * will be created for these patch constant outputs. + * Note, indirect access can only be applicable to + * patch constant outputs in the control point phase. + */ + if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { + unsigned arrayID = + emit->num_temp_arrays ? emit->num_temp_arrays : 1; + create_temp_array(emit, arrayID, 0, + emit->tcs.patch_generic_out_count, total_temps); + } + emit->tcs.patch_generic_tmp_index = total_temps; + total_temps += emit->tcs.patch_generic_out_count; + } + + emit->tcs.invocation_id_tmp_index = total_temps++; + } for (i = 0; i < emit->num_address_regs; i++) { emit->address_reg_index[i] = total_temps++; @@ -3065,8 +5432,8 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit) /* Vertex position scale/translation */ if (emit->vposition.need_prescale) { - emit->vposition.prescale_scale_index = total_consts++; - emit->vposition.prescale_trans_index = total_consts++; + emit->vposition.prescale_cbuf_index = total_consts; + total_consts += (2 * emit->vposition.num_prescale); } if (emit->unit == PIPE_SHADER_VERTEX) { @@ -3078,8 +5445,8 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit) /* user-defined clip planes */ if (emit->key.clip_plane_enable) { unsigned n = util_bitcount(emit->key.clip_plane_enable); - assert(emit->unit == PIPE_SHADER_VERTEX || - emit->unit == PIPE_SHADER_GEOMETRY); + assert(emit->unit != PIPE_SHADER_FRAGMENT && + emit->unit != PIPE_SHADER_COMPUTE); for (i = 0; i < n; i++) { emit->clip_plane_const[i] = total_consts++; } @@ -3309,34 +5676,48 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit) return TRUE; } +/** + * Emit instruction with n=1, 2 or 3 source registers. 
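+ * This is the common helper behind emit_instruction_op1/op2/op3 below;
+ * src2 and src3 may be NULL when fewer source operands are needed.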
+ */ static void -emit_instruction_op1(struct svga_shader_emitter_v10 *emit, - VGPU10_OPCODE_TYPE opcode, +emit_instruction_opn(struct svga_shader_emitter_v10 *emit, + unsigned opcode, const struct tgsi_full_dst_register *dst, - const struct tgsi_full_src_register *src, - boolean saturate) + const struct tgsi_full_src_register *src1, + const struct tgsi_full_src_register *src2, + const struct tgsi_full_src_register *src3, + boolean saturate, bool precise) { begin_emit_instruction(emit); - emit_opcode(emit, opcode, saturate); + emit_opcode_precise(emit, opcode, saturate, precise); emit_dst_register(emit, dst); - emit_src_register(emit, src); + emit_src_register(emit, src1); + if (src2) { + emit_src_register(emit, src2); + } + if (src3) { + emit_src_register(emit, src3); + } end_emit_instruction(emit); } +static void +emit_instruction_op1(struct svga_shader_emitter_v10 *emit, + unsigned opcode, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE); +} + static void emit_instruction_op2(struct svga_shader_emitter_v10 *emit, VGPU10_OPCODE_TYPE opcode, const struct tgsi_full_dst_register *dst, const struct tgsi_full_src_register *src1, - const struct tgsi_full_src_register *src2, - boolean saturate) + const struct tgsi_full_src_register *src2) { - begin_emit_instruction(emit); - emit_opcode(emit, opcode, saturate); - emit_dst_register(emit, dst); - emit_src_register(emit, src1); - emit_src_register(emit, src2); - end_emit_instruction(emit); + emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE); } static void @@ -3345,18 +5726,114 @@ emit_instruction_op3(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_dst_register *dst, const struct tgsi_full_src_register *src1, const struct tgsi_full_src_register *src2, - const struct tgsi_full_src_register *src3, - boolean saturate) + const struct tgsi_full_src_register *src3) +{ + emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE); +} + +static void +emit_instruction_op0(struct svga_shader_emitter_v10 *emit, + VGPU10_OPCODE_TYPE opcode) { begin_emit_instruction(emit); - emit_opcode(emit, opcode, saturate); - emit_dst_register(emit, dst); - emit_src_register(emit, src1); - emit_src_register(emit, src2); - emit_src_register(emit, src3); + emit_opcode(emit, opcode, FALSE); end_emit_instruction(emit); } +/** + * Tessellation inner/outer levels needs to be store into its + * appropriate registers depending on prim_mode. + */ +static void +store_tesslevels(struct svga_shader_emitter_v10 *emit) +{ + int i; + + /* tessellation levels are required input/out in hull shader. 
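+ * Quads use two inner and four outer factors, triangles one inner and
+ * three outer factors, and lines only two outer factors.  Below we are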
+ * emitting the inner/outer tessellation levels, either from + * values provided in tcs or fallback default values which is 1.0 + */ + if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { + struct tgsi_full_src_register temp_src; + + if (emit->tcs.inner.tgsi_index != INVALID_INDEX) + temp_src = make_src_temp_reg(emit->tcs.inner.temp_index); + else + temp_src = make_immediate_reg_float(emit, 1.0f); + + for (i = 0; i < 2; i++) { + struct tgsi_full_src_register src = + scalar_src(&temp_src, TGSI_SWIZZLE_X + i); + struct tgsi_full_dst_register dst = + make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i); + dst = writemask_dst(&dst, TGSI_WRITEMASK_X); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); + } + + if (emit->tcs.outer.tgsi_index != INVALID_INDEX) + temp_src = make_src_temp_reg(emit->tcs.outer.temp_index); + else + temp_src = make_immediate_reg_float(emit, 1.0f); + + for (i = 0; i < 4; i++) { + struct tgsi_full_src_register src = + scalar_src(&temp_src, TGSI_SWIZZLE_X + i); + struct tgsi_full_dst_register dst = + make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i); + dst = writemask_dst(&dst, TGSI_WRITEMASK_X); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); + } + } + else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { + struct tgsi_full_src_register temp_src; + + if (emit->tcs.inner.tgsi_index != INVALID_INDEX) + temp_src = make_src_temp_reg(emit->tcs.inner.temp_index); + else + temp_src = make_immediate_reg_float(emit, 1.0f); + + struct tgsi_full_src_register src = + scalar_src(&temp_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register dst = + make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index); + dst = writemask_dst(&dst, TGSI_WRITEMASK_X); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); + + if (emit->tcs.outer.tgsi_index != INVALID_INDEX) + temp_src = make_src_temp_reg(emit->tcs.outer.temp_index); + else + temp_src = make_immediate_reg_float(emit, 1.0f); + + for (i = 0; i < 3; i++) { + struct tgsi_full_src_register src = + scalar_src(&temp_src, TGSI_SWIZZLE_X + i); + struct tgsi_full_dst_register dst = + make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i); + dst = writemask_dst(&dst, TGSI_WRITEMASK_X); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); + } + } + else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) { + if (emit->tcs.outer.tgsi_index != INVALID_INDEX) { + struct tgsi_full_src_register temp_src = + make_src_temp_reg(emit->tcs.outer.temp_index); + for (i = 0; i < 2; i++) { + struct tgsi_full_src_register src = + scalar_src(&temp_src, TGSI_SWIZZLE_X + i); + struct tgsi_full_dst_register dst = + make_dst_reg(TGSI_FILE_OUTPUT, + emit->tcs.outer.out_index + i); + dst = writemask_dst(&dst, TGSI_WRITEMASK_X); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); + } + } + } + else { + debug_printf("Unsupported primitive type"); + } +} + + /** * Emit the actual clip distance instructions to be used for clipping * by copying the clip distance from the temporary registers to the @@ -3399,7 +5876,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) /* MOV clip_dist_so, tmp_clip_dist */ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, - &tmp_clip_dist_src, FALSE); + &tmp_clip_dist_src); /** * copy those clip distances to enabled clipping planes @@ -3412,7 +5889,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) /* MOV CLIPDIST, tmp_clip_dist */ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, - &tmp_clip_dist_src, 
FALSE); + &tmp_clip_dist_src); } /* four clip planes per clip register */ clip_plane_enable >>= 4; @@ -3434,8 +5911,7 @@ emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit) unsigned index = emit->num_outputs; unsigned plane_mask; - assert(emit->unit == PIPE_SHADER_VERTEX || - emit->unit == PIPE_SHADER_GEOMETRY); + assert(emit->unit != PIPE_SHADER_FRAGMENT); assert(num_clip_planes <= 8); if (emit->clip_mode != CLIP_LEGACY && @@ -3446,6 +5922,10 @@ emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit) if (num_clip_planes == 0) return; + /* Convert clip vertex to clip distances only in the last vertex stage */ + if (!emit->key.last_vertex_stage) + return; + /* Declare one or two clip output registers. The number of components * in the mask reflects the number of clip planes. For example, if 5 * clip planes are needed, we'll declare outputs similar to: @@ -3458,13 +5938,15 @@ emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit) if (plane_mask & 0xf) { unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index, - VGPU10_NAME_CLIP_DISTANCE, cmask); + VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); emit->num_outputs++; } if (plane_mask & 0xf0) { unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1, - VGPU10_NAME_CLIP_DISTANCE, cmask); + VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE, + SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); emit->num_outputs++; } } @@ -3488,7 +5970,8 @@ emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit, assert(num_clip_planes <= 8); assert(emit->unit == PIPE_SHADER_VERTEX || - emit->unit == PIPE_SHADER_GEOMETRY); + emit->unit == PIPE_SHADER_GEOMETRY || + emit->unit == PIPE_SHADER_TESS_EVAL); for (i = 0; i < num_clip_planes; i++) { struct tgsi_full_dst_register dst; @@ -3506,7 +5989,7 @@ emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit, /* DP4 clip_dist, plane, vpos */ emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, - &plane_src, &vpos_src, FALSE); + &plane_src, &vpos_src); } } @@ -3527,7 +6010,8 @@ emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit) const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index; assert(emit->unit == PIPE_SHADER_VERTEX || - emit->unit == PIPE_SHADER_GEOMETRY); + emit->unit == PIPE_SHADER_GEOMETRY || + emit->unit == PIPE_SHADER_TESS_EVAL); assert(emit->clip_mode == CLIP_VERTEX); @@ -3547,7 +6031,7 @@ emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit) /* DP4 clip_dist, plane, vpos */ emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, - &plane_src, &clipvert_src, FALSE); + &plane_src, &clipvert_src); } /* copy temporary clip vertex register to the clip vertex register */ @@ -3564,7 +6048,7 @@ emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit) /* MOV clip_vertex, clip_vertex_tmp */ dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index); emit_instruction_op1(emit, VGPU10_OPCODE_MOV, - &dst, &clipvert_src, FALSE); + &dst, &clipvert_src); /** * set the temporary clip vertex register index back to the @@ -3613,20 +6097,18 @@ emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit, struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp); /* val = src * 2.0 */ - emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, - src, &two, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, 
src, &two); /* bias = src > 0.5 */ - emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, - src, &half, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half); /* bias = bias & -2.0 */ emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst, - &bias_src, &neg_two, FALSE); + &bias_src, &neg_two); /* dst = val + bias */ emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst, - &val_src, &bias_src, FALSE); + &val_src, &bias_src); free_temp_indexes(emit); } @@ -3642,7 +6124,7 @@ emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit, make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f); /* dst = src * scale */ - emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale); } @@ -3671,10 +6153,10 @@ emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit, * dst = i_to_f(r,g,b,a); # convert to float */ emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst, - &src_xxxx, &lshift, FALSE); + &src_xxxx, &lshift); emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst, - &tmp_src, &rshift, FALSE); - emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE); + &tmp_src, &rshift); + emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src); free_temp_indexes(emit); } @@ -3693,6 +6175,7 @@ emit_arl_uarl(struct svga_shader_emitter_v10 *emit, assert(index < MAX_VGPU10_ADDR_REGS); dst = make_dst_temp_reg(emit->address_reg_index[index]); + dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask); /* ARL dst, s0 * Translates into: @@ -3707,7 +6190,7 @@ emit_arl_uarl(struct svga_shader_emitter_v10 *emit, else opcode = VGPU10_OPCODE_MOV; - emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE); + emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]); return TRUE; } @@ -3751,7 +6234,7 @@ emit_iabs(struct svga_shader_emitter_v10 *emit, */ struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0], - &inst->Src[0], &neg_src, FALSE); + &inst->Src[0], &neg_src); return TRUE; } @@ -3778,11 +6261,12 @@ emit_cmp(struct svga_shader_emitter_v10 *emit, struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, - &inst->Src[0], &zero, FALSE); - emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], + emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst, + &inst->Src[0], &zero, NULL, FALSE, + inst->Instruction.Precise); + emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, &inst->Src[1], &inst->Src[2], - inst->Instruction.Saturate); + inst->Instruction.Saturate, FALSE); free_temp_indexes(emit); @@ -3827,7 +6311,7 @@ emit_dst(struct svga_shader_emitter_v10 *emit, writemask_dst(&move_dst, TGSI_WRITEMASK_X); struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one); } /* MUL dst.y, s0.y, s1.y */ @@ -3835,8 +6319,9 @@ emit_dst(struct svga_shader_emitter_v10 *emit, struct tgsi_full_dst_register dst_y = writemask_dst(&move_dst, TGSI_WRITEMASK_Y); - emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy, - &s1_yyyy, inst->Instruction.Saturate); + emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy, + &s1_yyyy, NULL, inst->Instruction.Saturate, + inst->Instruction.Precise); } /* MOV dst.z, s0.z */ @@ 
-3844,8 +6329,10 @@ emit_dst(struct svga_shader_emitter_v10 *emit, struct tgsi_full_dst_register dst_z = writemask_dst(&move_dst, TGSI_WRITEMASK_Z); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz, - inst->Instruction.Saturate); + emit_instruction_opn(emit, VGPU10_OPCODE_MOV, + &dst_z, &s0_zzzz, NULL, NULL, + inst->Instruction.Saturate, + inst->Instruction.Precise); } /* MOV dst.w, s1.w */ @@ -3853,18 +6340,30 @@ emit_dst(struct svga_shader_emitter_v10 *emit, struct tgsi_full_dst_register dst_w = writemask_dst(&move_dst, TGSI_WRITEMASK_W); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww, - inst->Instruction.Saturate); + emit_instruction_opn(emit, VGPU10_OPCODE_MOV, + &dst_w, &s1_wwww, NULL, NULL, + inst->Instruction.Saturate, + inst->Instruction.Precise); } - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, - FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); free_temp_indexes(emit); return TRUE; } +/** + * A helper function to return the stream index as specified in + * the immediate register + */ +static inline unsigned +find_stream_index(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_src_register *src) +{ + return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int; +} + /** * Emit code for TGSI_OPCODE_ENDPRIM (GS only) @@ -3875,11 +6374,25 @@ emit_endprim(struct svga_shader_emitter_v10 *emit, { assert(emit->unit == PIPE_SHADER_GEOMETRY); - /* We can't use emit_simple() because the TGSI instruction has one - * operand (vertex stream number) which we must ignore for VGPU10. - */ begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE); + if (emit->version >= 50) { + unsigned streamIndex = find_stream_index(emit, &inst->Src[0]); + + if (emit->info.num_stream_output_components[streamIndex] == 0) { + /** + * If there is no output for this stream, discard this instruction. 
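+ * (i.e. num_stream_output_components[] is zero for the stream
+ * selected by the instruction's immediate operand)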
+ */ + emit->discard_instruction = TRUE; + } + else { + emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE); + assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE); + emit_stream_register(emit, streamIndex); + } + } + else { + emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE); + } end_emit_instruction(emit); return TRUE; } @@ -3904,8 +6417,10 @@ emit_ex2(struct svga_shader_emitter_v10 *emit, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); /* EXP tmp, s0.xxxx */ - emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx, - inst->Instruction.Saturate); + emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx, + NULL, NULL, + inst->Instruction.Saturate, + inst->Instruction.Precise); return TRUE; } @@ -3945,15 +6460,17 @@ emit_exp(struct svga_shader_emitter_v10 *emit, /* ROUND_NI tmp.x, s0.x */ emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, - &src_xxxx, FALSE); /* round to -infinity */ + &src_xxxx); /* round to -infinity */ /* EXP dst.x, tmp.x */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { struct tgsi_full_dst_register dst_x = writemask_dst(&move_dst, TGSI_WRITEMASK_X); - emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src, - inst->Instruction.Saturate); + emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src, + NULL, NULL, + inst->Instruction.Saturate, + inst->Instruction.Precise); } /* ADD dst.y, s0.x, -tmp */ @@ -3962,8 +6479,10 @@ emit_exp(struct svga_shader_emitter_v10 *emit, writemask_dst(&move_dst, TGSI_WRITEMASK_Y); struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src); - emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx, - &neg_tmp_src, inst->Instruction.Saturate); + emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx, + &neg_tmp_src, NULL, + inst->Instruction.Saturate, + inst->Instruction.Precise); } /* EXP dst.z, s0.x */ @@ -3971,8 +6490,10 @@ emit_exp(struct svga_shader_emitter_v10 *emit, struct tgsi_full_dst_register dst_z = writemask_dst(&move_dst, TGSI_WRITEMASK_Z); - emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx, - inst->Instruction.Saturate); + emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx, + NULL, NULL, + inst->Instruction.Saturate, + inst->Instruction.Precise); } /* MOV dst.w, 1.0 */ @@ -3981,12 +6502,10 @@ emit_exp(struct svga_shader_emitter_v10 *emit, writemask_dst(&move_dst, TGSI_WRITEMASK_W); struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, - FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one); } - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, - FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); free_temp_indexes(emit); @@ -3999,14 +6518,14 @@ emit_exp(struct svga_shader_emitter_v10 *emit, */ static boolean emit_if(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) + const struct tgsi_full_src_register *src) { VGPU10OpcodeToken0 opcode0; /* The src register should be a scalar */ - assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY && - inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ && - inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW); + assert(src->Register.SwizzleX == src->Register.SwizzleY && + src->Register.SwizzleX == src->Register.SwizzleZ && + src->Register.SwizzleX == src->Register.SwizzleW); /* The only special thing here is that we need to set the * VGPU10_INSTRUCTION_TEST_NONZERO 
flag since we want to test if @@ -4018,7 +6537,7 @@ emit_if(struct svga_shader_emitter_v10 *emit, begin_emit_instruction(emit); emit_dword(emit, opcode0.value); - emit_src_register(emit, &inst->Src[0]); + emit_src_register(emit, src); end_emit_instruction(emit); return TRUE; @@ -4045,8 +6564,7 @@ emit_kill_if(struct svga_shader_emitter_v10 *emit, scalar_src(&tmp_src, TGSI_SWIZZLE_X); /* tmp = src[0] < 0.0 */ - emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], - &zero, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero); if (!same_swizzle_terms(&inst->Src[0])) { /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to @@ -4061,11 +6579,11 @@ emit_kill_if(struct svga_shader_emitter_v10 *emit, scalar_src(&tmp_src, TGSI_SWIZZLE_W); emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, - &tmp_src_yyyy, FALSE); + &tmp_src_yyyy); emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, - &tmp_src_zzzz, FALSE); + &tmp_src_zzzz); emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, - &tmp_src_wwww, FALSE); + &tmp_src_wwww); } begin_emit_instruction(emit); @@ -4117,8 +6635,10 @@ emit_lg2(struct svga_shader_emitter_v10 *emit, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); /* LOG tmp, s0.xxxx */ - emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx, - inst->Instruction.Saturate); + emit_instruction_opn(emit, VGPU10_OPCODE_LOG, + &inst->Dst[0], &src_xxxx, NULL, NULL, + inst->Instruction.Saturate, + inst->Instruction.Precise); return TRUE; } @@ -4152,14 +6672,14 @@ emit_lit(struct svga_shader_emitter_v10 *emit, if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { struct tgsi_full_dst_register dst_x = writemask_dst(&move_dst, TGSI_WRITEMASK_X); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one); } /* MOV dst.w, 1.0 */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { struct tgsi_full_dst_register dst_w = writemask_dst(&move_dst, TGSI_WRITEMASK_W); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one); } /* MAX dst.y, src.x, 0.0 */ @@ -4172,8 +6692,8 @@ emit_lit(struct svga_shader_emitter_v10 *emit, swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); - emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx, - &zero, inst->Instruction.Saturate); + emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx, + &zero, NULL, inst->Instruction.Saturate, FALSE); } /* @@ -4208,57 +6728,368 @@ emit_lit(struct svga_shader_emitter_v10 *emit, struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); - struct tgsi_full_src_register src_xxxx = - scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); - struct tgsi_full_src_register src_yyyy = - scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); - struct tgsi_full_src_register src_wwww = - scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + struct tgsi_full_src_register src_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + struct tgsi_full_src_register src_yyyy = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register src_wwww = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + + struct tgsi_full_src_register zero = + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register lowerbound = + make_immediate_reg_float(emit, -128.0f); + struct 
tgsi_full_src_register upperbound = + make_immediate_reg_float(emit, 128.0f); + + emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww, + &lowerbound); + emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src, + &upperbound); + emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy, + &zero); + + /* POW tmp1, tmp2, tmp1 */ + /* LOG tmp2, tmp2 */ + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src); + + /* MUL tmp1, tmp2, tmp1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src, + &tmp1_src); + + /* EXP tmp1, tmp1 */ + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src); + + /* EQ tmp2, 0, src.w */ + emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww); + /* MOVC tmp1.z, tmp2, tmp1, 1.0 */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst, + &tmp2_src, &one, &tmp1_src); + + /* LT tmp2, 0, src.x */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx); + /* MOVC dst.z, tmp2, tmp1, 0.0 */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z, + &tmp2_src, &tmp1_src, &zero); + } + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit Level Of Detail Query (LODQ) instruction. + */ +static boolean +emit_lodq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + + assert(emit->version >= 41); + + /* LOD dst, coord, resource, sampler */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &inst->Src[0]); /* coord */ + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LOG instruction. 
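+ *
+ * The four TGSI result components are built from LOG, ROUND_NI, EXP and
+ * DIV operations on a temporary register, guarded by the destination
+ * write mask so unused components are skipped.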
+ */ +static boolean +emit_log(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* + * dst.x = floor(lg2(abs(s0.x))) + * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x)))) + * dst.z = lg2(abs(s0.x)) + * dst.w = 1.0 + */ + + struct tgsi_full_src_register src_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx); + + /* only use X component of temp reg */ + tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* LOG tmp.x, abs(s0.x) */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx); + } + + /* MOV dst.z, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + struct tgsi_full_dst_register dst_z = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z); + + emit_instruction_opn(emit, VGPU10_OPCODE_MOV, + &dst_z, &tmp_src, NULL, NULL, + inst->Instruction.Saturate, FALSE); + } + + /* FLR tmp.x, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { + emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src); + } + + /* MOV dst.x, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + struct tgsi_full_dst_register dst_x = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); + + emit_instruction_opn(emit, VGPU10_OPCODE_MOV, + &dst_x, &tmp_src, NULL, NULL, + inst->Instruction.Saturate, FALSE); + } + + /* EXP tmp.x, tmp.x */ + /* DIV dst.y, abs(s0.x), tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + struct tgsi_full_dst_register dst_y = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); + + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src); + emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx, + &tmp_src, NULL, inst->Instruction.Saturate, FALSE); + } + + /* MOV dst.w, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_dst_register dst_w = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one); + } + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LRP instruction. + */ +static boolean +emit_lrp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = LRP(s0, s1, s2): + * dst = s0 * (s1 - s2) + s2 + * Translates into: + * SUB tmp, s1, s2; tmp = s1 - s2 + * MAD dst, s0, tmp, s2; dst = s0 * t1 + s2 + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp); + struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp); + struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]); + + /* ADD tmp, s1, -s2 */ + emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp, + &inst->Src[1], &neg_src2, NULL, FALSE, + inst->Instruction.Precise); + + /* MAD dst, s1, tmp, s3 */ + emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0], + &inst->Src[0], &src_tmp, &inst->Src[2], + inst->Instruction.Saturate, + inst->Instruction.Precise); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_POW instruction. 
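+ *
+ * Expanded as EXP(LOG(src0.x) * src1.x), reading only the .x components
+ * to match TGSI semantics.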
+ */ +static boolean +emit_pow(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* Note that TGSI_OPCODE_POW computes only one value from src0.x and + * src1.x while VGPU10 computes four values. + * + * dst = POW(src0, src1): + * dst.xyzw = src0.x ^ src1.x + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register src0_xxxx = + swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + struct tgsi_full_src_register src1_xxxx = + swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + /* LOG tmp, s0.xxxx */ + emit_instruction_opn(emit, VGPU10_OPCODE_LOG, + &tmp_dst, &src0_xxxx, NULL, NULL, + FALSE, inst->Instruction.Precise); + + /* MUL tmp, tmp, s1.xxxx */ + emit_instruction_opn(emit, VGPU10_OPCODE_MUL, + &tmp_dst, &tmp_src, &src1_xxxx, NULL, + FALSE, inst->Instruction.Precise); + + /* EXP tmp, s0.xxxx */ + emit_instruction_opn(emit, VGPU10_OPCODE_EXP, + &inst->Dst[0], &tmp_src, NULL, NULL, + inst->Instruction.Saturate, + inst->Instruction.Precise); + + /* free tmp */ + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction. + */ +static boolean +emit_rcp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + if (emit->version >= 50) { + /* use new RCP instruction. But VGPU10_OPCODE_RCP is component-wise + * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx so we need + * to manipulate the src register's swizzle. + */ + struct tgsi_full_src_register src = inst->Src[0]; + src.Register.SwizzleY = + src.Register.SwizzleZ = + src.Register.SwizzleW = src.Register.SwizzleX; + + begin_emit_instruction(emit); + emit_opcode_precise(emit, VGPU10_OPCODE_RCP, + inst->Instruction.Saturate, + inst->Instruction.Precise); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &src); + end_emit_instruction(emit); + } + else { + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* DIV tmp.x, 1.0, s0 */ + emit_instruction_opn(emit, VGPU10_OPCODE_DIV, + &tmp_dst_x, &one, &inst->Src[0], NULL, + FALSE, inst->Instruction.Precise); + + /* MOV dst, tmp.xxxx */ + emit_instruction_opn(emit, VGPU10_OPCODE_MOV, + &inst->Dst[0], &tmp_src_xxxx, NULL, NULL, + inst->Instruction.Saturate, + inst->Instruction.Precise); + + free_temp_indexes(emit); + } + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_RSQ instruction. 
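+ *
+ * The scalar result is computed in a temporary register and then
+ * broadcast to the destination with a MOV.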
+ */ +static boolean +emit_rsq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = RSQ(src): + * dst.xyzw = 1 / sqrt(src.x) + * Translates into: + * RSQ tmp, src.x + * MOV dst, tmp.xxxx + */ + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); - struct tgsi_full_src_register zero = - make_immediate_reg_float(emit, 0.0f); - struct tgsi_full_src_register lowerbound = - make_immediate_reg_float(emit, -128.0f); - struct tgsi_full_src_register upperbound = - make_immediate_reg_float(emit, 128.0f); + /* RSQ tmp, src.x */ + emit_instruction_opn(emit, VGPU10_OPCODE_RSQ, + &tmp_dst_x, &inst->Src[0], NULL, NULL, + FALSE, inst->Instruction.Precise); - emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww, - &lowerbound, FALSE); - emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src, - &upperbound, FALSE); - emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy, - &zero, FALSE); + /* MOV dst, tmp.xxxx */ + emit_instruction_opn(emit, VGPU10_OPCODE_MOV, + &inst->Dst[0], &tmp_src_xxxx, NULL, NULL, + inst->Instruction.Saturate, + inst->Instruction.Precise); - /* POW tmp1, tmp2, tmp1 */ - /* LOG tmp2, tmp2 */ - emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src, - FALSE); + /* free tmp */ + free_temp_indexes(emit); - /* MUL tmp1, tmp2, tmp1 */ - emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src, - &tmp1_src, FALSE); + return TRUE; +} - /* EXP tmp1, tmp1 */ - emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src, - FALSE); - /* EQ tmp2, 0, src.w */ - emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, - &src_wwww, FALSE); - /* MOVC tmp1.z, tmp2, tmp1, 1.0 */ - emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst, - &tmp2_src, &one, &tmp1_src, FALSE); +/** + * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. + */ +static boolean +emit_seq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SEQ(s0, s1): + * dst = s0 == s1 ? 1.0 : 0.0 (per component) + * Translates into: + * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); - /* LT tmp2, 0, src.x */ - emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, - &src_xxxx, FALSE); - /* MOVC dst.z, tmp2, tmp1, 0.0 */ - emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z, - &tmp2_src, &tmp1_src, &zero, FALSE); - } + /* EQ tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0], + &inst->Src[1]); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, - FALSE); free_temp_indexes(emit); return TRUE; @@ -4266,105 +7097,107 @@ emit_lit(struct svga_shader_emitter_v10 *emit, /** - * Emit Level Of Detail Query (LODQ) instruction. + * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. */ static boolean -emit_lodq(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_sge(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[1].Register.Index; + /* dst = SGE(s0, s1): + * dst = s0 >= s1 ? 1.0 : 0.0 (per component) + * Translates into: + * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); - assert(emit->version >= 41); + /* GE tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0], + &inst->Src[1]); - /* LOD dst, coord, resource, sampler */ - begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE); - emit_dst_register(emit, &inst->Dst[0]); - emit_src_register(emit, &inst->Src[0]); /* coord */ - emit_resource_register(emit, unit); - emit_sampler_register(emit, unit); - end_emit_instruction(emit); + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero); + + free_temp_indexes(emit); return TRUE; } /** - * Emit code for TGSI_OPCODE_LOG instruction. + * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. */ static boolean -emit_log(struct svga_shader_emitter_v10 *emit, +emit_sgt(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { - /* - * dst.x = floor(lg2(abs(s0.x))) - * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x)))) - * dst.z = lg2(abs(s0.x)) - * dst.w = 1.0 + /* dst = SGT(s0, s1): + * dst = s0 > s1 ? 1.0 : 0.0 (per component) + * Translates into: + * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) */ - - struct tgsi_full_src_register src_xxxx = - scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); unsigned tmp = get_temp_index(emit); struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); - /* only use X component of temp reg */ - tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); - tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); + /* LT tmp, s1, s0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1], + &inst->Src[0]); - /* LOG tmp.x, abs(s0.x) */ - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { - emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, - &abs_src_xxxx, FALSE); - } + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero); - /* MOV dst.z, tmp.x */ - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { - struct tgsi_full_dst_register dst_z = - writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z); + free_temp_indexes(emit); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, - &tmp_src, inst->Instruction.Saturate); - } + return TRUE; +} - /* FLR tmp.x, tmp.x */ - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { - emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, - &tmp_src, FALSE); - } - /* MOV dst.x, tmp.x */ - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { - struct tgsi_full_dst_register dst_x = - writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); +/** + * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. 
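+ *
+ * Both opcodes map to the two-destination VGPU10 SINCOS instruction; the
+ * unused result slot is emitted as a null destination register.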
+ */ +static boolean +emit_sincos(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src, - inst->Instruction.Saturate); - } + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); - /* EXP tmp.x, tmp.x */ - /* DIV dst.y, abs(s0.x), tmp.x */ - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { - struct tgsi_full_dst_register dst_y = - writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); - emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src, - FALSE); - emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx, - &tmp_src, inst->Instruction.Saturate); + if(inst->Instruction.Opcode == TGSI_OPCODE_SIN) + { + emit_dst_register(emit, &tmp_dst_x); /* first destination register */ + emit_null_dst_register(emit); /* second destination register */ + } + else { + emit_null_dst_register(emit); + emit_dst_register(emit, &tmp_dst_x); } - /* MOV dst.w, 1.0 */ - if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { - struct tgsi_full_dst_register dst_w = - writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W); - struct tgsi_full_src_register one = - make_immediate_reg_float(emit, 1.0f); + emit_src_register(emit, &inst->Src[0]); + end_emit_instruction(emit); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); - } + emit_instruction_opn(emit, VGPU10_OPCODE_MOV, + &inst->Dst[0], &tmp_src_xxxx, NULL, NULL, + inst->Instruction.Saturate, + inst->Instruction.Precise); free_temp_indexes(emit); @@ -4373,31 +7206,31 @@ emit_log(struct svga_shader_emitter_v10 *emit, /** - * Emit code for TGSI_OPCODE_LRP instruction. + * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction. */ static boolean -emit_lrp(struct svga_shader_emitter_v10 *emit, +emit_sle(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { - /* dst = LRP(s0, s1, s2): - * dst = s0 * (s1 - s2) + s2 + /* dst = SLE(s0, s1): + * dst = s0 <= s1 ? 1.0 : 0.0 (per component) * Translates into: - * SUB tmp, s1, s2; tmp = s1 - s2 - * MAD dst, s0, tmp, s2; dst = s0 * t1 + s2 + * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) */ unsigned tmp = get_temp_index(emit); - struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp); - struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp); - struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); - /* ADD tmp, s1, -s2 */ - emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp, - &inst->Src[1], &neg_src2, FALSE); + /* GE tmp, s1, s0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1], + &inst->Src[0]); - /* MAD dst, s1, tmp, s3 */ - emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0], - &inst->Src[0], &src_tmp, &inst->Src[2], - inst->Instruction.Saturate); + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero); free_temp_indexes(emit); @@ -4406,41 +7239,65 @@ emit_lrp(struct svga_shader_emitter_v10 *emit, /** - * Emit code for TGSI_OPCODE_POW instruction. + * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. */ static boolean -emit_pow(struct svga_shader_emitter_v10 *emit, +emit_slt(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { - /* Note that TGSI_OPCODE_POW computes only one value from src0.x and - * src1.x while VGPU10 computes four values. - * - * dst = POW(src0, src1): - * dst.xyzw = src0.x ^ src1.x + /* dst = SLT(s0, s1): + * dst = s0 < s1 ? 1.0 : 0.0 (per component) + * Translates into: + * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) */ unsigned tmp = get_temp_index(emit); struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - struct tgsi_full_src_register src0_xxxx = - swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, - TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); - struct tgsi_full_src_register src1_xxxx = - swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, - TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); - /* LOG tmp, s0.xxxx */ - emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx, - FALSE); + /* LT tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], + &inst->Src[1]); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction. + */ +static boolean +emit_sne(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SNE(s0, s1): + * dst = s0 != s1 ? 1.0 : 0.0 (per component) + * Translates into: + * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); - /* MUL tmp, tmp, s1.xxxx */ - emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src, - &src1_xxxx, FALSE); + /* NE tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0], + &inst->Src[1]); - /* EXP tmp, s0.xxxx */ - emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], - &tmp_src, inst->Instruction.Saturate); + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero); - /* free tmp */ free_temp_indexes(emit); return TRUE; @@ -4448,30 +7305,45 @@ emit_pow(struct svga_shader_emitter_v10 *emit, /** - * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction. + * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. */ static boolean -emit_rcp(struct svga_shader_emitter_v10 *emit, +emit_ssg(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { - struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); - - unsigned tmp = get_temp_index(emit); - struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); - struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 + * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 + * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 + * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 + * Translates into: + * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp) + * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component) + * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 
1.0 : tmp2 (per component) + */ + struct tgsi_full_src_register zero = + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_src_register neg_one = + make_immediate_reg_float(emit, -1.0f); - struct tgsi_full_dst_register tmp_dst_x = - writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); - struct tgsi_full_src_register tmp_src_xxxx = - scalar_src(&tmp_src, TGSI_SWIZZLE_X); + unsigned tmp1 = get_temp_index(emit); + struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); + struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); - /* DIV tmp.x, 1.0, s0 */ - emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one, - &inst->Src[0], FALSE); + unsigned tmp2 = get_temp_index(emit); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); - /* MOV dst, tmp.xxxx */ - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], - &tmp_src_xxxx, inst->Instruction.Saturate); + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], + &zero); + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, + &neg_one, &zero); + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, + &inst->Src[0]); + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, + &one, &tmp2_src); free_temp_indexes(emit); @@ -4480,37 +7352,40 @@ emit_rcp(struct svga_shader_emitter_v10 *emit, /** - * Emit code for TGSI_OPCODE_RSQ instruction. + * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. */ static boolean -emit_rsq(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_issg(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - /* dst = RSQ(src): - * dst.xyzw = 1 / sqrt(src.x) + /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 + * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 + * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 + * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 * Translates into: - * RSQ tmp, src.x - * MOV dst, tmp.xxxx + * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) + * ILT tmp2, 0, src tmp2 = 0 < src ? 
-1 : 0 (per component) + * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component) */ + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); - unsigned tmp = get_temp_index(emit); - struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); - struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + unsigned tmp1 = get_temp_index(emit); + struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); + struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); - struct tgsi_full_dst_register tmp_dst_x = - writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); - struct tgsi_full_src_register tmp_src_xxxx = - scalar_src(&tmp_src, TGSI_SWIZZLE_X); + unsigned tmp2 = get_temp_index(emit); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); - /* RSQ tmp, src.x */ - emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x, - &inst->Src[0], FALSE); + struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src); - /* MOV dst, tmp.xxxx */ - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], - &tmp_src_xxxx, inst->Instruction.Saturate); + emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst, + &inst->Src[0], &zero); + emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst, + &zero, &inst->Src[0]); + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], + &tmp1_src, &neg_tmp2); - /* free tmp */ free_temp_indexes(emit); return TRUE; @@ -4518,284 +7393,395 @@ emit_rsq(struct svga_shader_emitter_v10 *emit, /** - * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. + * Emit a comparison instruction. The dest register will get + * 0 or ~0 values depending on the outcome of comparing src0 to src1. */ -static boolean -emit_seq(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +static void +emit_comparison(struct svga_shader_emitter_v10 *emit, + SVGA3dCmpFunc func, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src0, + const struct tgsi_full_src_register *src1) { - /* dst = SEQ(s0, s1): - * dst = s0 == s1 ? 1.0 : 0.0 (per component) - * Translates into: - * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) - * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) - */ - unsigned tmp = get_temp_index(emit); - struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); - struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); - struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_src_register immediate; + VGPU10OpcodeToken0 opcode0; + boolean swapSrc = FALSE; - /* EQ tmp, s0, s1 */ - emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0], - &inst->Src[1], FALSE); + /* Sanity checks for svga vs. 
gallium enums */ + STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); + STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1)); - /* MOVC dst, tmp, one, zero */ - emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, - &one, &zero, FALSE); + opcode0.value = 0; - free_temp_indexes(emit); + switch (func) { + case SVGA3D_CMP_NEVER: + immediate = make_immediate_reg_int(emit, 0); + /* MOV dst, {0} */ + begin_emit_instruction(emit); + emit_dword(emit, VGPU10_OPCODE_MOV); + emit_dst_register(emit, dst); + emit_src_register(emit, &immediate); + end_emit_instruction(emit); + return; + case SVGA3D_CMP_ALWAYS: + immediate = make_immediate_reg_int(emit, -1); + /* MOV dst, {-1} */ + begin_emit_instruction(emit); + emit_dword(emit, VGPU10_OPCODE_MOV); + emit_dst_register(emit, dst); + emit_src_register(emit, &immediate); + end_emit_instruction(emit); + return; + case SVGA3D_CMP_LESS: + opcode0.opcodeType = VGPU10_OPCODE_LT; + break; + case SVGA3D_CMP_EQUAL: + opcode0.opcodeType = VGPU10_OPCODE_EQ; + break; + case SVGA3D_CMP_LESSEQUAL: + opcode0.opcodeType = VGPU10_OPCODE_GE; + swapSrc = TRUE; + break; + case SVGA3D_CMP_GREATER: + opcode0.opcodeType = VGPU10_OPCODE_LT; + swapSrc = TRUE; + break; + case SVGA3D_CMP_NOTEQUAL: + opcode0.opcodeType = VGPU10_OPCODE_NE; + break; + case SVGA3D_CMP_GREATEREQUAL: + opcode0.opcodeType = VGPU10_OPCODE_GE; + break; + default: + assert(!"Unexpected comparison mode"); + opcode0.opcodeType = VGPU10_OPCODE_EQ; + } - return TRUE; + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dst_register(emit, dst); + if (swapSrc) { + emit_src_register(emit, src1); + emit_src_register(emit, src0); + } + else { + emit_src_register(emit, src0); + emit_src_register(emit, src1); + } + end_emit_instruction(emit); } /** - * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. + * Get texel/address offsets for a texture instruction. */ -static boolean -emit_sge(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +static void +get_texel_offsets(const struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, int offsets[3]) { - /* dst = SGE(s0, s1): - * dst = s0 >= s1 ? 1.0 : 0.0 (per component) - * Translates into: - * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp) - * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) - */ - unsigned tmp = get_temp_index(emit); - struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); - struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); - struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + if (inst->Texture.NumOffsets == 1) { + /* According to OpenGL Shader Language spec the offsets are only + * fetched from a previously-declared immediate/literal. 
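+       * The assertion below checks TGSI_FILE_IMMEDIATE, so the x/y/z offsets
+       * can be read directly from the immediate pool at compile time.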
+ */ + const struct tgsi_texture_offset *off = inst->TexOffsets; + const unsigned index = off[0].Index; + const unsigned swizzleX = off[0].SwizzleX; + const unsigned swizzleY = off[0].SwizzleY; + const unsigned swizzleZ = off[0].SwizzleZ; + const union tgsi_immediate_data *imm = emit->immediates[index]; - /* GE tmp, s0, s1 */ - emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0], - &inst->Src[1], FALSE); + assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE); - /* MOVC dst, tmp, one, zero */ - emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, - &one, &zero, FALSE); + offsets[0] = imm[swizzleX].Int; + offsets[1] = imm[swizzleY].Int; + offsets[2] = imm[swizzleZ].Int; + } + else { + offsets[0] = offsets[1] = offsets[2] = 0; + } +} - free_temp_indexes(emit); - return TRUE; +/** + * Set up the coordinate register for texture sampling. + * When we're sampling from a RECT texture we have to scale the + * unnormalized coordinate to a normalized coordinate. + * We do that by multiplying the coordinate by an "extra" constant. + * An alternative would be to use the RESINFO instruction to query the + * texture's size. + */ +static struct tgsi_full_src_register +setup_texcoord(struct svga_shader_emitter_v10 *emit, + unsigned unit, + const struct tgsi_full_src_register *coord) +{ + if (emit->sampler_view[unit] && emit->key.tex[unit].unnormalized) { + unsigned scale_index = emit->texcoord_scale_index[unit]; + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index); + + if (emit->key.tex[unit].texel_bias) { + /* to fix texture coordinate rounding issue, 0.0001 offset is + * been added. This fixes piglit test fbo-blit-scaled-linear. */ + struct tgsi_full_src_register offset = + make_immediate_reg_float(emit, 0.0001f); + + /* ADD tmp, coord, offset */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst, + coord, &offset); + /* MUL tmp, tmp, scale */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, + &tmp_src, &scale_src); + } + else { + /* MUL tmp, coord, const[] */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, + coord, &scale_src); + } + return tmp_src; + } + else { + /* use texcoord as-is */ + return *coord; + } } /** - * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. + * For SAMPLE_C instructions, emit the extra src register which indicates + * the reference/comparision value. */ -static boolean -emit_sgt(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +static void +emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit, + enum tgsi_texture_type target, + const struct tgsi_full_src_register *coord) { - /* dst = SGT(s0, s1): - * dst = s0 > s1 ? 1.0 : 0.0 (per component) - * Translates into: - * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp) - * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) - */ - unsigned tmp = get_temp_index(emit); - struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); - struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); - struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_src_register coord_src_ref; + int component; - /* LT tmp, s1, s0 */ - emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1], - &inst->Src[0], FALSE); + assert(tgsi_is_shadow_target(target)); - /* MOVC dst, tmp, one, zero */ - emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, - &one, &zero, FALSE); + component = tgsi_util_get_shadow_ref_src_index(target) % 4; + assert(component >= 0); - free_temp_indexes(emit); + coord_src_ref = scalar_src(coord, component); - return TRUE; + emit_src_register(emit, &coord_src_ref); } /** - * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. + * Info for implementing texture swizzles. + * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle() + * functions use this to encapsulate the extra steps needed to perform + * a texture swizzle, or shadow/depth comparisons. + * The shadow/depth comparison is only done here if for the cases where + * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare). */ -static boolean -emit_sincos(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +struct tex_swizzle_info { - unsigned tmp = get_temp_index(emit); - struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); - struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + boolean swizzled; + boolean shadow_compare; + unsigned unit; + enum tgsi_texture_type texture_target; /**< TGSI_TEXTURE_x */ + struct tgsi_full_src_register tmp_src; + struct tgsi_full_dst_register tmp_dst; + const struct tgsi_full_dst_register *inst_dst; + const struct tgsi_full_src_register *coord_src; +}; - struct tgsi_full_src_register tmp_src_xxxx = - scalar_src(&tmp_src, TGSI_SWIZZLE_X); - struct tgsi_full_dst_register tmp_dst_x = - writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); - begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); +/** + * Do setup for handling texture swizzles or shadow compares. + * \param unit the texture unit + * \param inst the TGSI texture instruction + * \param shadow_compare do shadow/depth comparison? + * \param swz returns the swizzle info + */ +static void +begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, + unsigned unit, + const struct tgsi_full_instruction *inst, + boolean shadow_compare, + struct tex_swizzle_info *swz) +{ + swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X || + emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y || + emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z || + emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W); - if(inst->Instruction.Opcode == TGSI_OPCODE_SIN) - { - emit_dst_register(emit, &tmp_dst_x); /* first destination register */ - emit_null_dst_register(emit); /* second destination register */ - } - else { - emit_null_dst_register(emit); - emit_dst_register(emit, &tmp_dst_x); - } + swz->shadow_compare = shadow_compare; + swz->texture_target = inst->Texture.Texture; - emit_src_register(emit, &inst->Src[0]); - end_emit_instruction(emit); + if (swz->swizzled || shadow_compare) { + /* Allocate temp register for the result of the SAMPLE instruction + * and the source of the MOV/compare/swizzle instructions. 
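+       * end_tex_swizzle() later reads this temp when it writes the real dst;
+       * the caller releases it afterwards with free_temp_indexes().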
+ */ + unsigned tmp = get_temp_index(emit); + swz->tmp_src = make_src_temp_reg(tmp); + swz->tmp_dst = make_dst_temp_reg(tmp); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], - &tmp_src_xxxx, inst->Instruction.Saturate); + swz->unit = unit; + } + swz->inst_dst = &inst->Dst[0]; + swz->coord_src = &inst->Src[0]; - free_temp_indexes(emit); + emit->fs.shadow_compare_units |= shadow_compare << unit; +} - return TRUE; + +/** + * Returns the register to put the SAMPLE instruction results into. + * This will either be the original instruction dst reg (if no swizzle + * and no shadow comparison) or a temporary reg if there is a swizzle. + */ +static const struct tgsi_full_dst_register * +get_tex_swizzle_dst(const struct tex_swizzle_info *swz) +{ + return (swz->swizzled || swz->shadow_compare) + ? &swz->tmp_dst : swz->inst_dst; } /** - * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction. + * This emits the MOV instruction that actually implements a texture swizzle + * and/or shadow comparison. */ -static boolean -emit_sle(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +static void +end_tex_swizzle(struct svga_shader_emitter_v10 *emit, + const struct tex_swizzle_info *swz) { - /* dst = SLE(s0, s1): - * dst = s0 <= s1 ? 1.0 : 0.0 (per component) - * Translates into: - * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp) - * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) - */ - unsigned tmp = get_temp_index(emit); - struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); - struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); - struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + if (swz->shadow_compare) { + /* Emit extra instructions to compare the fetched texel value against + * a texture coordinate component. The result of the comparison + * is 0.0 or 1.0. 
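+       * The comparison yields 0 or ~0 per component; the AND with 1.0f below
+       * turns that bit pattern into a float 0.0 or 1.0.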
+ */ + struct tgsi_full_src_register coord_src; + struct tgsi_full_src_register texel_src = + scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + /* convert gallium comparison func to SVGA comparison func */ + SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1; - /* GE tmp, s1, s0 */ - emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1], - &inst->Src[0], FALSE); + int component = + tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4; + assert(component >= 0); + coord_src = scalar_src(swz->coord_src, component); - /* MOVC dst, tmp, one, zero */ - emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, - &one, &zero, FALSE); + /* COMPARE tmp, coord, texel */ + emit_comparison(emit, compare_func, + &swz->tmp_dst, &coord_src, &texel_src); - free_temp_indexes(emit); + /* AND dest, tmp, {1.0} */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_AND, FALSE); + if (swz->swizzled) { + emit_dst_register(emit, &swz->tmp_dst); + } + else { + emit_dst_register(emit, swz->inst_dst); + } + emit_src_register(emit, &swz->tmp_src); + emit_src_register(emit, &one); + end_emit_instruction(emit); + } - return TRUE; -} + if (swz->swizzled) { + unsigned swz_r = emit->key.tex[swz->unit].swizzle_r; + unsigned swz_g = emit->key.tex[swz->unit].swizzle_g; + unsigned swz_b = emit->key.tex[swz->unit].swizzle_b; + unsigned swz_a = emit->key.tex[swz->unit].swizzle_a; + unsigned writemask_0 = 0, writemask_1 = 0; + boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]); + /* Swizzle w/out zero/one terms */ + struct tgsi_full_src_register src_swizzled = + swizzle_src(&swz->tmp_src, + swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X, + swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y, + swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z, + swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W); -/** - * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. - */ -static boolean -emit_slt(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) -{ - /* dst = SLT(s0, s1): - * dst = s0 < s1 ? 1.0 : 0.0 (per component) - * Translates into: - * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp) - * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) - */ - unsigned tmp = get_temp_index(emit); - struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); - struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); - struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + /* MOV dst, color(tmp). */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + swz->inst_dst, &src_swizzled); - /* LT tmp, s0, s1 */ - emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], - &inst->Src[1], FALSE); + /* handle swizzle zero terms */ + writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) | + ((swz_g == PIPE_SWIZZLE_0) << 1) | + ((swz_b == PIPE_SWIZZLE_0) << 2) | + ((swz_a == PIPE_SWIZZLE_0) << 3)); + writemask_0 &= swz->inst_dst->Register.WriteMask; - /* MOVC dst, tmp, one, zero */ - emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, - &one, &zero, FALSE); + if (writemask_0) { + struct tgsi_full_src_register zero = int_tex ? 
+ make_immediate_reg_int(emit, 0) : + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_dst_register dst = + writemask_dst(swz->inst_dst, writemask_0); - free_temp_indexes(emit); + /* MOV dst.writemask_0, {0,0,0,0} */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero); + } + + /* handle swizzle one terms */ + writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) | + ((swz_g == PIPE_SWIZZLE_1) << 1) | + ((swz_b == PIPE_SWIZZLE_1) << 2) | + ((swz_a == PIPE_SWIZZLE_1) << 3)); + writemask_1 &= swz->inst_dst->Register.WriteMask; + + if (writemask_1) { + struct tgsi_full_src_register one = int_tex ? + make_immediate_reg_int(emit, 1) : + make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_dst_register dst = + writemask_dst(swz->inst_dst, writemask_1); - return TRUE; + /* MOV dst.writemask_1, {1,1,1,1} */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one); + } + } } /** - * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction. + * Emit code for TGSI_OPCODE_SAMPLE instruction. */ static boolean -emit_sne(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_sample(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - /* dst = SNE(s0, s1): - * dst = s0 != s1 ? 1.0 : 0.0 (per component) - * Translates into: - * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) - * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) - */ - unsigned tmp = get_temp_index(emit); - struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); - struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); - struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); - - /* NE tmp, s0, s1 */ - emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0], - &inst->Src[1], FALSE); + const unsigned resource_unit = inst->Src[1].Register.Index; + const unsigned sampler_unit = inst->Src[2].Register.Index; + struct tgsi_full_src_register coord; + int offsets[3]; + struct tex_swizzle_info swz_info; - /* MOVC dst, tmp, one, zero */ - emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, - &one, &zero, FALSE); + begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info); - free_temp_indexes(emit); + get_texel_offsets(emit, inst, offsets); - return TRUE; -} + coord = setup_texcoord(emit, resource_unit, &inst->Src[0]); + /* SAMPLE dst, coord(s0), resource, sampler */ + begin_emit_instruction(emit); -/** - * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. - */ -static boolean -emit_ssg(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) -{ - /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 - * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 - * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 - * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 - * Translates into: - * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp) - * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component) - * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp) - * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component) + /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L + * with LOD=0. But our virtual GPU accepts this as-is. 
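+    * (SAMPLE computes its LOD from implicit derivatives, which are only
+    * defined in the fragment stage.)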
*/ - struct tgsi_full_src_register zero = - make_immediate_reg_float(emit, 0.0f); - struct tgsi_full_src_register one = - make_immediate_reg_float(emit, 1.0f); - struct tgsi_full_src_register neg_one = - make_immediate_reg_float(emit, -1.0f); - - unsigned tmp1 = get_temp_index(emit); - struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); - struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); - - unsigned tmp2 = get_temp_index(emit); - struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); - struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); + emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, resource_unit); + emit_sampler_register(emit, sampler_unit); + end_emit_instruction(emit); - emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], - &zero, FALSE); - emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, - &neg_one, &zero, FALSE); - emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, - &inst->Src[0], FALSE); - emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, - &one, &tmp2_src, FALSE); + end_tex_swizzle(emit, &swz_info); free_temp_indexes(emit); @@ -4804,436 +7790,396 @@ emit_ssg(struct svga_shader_emitter_v10 *emit, /** - * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. + * Check if a texture instruction is valid. + * An example of an invalid texture instruction is doing shadow comparison + * with an integer-valued texture. + * If we detect an invalid texture instruction, we replace it with: + * MOV dst, {1,1,1,1}; + * \return TRUE if valid, FALSE if invalid. */ static boolean -emit_issg(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 - * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 - * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 - * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 - * Translates into: - * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) - * ILT tmp2, 0, src tmp2 = 0 < src ? 
-1 : 0 (per component) - * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component) - */ - struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); - - unsigned tmp1 = get_temp_index(emit); - struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); - struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); - - unsigned tmp2 = get_temp_index(emit); - struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); - struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); - - struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src); + const unsigned unit = inst->Src[1].Register.Index; + const enum tgsi_texture_type target = inst->Texture.Texture; + boolean valid = TRUE; - emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst, - &inst->Src[0], &zero, FALSE); - emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst, - &zero, &inst->Src[0], FALSE); - emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], - &tmp1_src, &neg_tmp2, FALSE); + if (tgsi_is_shadow_target(target) && + is_integer_type(emit->sampler_return_type[unit])) { + debug_printf("Invalid SAMPLE_C with an integer texture!\n"); + valid = FALSE; + } + /* XXX might check for other conditions in the future here */ - free_temp_indexes(emit); + if (!valid) { + /* emit a MOV dst, {1,1,1,1} instruction. */ + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &one); + end_emit_instruction(emit); + } - return TRUE; + return valid; } /** - * Emit a comparison instruction. The dest register will get - * 0 or ~0 values depending on the outcome of comparing src0 to src1. + * Emit code for TGSI_OPCODE_TEX (simple texture lookup) */ -static void -emit_comparison(struct svga_shader_emitter_v10 *emit, - SVGA3dCmpFunc func, - const struct tgsi_full_dst_register *dst, - const struct tgsi_full_src_register *src0, - const struct tgsi_full_src_register *src1) +static boolean +emit_tex(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - struct tgsi_full_src_register immediate; - VGPU10OpcodeToken0 opcode0; - boolean swapSrc = FALSE; + const uint unit = inst->Src[1].Register.Index; + const enum tgsi_texture_type target = inst->Texture.Texture; + VGPU10_OPCODE_TYPE opcode; + struct tgsi_full_src_register coord; + int offsets[3]; + struct tex_swizzle_info swz_info; - /* Sanity checks for svga vs. 
gallium enums */ - STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); - STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1)); + /* check that the sampler returns a float */ + if (!is_valid_tex_instruction(emit, inst)) + return TRUE; - opcode0.value = 0; + begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); - switch (func) { - case SVGA3D_CMP_NEVER: - immediate = make_immediate_reg_int(emit, 0); - /* MOV dst, {0} */ - begin_emit_instruction(emit); - emit_dword(emit, VGPU10_OPCODE_MOV); - emit_dst_register(emit, dst); - emit_src_register(emit, &immediate); - end_emit_instruction(emit); - return; - case SVGA3D_CMP_ALWAYS: - immediate = make_immediate_reg_int(emit, -1); - /* MOV dst, {-1} */ - begin_emit_instruction(emit); - emit_dword(emit, VGPU10_OPCODE_MOV); - emit_dst_register(emit, dst); - emit_src_register(emit, &immediate); - end_emit_instruction(emit); - return; - case SVGA3D_CMP_LESS: - opcode0.opcodeType = VGPU10_OPCODE_LT; - break; - case SVGA3D_CMP_EQUAL: - opcode0.opcodeType = VGPU10_OPCODE_EQ; - break; - case SVGA3D_CMP_LESSEQUAL: - opcode0.opcodeType = VGPU10_OPCODE_GE; - swapSrc = TRUE; - break; - case SVGA3D_CMP_GREATER: - opcode0.opcodeType = VGPU10_OPCODE_LT; - swapSrc = TRUE; - break; - case SVGA3D_CMP_NOTEQUAL: - opcode0.opcodeType = VGPU10_OPCODE_NE; - break; - case SVGA3D_CMP_GREATEREQUAL: - opcode0.opcodeType = VGPU10_OPCODE_GE; - break; - default: - assert(!"Unexpected comparison mode"); - opcode0.opcodeType = VGPU10_OPCODE_EQ; - } + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + /* SAMPLE dst, coord(s0), resource, sampler */ begin_emit_instruction(emit); - emit_dword(emit, opcode0.value); - emit_dst_register(emit, dst); - if (swapSrc) { - emit_src_register(emit, src1); - emit_src_register(emit, src0); - } - else { - emit_src_register(emit, src0); - emit_src_register(emit, src1); + + if (tgsi_is_shadow_target(target)) + opcode = VGPU10_OPCODE_SAMPLE_C; + else + opcode = VGPU10_OPCODE_SAMPLE; + + emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + if (opcode == VGPU10_OPCODE_SAMPLE_C) { + emit_tex_compare_refcoord(emit, target, &coord); } end_emit_instruction(emit); -} - -/** - * Get texel/address offsets for a texture instruction. - */ -static void -get_texel_offsets(const struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst, int offsets[3]) -{ - if (inst->Texture.NumOffsets == 1) { - /* According to OpenGL Shader Language spec the offsets are only - * fetched from a previously-declared immediate/literal. - */ - const struct tgsi_texture_offset *off = inst->TexOffsets; - const unsigned index = off[0].Index; - const unsigned swizzleX = off[0].SwizzleX; - const unsigned swizzleY = off[0].SwizzleY; - const unsigned swizzleZ = off[0].SwizzleZ; - const union tgsi_immediate_data *imm = emit->immediates[index]; + end_tex_swizzle(emit, &swz_info); - assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE); + free_temp_indexes(emit); - offsets[0] = imm[swizzleX].Int; - offsets[1] = imm[swizzleY].Int; - offsets[2] = imm[swizzleZ].Int; - } - else { - offsets[0] = offsets[1] = offsets[2] = 0; - } + return TRUE; } - /** - * Set up the coordinate register for texture sampling. - * When we're sampling from a RECT texture we have to scale the - * unnormalized coordinate to a normalized coordinate. 
- * We do that by multiplying the coordinate by an "extra" constant. - * An alternative would be to use the RESINFO instruction to query the - * texture's size. + * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather) */ -static struct tgsi_full_src_register -setup_texcoord(struct svga_shader_emitter_v10 *emit, - unsigned unit, - const struct tgsi_full_src_register *coord) +static boolean +emit_tg4(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - if (emit->sampler_view[unit] && emit->key.tex[unit].unnormalized) { - unsigned scale_index = emit->texcoord_scale_index[unit]; - unsigned tmp = get_temp_index(emit); - struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); - struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index); + const uint unit = inst->Src[2].Register.Index; + struct tgsi_full_src_register src; + struct tgsi_full_src_register offset_src, sampler, ref; + int offsets[3]; - if (emit->key.tex[unit].texel_bias) { - /* to fix texture coordinate rounding issue, 0.0001 offset is - * been added. This fixes piglit test fbo-blit-scaled-linear. */ - struct tgsi_full_src_register offset = - make_immediate_reg_float(emit, 0.0001f); + /* check that the sampler returns a float */ + if (!is_valid_tex_instruction(emit, inst)) + return TRUE; - /* ADD tmp, coord, offset */ - emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst, - coord, &offset, FALSE); - /* MUL tmp, tmp, scale */ - emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, - &tmp_src, &scale_src, FALSE); + if (emit->version >= 50) { + unsigned target = inst->Texture.Texture; + int index = inst->Src[1].Register.Index; + const union tgsi_immediate_data *imm = emit->immediates[index]; + int select_comp = imm[inst->Src[1].Register.SwizzleX].Int; + unsigned select_swizzle = PIPE_SWIZZLE_X; + + if (!tgsi_is_shadow_target(target)) { + switch (select_comp) { + case 0: + select_swizzle = emit->key.tex[unit].swizzle_r; + break; + case 1: + select_swizzle = emit->key.tex[unit].swizzle_g; + break; + case 2: + select_swizzle = emit->key.tex[unit].swizzle_b; + break; + case 3: + select_swizzle = emit->key.tex[unit].swizzle_a; + break; + default: + assert(!"Unexpected component in texture gather swizzle"); + } } else { - /* MUL tmp, coord, const[] */ - emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, - coord, &scale_src, FALSE); + select_swizzle = emit->key.tex[unit].swizzle_r; + } + + if (select_swizzle == PIPE_SWIZZLE_1) { + src = make_immediate_reg_float(emit, 1.0); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); + return TRUE; + } + else if (select_swizzle == PIPE_SWIZZLE_0) { + src = make_immediate_reg_float(emit, 0.0); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); + return TRUE; } - return tmp_src; - } - else { - /* use texcoord as-is */ - return *coord; - } -} + src = setup_texcoord(emit, unit, &inst->Src[0]); -/** - * For SAMPLE_C instructions, emit the extra src register which indicates - * the reference/comparision value. 
- */ -static void -emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit, - enum tgsi_texture_type target, - const struct tgsi_full_src_register *coord) -{ - struct tgsi_full_src_register coord_src_ref; - int component; + /* GATHER4 dst, coord, resource, sampler */ + /* GATHER4_C dst, coord, resource, sampler ref */ + /* GATHER4_PO dst, coord, offset resource, sampler */ + /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */ + begin_emit_instruction(emit); + if (inst->Texture.NumOffsets == 1) { + if (tgsi_is_shadow_target(target)) { + emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C, + inst->Instruction.Saturate); + } + else { + emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO, + inst->Instruction.Saturate); + } + } + else { + if (tgsi_is_shadow_target(target)) { + emit_opcode(emit, VGPU10_OPCODE_GATHER4_C, + inst->Instruction.Saturate); + } + else { + emit_opcode(emit, VGPU10_OPCODE_GATHER4, + inst->Instruction.Saturate); + } + } - assert(tgsi_is_shadow_target(target)); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &src); + if (inst->Texture.NumOffsets == 1) { + /* offset */ + offset_src = make_src_reg(inst->TexOffsets[0].File, + inst->TexOffsets[0].Index); + offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX, + inst->TexOffsets[0].SwizzleY, + inst->TexOffsets[0].SwizzleZ, + TGSI_SWIZZLE_W); + emit_src_register(emit, &offset_src); + } - component = tgsi_util_get_shadow_ref_src_index(target) % 4; - assert(component >= 0); + /* resource */ + emit_resource_register(emit, unit); - coord_src_ref = scalar_src(coord, component); + /* sampler */ + sampler = make_src_reg(TGSI_FILE_SAMPLER, unit); + sampler.Register.SwizzleX = + sampler.Register.SwizzleY = + sampler.Register.SwizzleZ = + sampler.Register.SwizzleW = select_swizzle; + emit_src_register(emit, &sampler); + + if (tgsi_is_shadow_target(target)) { + /* ref */ + if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { + ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); + emit_tex_compare_refcoord(emit, target, &ref); + } + else { + emit_tex_compare_refcoord(emit, target, &src); + } + } - emit_src_register(emit, &coord_src_ref); + end_emit_instruction(emit); + free_temp_indexes(emit); + } + else { + /* Only a single channel is supported in SM4_1 and we report + * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1. + * Only the 0th component will be gathered. + */ + switch (emit->key.tex[unit].swizzle_r) { + case PIPE_SWIZZLE_X: + get_texel_offsets(emit, inst, offsets); + src = setup_texcoord(emit, unit, &inst->Src[0]); + + /* Gather dst, coord, resource, sampler */ + begin_emit_instruction(emit); + emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &src); + emit_resource_register(emit, unit); + + /* sampler */ + sampler = make_src_reg(TGSI_FILE_SAMPLER, unit); + sampler.Register.SwizzleX = + sampler.Register.SwizzleY = + sampler.Register.SwizzleZ = + sampler.Register.SwizzleW = PIPE_SWIZZLE_X; + emit_src_register(emit, &sampler); + + end_emit_instruction(emit); + break; + case PIPE_SWIZZLE_W: + case PIPE_SWIZZLE_1: + src = make_immediate_reg_float(emit, 1.0); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); + break; + case PIPE_SWIZZLE_Y: + case PIPE_SWIZZLE_Z: + case PIPE_SWIZZLE_0: + default: + src = make_immediate_reg_float(emit, 0.0); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); + break; + } + } + + return TRUE; } + /** - * Info for implementing texture swizzles. 
- * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle() - * functions use this to encapsulate the extra steps needed to perform - * a texture swizzle, or shadow/depth comparisons. - * The shadow/depth comparison is only done here if for the cases where - * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare). + * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays) */ -struct tex_swizzle_info +static boolean +emit_tex2(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - boolean swizzled; - boolean shadow_compare; - unsigned unit; - enum tgsi_texture_type texture_target; /**< TGSI_TEXTURE_x */ - struct tgsi_full_src_register tmp_src; - struct tgsi_full_dst_register tmp_dst; - const struct tgsi_full_dst_register *inst_dst; - const struct tgsi_full_src_register *coord_src; -}; + const uint unit = inst->Src[2].Register.Index; + unsigned target = inst->Texture.Texture; + struct tgsi_full_src_register coord, ref; + int offsets[3]; + struct tex_swizzle_info swz_info; + /* check that the sampler returns a float */ + if (!is_valid_tex_instruction(emit, inst)) + return TRUE; -/** - * Do setup for handling texture swizzles or shadow compares. - * \param unit the texture unit - * \param inst the TGSI texture instruction - * \param shadow_compare do shadow/depth comparison? - * \param swz returns the swizzle info - */ -static void -begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, - unsigned unit, - const struct tgsi_full_instruction *inst, - boolean shadow_compare, - struct tex_swizzle_info *swz) -{ - swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X || - emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y || - emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z || - emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W); + begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); - swz->shadow_compare = shadow_compare; - swz->texture_target = inst->Texture.Texture; + get_texel_offsets(emit, inst, offsets); - if (swz->swizzled || shadow_compare) { - /* Allocate temp register for the result of the SAMPLE instruction - * and the source of the MOV/compare/swizzle instructions. - */ - unsigned tmp = get_temp_index(emit); - swz->tmp_src = make_src_temp_reg(tmp); - swz->tmp_dst = make_dst_temp_reg(tmp); + coord = setup_texcoord(emit, unit, &inst->Src[0]); + ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); - swz->unit = unit; - } - swz->inst_dst = &inst->Dst[0]; - swz->coord_src = &inst->Src[0]; + /* SAMPLE_C dst, coord, resource, sampler, ref */ + begin_emit_instruction(emit); + emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + emit_tex_compare_refcoord(emit, target, &ref); + end_emit_instruction(emit); - emit->fs.shadow_compare_units |= shadow_compare << unit; -} + end_tex_swizzle(emit, &swz_info); + free_temp_indexes(emit); -/** - * Returns the register to put the SAMPLE instruction results into. - * This will either be the original instruction dst reg (if no swizzle - * and no shadow comparison) or a temporary reg if there is a swizzle. - */ -static const struct tgsi_full_dst_register * -get_tex_swizzle_dst(const struct tex_swizzle_info *swz) -{ - return (swz->swizzled || swz->shadow_compare) - ? 
&swz->tmp_dst : swz->inst_dst; + return TRUE; } /** - * This emits the MOV instruction that actually implements a texture swizzle - * and/or shadow comparison. + * Emit code for TGSI_OPCODE_TXP (projective texture) */ -static void -end_tex_swizzle(struct svga_shader_emitter_v10 *emit, - const struct tex_swizzle_info *swz) +static boolean +emit_txp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - if (swz->shadow_compare) { - /* Emit extra instructions to compare the fetched texel value against - * a texture coordinate component. The result of the comparison - * is 0.0 or 1.0. - */ - struct tgsi_full_src_register coord_src; - struct tgsi_full_src_register texel_src = - scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X); - struct tgsi_full_src_register one = - make_immediate_reg_float(emit, 1.0f); - /* convert gallium comparison func to SVGA comparison func */ - SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1; - - assert(emit->unit == PIPE_SHADER_FRAGMENT); - - int component = - tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4; - assert(component >= 0); - coord_src = scalar_src(swz->coord_src, component); + const uint unit = inst->Src[1].Register.Index; + const enum tgsi_texture_type target = inst->Texture.Texture; + VGPU10_OPCODE_TYPE opcode; + int offsets[3]; + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register src0_wwww = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + struct tgsi_full_src_register coord; + struct tex_swizzle_info swz_info; - /* COMPARE tmp, coord, texel */ - emit_comparison(emit, compare_func, - &swz->tmp_dst, &coord_src, &texel_src); + /* check that the sampler returns a float */ + if (!is_valid_tex_instruction(emit, inst)) + return TRUE; - /* AND dest, tmp, {1.0} */ - begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_AND, FALSE); - if (swz->swizzled) { - emit_dst_register(emit, &swz->tmp_dst); - } - else { - emit_dst_register(emit, swz->inst_dst); - } - emit_src_register(emit, &swz->tmp_src); - emit_src_register(emit, &one); - end_emit_instruction(emit); - } + begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); - if (swz->swizzled) { - unsigned swz_r = emit->key.tex[swz->unit].swizzle_r; - unsigned swz_g = emit->key.tex[swz->unit].swizzle_g; - unsigned swz_b = emit->key.tex[swz->unit].swizzle_b; - unsigned swz_a = emit->key.tex[swz->unit].swizzle_a; - unsigned writemask_0 = 0, writemask_1 = 0; - boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]); + get_texel_offsets(emit, inst, offsets); - /* Swizzle w/out zero/one terms */ - struct tgsi_full_src_register src_swizzled = - swizzle_src(&swz->tmp_src, - swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X, - swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y, - swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z, - swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W); + coord = setup_texcoord(emit, unit, &inst->Src[0]); - /* MOV dst, color(tmp). 
*/ - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, - swz->inst_dst, &src_swizzled, FALSE); + /* DIV tmp, coord, coord.wwww */ + emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst, + &coord, &src0_wwww); - /* handle swizzle zero terms */ - writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) | - ((swz_g == PIPE_SWIZZLE_0) << 1) | - ((swz_b == PIPE_SWIZZLE_0) << 2) | - ((swz_a == PIPE_SWIZZLE_0) << 3)); - writemask_0 &= swz->inst_dst->Register.WriteMask; + /* SAMPLE dst, coord(tmp), resource, sampler */ + begin_emit_instruction(emit); - if (writemask_0) { - struct tgsi_full_src_register zero = int_tex ? - make_immediate_reg_int(emit, 0) : - make_immediate_reg_float(emit, 0.0f); - struct tgsi_full_dst_register dst = - writemask_dst(swz->inst_dst, writemask_0); + if (tgsi_is_shadow_target(target)) + /* NOTE: for non-fragment shaders, we should use + * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is. + */ + opcode = VGPU10_OPCODE_SAMPLE_C; + else + opcode = VGPU10_OPCODE_SAMPLE; - /* MOV dst.writemask_0, {0,0,0,0} */ - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, - &dst, &zero, FALSE); - } + emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &tmp_src); /* projected coord */ + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + if (opcode == VGPU10_OPCODE_SAMPLE_C) { + emit_tex_compare_refcoord(emit, target, &tmp_src); + } + end_emit_instruction(emit); - /* handle swizzle one terms */ - writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) | - ((swz_g == PIPE_SWIZZLE_1) << 1) | - ((swz_b == PIPE_SWIZZLE_1) << 2) | - ((swz_a == PIPE_SWIZZLE_1) << 3)); - writemask_1 &= swz->inst_dst->Register.WriteMask; + end_tex_swizzle(emit, &swz_info); - if (writemask_1) { - struct tgsi_full_src_register one = int_tex ? - make_immediate_reg_int(emit, 1) : - make_immediate_reg_float(emit, 1.0f); - struct tgsi_full_dst_register dst = - writemask_dst(swz->inst_dst, writemask_1); + free_temp_indexes(emit); - /* MOV dst.writemask_1, {1,1,1,1} */ - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE); - } - } + return TRUE; } /** - * Emit code for TGSI_OPCODE_SAMPLE instruction. + * Emit code for TGSI_OPCODE_TXD (explicit derivatives) */ static boolean -emit_sample(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_txd(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - const unsigned resource_unit = inst->Src[1].Register.Index; - const unsigned sampler_unit = inst->Src[2].Register.Index; - struct tgsi_full_src_register coord; + const uint unit = inst->Src[3].Register.Index; + const enum tgsi_texture_type target = inst->Texture.Texture; int offsets[3]; + struct tgsi_full_src_register coord; struct tex_swizzle_info swz_info; - begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info); + begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), + &swz_info); get_texel_offsets(emit, inst, offsets); - coord = setup_texcoord(emit, resource_unit, &inst->Src[0]); + coord = setup_texcoord(emit, unit, &inst->Src[0]); - /* SAMPLE dst, coord(s0), resource, sampler */ + /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */ begin_emit_instruction(emit); - - /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L - * with LOD=0. But our virtual GPU accepts this as-is. 
- */ - emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, + emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D, inst->Instruction.Saturate, offsets); emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); emit_src_register(emit, &coord); - emit_resource_register(emit, resource_unit); - emit_sampler_register(emit, sampler_unit); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + emit_src_register(emit, &inst->Src[1]); /* Xderiv */ + emit_src_register(emit, &inst->Src[2]); /* Yderiv */ end_emit_instruction(emit); end_tex_swizzle(emit, &swz_info); @@ -5245,82 +8191,151 @@ emit_sample(struct svga_shader_emitter_v10 *emit, /** - * Check if a texture instruction is valid. - * An example of an invalid texture instruction is doing shadow comparison - * with an integer-valued texture. - * If we detect an invalid texture instruction, we replace it with: - * MOV dst, {1,1,1,1}; - * \return TRUE if valid, FALSE if invalid. + * Emit code for TGSI_OPCODE_TXF (texel fetch) */ static boolean -is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_txf(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - const unsigned unit = inst->Src[1].Register.Index; - const enum tgsi_texture_type target = inst->Texture.Texture; - boolean valid = TRUE; + const uint unit = inst->Src[1].Register.Index; + const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture) + && emit->key.tex[unit].num_samples > 1; + int offsets[3]; + struct tex_swizzle_info swz_info; - if (tgsi_is_shadow_target(target) && - is_integer_type(emit->sampler_return_type[unit])) { - debug_printf("Invalid SAMPLE_C with an integer texture!\n"); - valid = FALSE; - } - /* XXX might check for other conditions in the future here */ + begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); - if (!valid) { - /* emit a MOV dst, {1,1,1,1} instruction. */ - struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + get_texel_offsets(emit, inst, offsets); + + if (msaa) { + assert(emit->key.tex[unit].num_samples > 1); + + /* Fetch one sample from an MSAA texture */ + struct tgsi_full_src_register sampleIndex = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + /* LD_MS dst, coord(s0), resource, sampleIndex */ begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); - emit_dst_register(emit, &inst->Dst[0]); - emit_src_register(emit, &one); + emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &inst->Src[0]); + emit_resource_register(emit, unit); + emit_src_register(emit, &sampleIndex); + end_emit_instruction(emit); + } + else { + /* Fetch one texel specified by integer coordinate */ + /* LD dst, coord(s0), resource */ + begin_emit_instruction(emit); + emit_sample_opcode(emit, VGPU10_OPCODE_LD, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &inst->Src[0]); + emit_resource_register(emit, unit); end_emit_instruction(emit); } - return valid; + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; } /** - * Emit code for TGSI_OPCODE_TEX (simple texture lookup) + * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias) + * or TGSI_OPCODE_TXB2 (for cube shadow maps). 
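+ * TXL/TXB take the LOD or bias from coord.w and the sampler unit from
+ * src1; TXB2 takes the bias from src1.x and the unit from src2.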
*/ static boolean -emit_tex(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_txl_txb(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[1].Register.Index; const enum tgsi_texture_type target = inst->Texture.Texture; VGPU10_OPCODE_TYPE opcode; - struct tgsi_full_src_register coord; + unsigned unit; int offsets[3]; + struct tgsi_full_src_register coord, lod_bias; struct tex_swizzle_info swz_info; - /* check that the sampler returns a float */ - if (!is_valid_tex_instruction(emit, inst)) - return TRUE; + assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL || + inst->Instruction.Opcode == TGSI_OPCODE_TXB || + inst->Instruction.Opcode == TGSI_OPCODE_TXB2); + + if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) { + lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); + unit = inst->Src[2].Register.Index; + } + else { + lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + unit = inst->Src[1].Register.Index; + } + + begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), + &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + + /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */ + begin_emit_instruction(emit); + if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) { + opcode = VGPU10_OPCODE_SAMPLE_L; + } + else { + opcode = VGPU10_OPCODE_SAMPLE_B; + } + emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + emit_src_register(emit, &lod_bias); + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array. + */ +static boolean +emit_txl2(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned target = inst->Texture.Texture; + unsigned opcode, unit; + int offsets[3]; + struct tgsi_full_src_register coord, lod; + struct tex_swizzle_info swz_info; + + assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2); + + lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); + unit = inst->Src[2].Register.Index; - begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), + &swz_info); get_texel_offsets(emit, inst, offsets); coord = setup_texcoord(emit, unit, &inst->Src[0]); - /* SAMPLE dst, coord(s0), resource, sampler */ + /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */ begin_emit_instruction(emit); - - if (tgsi_is_shadow_target(target)) - opcode = VGPU10_OPCODE_SAMPLE_C; - else - opcode = VGPU10_OPCODE_SAMPLE; - + opcode = VGPU10_OPCODE_SAMPLE_L; emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); emit_src_register(emit, &coord); emit_resource_register(emit, unit); emit_sampler_register(emit, unit); - if (opcode == VGPU10_OPCODE_SAMPLE_C) { - emit_tex_compare_refcoord(emit, target, &coord); - } + emit_src_register(emit, &lod); end_emit_instruction(emit); end_tex_swizzle(emit, &swz_info); @@ -5330,199 +8345,311 @@ emit_tex(struct svga_shader_emitter_v10 *emit, return TRUE; } + /** - * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather) + * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. 
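+ * Buffer textures cannot be queried with RESINFO, so their size is read
+ * from a shader constant instead; all other targets use RESINFO.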
*/ static boolean -emit_tg4(struct svga_shader_emitter_v10 *emit, +emit_txq(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[2].Register.Index; - struct tgsi_full_src_register src; - int offsets[3]; - - /* check that the sampler returns a float */ - if (!is_valid_tex_instruction(emit, inst)) - return TRUE; - - /* Only a single channel is supported in SM4_1 and we report - * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1. - * Only the 0th component will be gathered. - */ - switch (emit->key.tex[unit].swizzle_r) { - case PIPE_SWIZZLE_X: - get_texel_offsets(emit, inst, offsets); - src = setup_texcoord(emit, unit, &inst->Src[0]); + const uint unit = inst->Src[1].Register.Index; - /* Gather dst, coord, resource, sampler */ + if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) { + /* RESINFO does not support querying texture buffers, so we instead + * store texture buffer sizes in shader constants, then copy them to + * implement TXQ instead of emitting RESINFO. + * MOV dst, const[texture_buffer_size_index[unit]] + */ + struct tgsi_full_src_register size_src = + make_src_const_reg(emit->texture_buffer_size_index[unit]); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src); + } else { + /* RESINFO dst, srcMipLevel, resource */ begin_emit_instruction(emit); - emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4, - inst->Instruction.Saturate, offsets); + emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); emit_dst_register(emit, &inst->Dst[0]); - emit_src_register(emit, &src); + emit_src_register(emit, &inst->Src[0]); emit_resource_register(emit, unit); - emit_sampler_register(emit, unit); end_emit_instruction(emit); - break; - case PIPE_SWIZZLE_W: - case PIPE_SWIZZLE_1: - src = make_immediate_reg_float(emit, 1.0); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, - &inst->Dst[0], &src, FALSE); - break; - case PIPE_SWIZZLE_Y: - case PIPE_SWIZZLE_Z: - case PIPE_SWIZZLE_0: - default: - src = make_immediate_reg_float(emit, 0.0); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, - &inst->Dst[0], &src, FALSE); - break; } + free_temp_indexes(emit); + return TRUE; } +/** + * Does this opcode produce a double-precision result? + * XXX perhaps move this to a TGSI utility. + */ +static bool +opcode_has_dbl_dst(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_F2D: + case TGSI_OPCODE_DABS: + case TGSI_OPCODE_DADD: + case TGSI_OPCODE_DFRAC: + case TGSI_OPCODE_DMAX: + case TGSI_OPCODE_DMIN: + case TGSI_OPCODE_DMUL: + case TGSI_OPCODE_DNEG: + case TGSI_OPCODE_I2D: + case TGSI_OPCODE_U2D: + // XXX more TBD + return true; + default: + return false; + } +} + /** - * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays) + * Does this opcode use double-precision source registers? + */ +static bool +opcode_has_dbl_src(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_D2F: + case TGSI_OPCODE_DABS: + case TGSI_OPCODE_DADD: + case TGSI_OPCODE_DFRAC: + case TGSI_OPCODE_DMAX: + case TGSI_OPCODE_DMIN: + case TGSI_OPCODE_DMUL: + case TGSI_OPCODE_DNEG: + case TGSI_OPCODE_D2I: + case TGSI_OPCODE_D2U: + // XXX more TBD + return true; + default: + return false; + } +} + + +/** + * Check that the swizzle for reading from a double-precision register + * is valid. 
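+ * A double value occupies a pair of 32-bit channels, so each half of the
+ * swizzle may only select the source's xy or zw pair (asserted below).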
+ */ +static void +check_double_src_swizzle(const struct tgsi_full_src_register *reg) +{ + assert((reg->Register.SwizzleX == PIPE_SWIZZLE_X && + reg->Register.SwizzleY == PIPE_SWIZZLE_Y) || + (reg->Register.SwizzleX == PIPE_SWIZZLE_Z && + reg->Register.SwizzleY == PIPE_SWIZZLE_W)); + + assert((reg->Register.SwizzleZ == PIPE_SWIZZLE_X && + reg->Register.SwizzleW == PIPE_SWIZZLE_Y) || + (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z && + reg->Register.SwizzleW == PIPE_SWIZZLE_W)); +} + + +/** + * Check that the writemask for a double-precision instruction is valid. + */ +static void +check_double_dst_writemask(const struct tgsi_full_instruction *inst) +{ + ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_DABS: + case TGSI_OPCODE_DADD: + case TGSI_OPCODE_DFRAC: + case TGSI_OPCODE_DNEG: + case TGSI_OPCODE_DMAD: + case TGSI_OPCODE_DMAX: + case TGSI_OPCODE_DMIN: + case TGSI_OPCODE_DMUL: + case TGSI_OPCODE_DRCP: + case TGSI_OPCODE_DSQRT: + case TGSI_OPCODE_F2D: + assert(writemask == TGSI_WRITEMASK_XYZW || + writemask == TGSI_WRITEMASK_XY || + writemask == TGSI_WRITEMASK_ZW); + break; + case TGSI_OPCODE_DSEQ: + case TGSI_OPCODE_DSGE: + case TGSI_OPCODE_DSNE: + case TGSI_OPCODE_DSLT: + case TGSI_OPCODE_D2I: + case TGSI_OPCODE_D2U: + /* Write to 1 or 2 components only */ + assert(util_bitcount(writemask) <= 2); + break; + default: + /* XXX this list may be incomplete */ + ; + } +} + + +/** + * Double-precision absolute value. */ static boolean -emit_tex2(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_dabs(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[2].Register.Index; - unsigned target = inst->Texture.Texture; - struct tgsi_full_src_register coord, ref; - int offsets[3]; - struct tex_swizzle_info swz_info; + assert(emit->version >= 50); + check_double_src_swizzle(&inst->Src[0]); + check_double_dst_writemask(inst); - /* check that the sampler returns a float */ - if (!is_valid_tex_instruction(emit, inst)) - return TRUE; + struct tgsi_full_src_register abs_src = absolute_src(&inst->Src[0]); - begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + /* DMOV dst, |src| */ + emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src); - get_texel_offsets(emit, inst, offsets); + return TRUE; +} - coord = setup_texcoord(emit, unit, &inst->Src[0]); - ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); - /* SAMPLE_C dst, coord, resource, sampler, ref */ - begin_emit_instruction(emit); - emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C, - inst->Instruction.Saturate, offsets); - emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); - emit_src_register(emit, &coord); - emit_resource_register(emit, unit); - emit_sampler_register(emit, unit); - emit_tex_compare_refcoord(emit, target, &ref); - end_emit_instruction(emit); +/** + * Double-precision negation + */ +static boolean +emit_dneg(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + assert(emit->version >= 50); + check_double_src_swizzle(&inst->Src[0]); + check_double_dst_writemask(inst); - end_tex_swizzle(emit, &swz_info); + struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); - free_temp_indexes(emit); + /* DMOV dst, -src */ + emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src); return TRUE; } /** - * Emit code for TGSI_OPCODE_TXP (projective texture) + * SM5 has no DMAD opcode. 
Implement negation with DMUL/DADD. */ static boolean -emit_txp(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_dmad(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[1].Register.Index; - const enum tgsi_texture_type target = inst->Texture.Texture; - VGPU10_OPCODE_TYPE opcode; - int offsets[3]; + assert(emit->version >= 50); + check_double_src_swizzle(&inst->Src[0]); + check_double_src_swizzle(&inst->Src[1]); + check_double_src_swizzle(&inst->Src[2]); + check_double_dst_writemask(inst); + unsigned tmp = get_temp_index(emit); struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); - struct tgsi_full_src_register src0_wwww = - scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); - struct tgsi_full_src_register coord; - struct tex_swizzle_info swz_info; - /* check that the sampler returns a float */ - if (!is_valid_tex_instruction(emit, inst)) - return TRUE; + /* DMUL tmp, src[0], src[1] */ + emit_instruction_opn(emit, VGPU10_OPCODE_DMUL, + &tmp_dst, &inst->Src[0], &inst->Src[1], NULL, + FALSE, inst->Instruction.Precise); - begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + /* DADD dst, tmp, src[2] */ + emit_instruction_opn(emit, VGPU10_OPCODE_DADD, + &inst->Dst[0], &tmp_src, &inst->Src[2], NULL, + inst->Instruction.Saturate, inst->Instruction.Precise); + free_temp_indexes(emit); - get_texel_offsets(emit, inst, offsets); + return TRUE; +} - coord = setup_texcoord(emit, unit, &inst->Src[0]); - /* DIV tmp, coord, coord.wwww */ - emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst, - &coord, &src0_wwww, FALSE); +/** + * Double precision reciprocal square root + */ +static boolean +emit_drsq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + assert(emit->version >= 50); - /* SAMPLE dst, coord(tmp), resource, sampler */ + VGPU10OpcodeToken0 token0; begin_emit_instruction(emit); - if (tgsi_is_shadow_target(target)) - /* NOTE: for non-fragment shaders, we should use - * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is. - */ - opcode = VGPU10_OPCODE_SAMPLE_C; - else - opcode = VGPU10_OPCODE_SAMPLE; + token0.value = 0; + token0.opcodeType = VGPU10_OPCODE_VMWARE; + token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ; + emit_dword(emit, token0.value); - emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); - emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); - emit_src_register(emit, &tmp_src); /* projected coord */ - emit_resource_register(emit, unit); - emit_sampler_register(emit, unit); - if (opcode == VGPU10_OPCODE_SAMPLE_C) { - emit_tex_compare_refcoord(emit, target, &tmp_src); - } - end_emit_instruction(emit); + emit_dst_register(emit, dst); - end_tex_swizzle(emit, &swz_info); + check_double_src_swizzle(src); + emit_src_register(emit, src); - free_temp_indexes(emit); + end_emit_instruction(emit); return TRUE; } /** - * Emit code for TGSI_OPCODE_TXD (explicit derivatives) + * There is no SM5 opcode for double precision square root. + * It will be implemented with DRSQ. 
+ * dst = src * DRSQ(src) */ static boolean -emit_txd(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_dsqrt(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[3].Register.Index; - const enum tgsi_texture_type target = inst->Texture.Texture; - int offsets[3]; - struct tgsi_full_src_register coord; - struct tex_swizzle_info swz_info; + assert(emit->version >= 50); - begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), - &swz_info); + check_double_src_swizzle(&inst->Src[0]); - get_texel_offsets(emit, inst, offsets); + /* temporary register to hold the source */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); - coord = setup_texcoord(emit, unit, &inst->Src[0]); + /* temporary register to hold the DEQ result */ + unsigned tmp_cond = get_temp_index(emit); + struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond); + struct tgsi_full_dst_register tmp_cond_dst_xy = + writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y); + struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond); + struct tgsi_full_src_register tmp_cond_src_xy = + swizzle_src(&tmp_cond_src, + PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y); + + /* The reciprocal square root of zero yields INF. + * So if the source is 0, we replace it with 1 in the tmp register. + * The later multiplication of zero in the original source will yield 0 + * in the result. + */ - /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */ - begin_emit_instruction(emit); - emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D, - inst->Instruction.Saturate, offsets); - emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); - emit_src_register(emit, &coord); - emit_resource_register(emit, unit); - emit_sampler_register(emit, unit); - emit_src_register(emit, &inst->Src[1]); /* Xderiv */ - emit_src_register(emit, &inst->Src[2]); /* Yderiv */ - end_emit_instruction(emit); + /* tmp1 = (src == 0) ? 
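In scalar C terms, the DEQ/DMOVC/DRSQ/DMUL sequence emitted below amounts to the following sketch (a hypothetical helper, shown only to illustrate why a zero source is replaced by 1.0 before taking the reciprocal square root):

#include <math.h>

static double
dsqrt_via_drsq(double x)
{
   /* rsqrt(0) would be INF, and 0 * INF is NaN, so guard the source first */
   double guarded = (x == 0.0) ? 1.0 : x;   /* DEQ + DMOVC           */
   double rsq = 1.0 / sqrt(guarded);        /* DRSQ                  */
   return x * rsq;                          /* DMUL: 0 * finite = 0  */
}
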
1 : src; + * EQ tmp1, 0, src + * MOVC tmp, tmp1, 1.0, src + */ + struct tgsi_full_src_register zero = + make_immediate_reg_double(emit, 0); - end_tex_swizzle(emit, &swz_info); + struct tgsi_full_src_register one = + make_immediate_reg_double(emit, 1.0); + + emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy, + &zero, &inst->Src[0]); + emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst, + &tmp_cond_src_xy, &one, &inst->Src[0]); + + struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp); + + /* DRSQ tmp_rsq, tmp */ + emit_drsq(emit, &tmp_rsq_dst, &tmp_src); + + /* DMUL dst, tmp_rsq, src[0] */ + emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0], + &tmp_rsq_src, &inst->Src[0]); free_temp_indexes(emit); @@ -5530,52 +8657,35 @@ emit_txd(struct svga_shader_emitter_v10 *emit, } -/** - * Emit code for TGSI_OPCODE_TXF (texel fetch) - */ static boolean -emit_txf(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_interp_offset(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[1].Register.Index; - const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture) - && emit->key.tex[unit].num_samples > 1; - int offsets[3]; - struct tex_swizzle_info swz_info; + assert(emit->version >= 50); - begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); - - get_texel_offsets(emit, inst, offsets); - - if (msaa) { - assert(emit->key.tex[unit].num_samples > 1); - - /* Fetch one sample from an MSAA texture */ - struct tgsi_full_src_register sampleIndex = - scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); - /* LD_MS dst, coord(s0), resource, sampleIndex */ - begin_emit_instruction(emit); - emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS, - inst->Instruction.Saturate, offsets); - emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); - emit_src_register(emit, &inst->Src[0]); - emit_resource_register(emit, unit); - emit_src_register(emit, &sampleIndex); - end_emit_instruction(emit); - } - else { - /* Fetch one texel specified by integer coordinate */ - /* LD dst, coord(s0), resource */ - begin_emit_instruction(emit); - emit_sample_opcode(emit, VGPU10_OPCODE_LD, - inst->Instruction.Saturate, offsets); - emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); - emit_src_register(emit, &inst->Src[0]); - emit_resource_register(emit, unit); - end_emit_instruction(emit); - } + /* The src1.xy offset is a float with values in the range [-0.5, 0.5] + * where (0,0) is the center of the pixel. We need to translate that + * into an integer offset on a 16x16 grid in the range [-8/16, 7/16]. + * Also need to flip the Y axis (I think). 
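A scalar sketch of the conversion performed by the MUL/FTOI pair below (the helper name is illustrative; the cast truncates toward zero, which matches FTOI):

static int
snap_interp_offset(float offset, int flip_y)
{
   /* map a float offset in [-0.5, 0.5] to a signed step on a 16x16
    * sub-pixel grid, e.g. 0.25 -> 4 (4/16) and -0.5 -> -8 (-8/16)
    */
   float scaled = offset * (flip_y ? -16.0f : 16.0f);
   return (int) scaled;
}
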
+ */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst_xy = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y); + struct tgsi_full_src_register const16 = + make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0); - end_tex_swizzle(emit, &swz_info); + /* MUL tmp.xy, src1, {16, -16, 0, 0} */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, + &tmp_dst_xy, &inst->Src[1], &const16); + + /* FTOI tmp.xy, tmp */ + emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src); + + /* EVAL_SNAPPED dst, src0, tmp */ + emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED, + &inst->Dst[0], &inst->Src[0], &tmp_src); free_temp_indexes(emit); @@ -5584,164 +8694,259 @@ emit_txf(struct svga_shader_emitter_v10 *emit, /** - * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias) - * or TGSI_OPCODE_TXB2 (for cube shadow maps). + * Emit a simple instruction (like ADD, MUL, MIN, etc). */ static boolean -emit_txl_txb(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_simple(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - const enum tgsi_texture_type target = inst->Texture.Texture; - VGPU10_OPCODE_TYPE opcode; - unsigned unit; - int offsets[3]; - struct tgsi_full_src_register coord, lod_bias; - struct tex_swizzle_info swz_info; - - assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL || - inst->Instruction.Opcode == TGSI_OPCODE_TXB || - inst->Instruction.Opcode == TGSI_OPCODE_TXB2); + const enum tgsi_opcode opcode = inst->Instruction.Opcode; + const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); + const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode); + const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode); + unsigned i; - if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) { - lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); - unit = inst->Src[2].Register.Index; + if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) { + emit->current_loop_depth++; } - else { - lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); - unit = inst->Src[1].Register.Index; + else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) { + emit->current_loop_depth--; } - begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), - &swz_info); - - get_texel_offsets(emit, inst, offsets); - - coord = setup_texcoord(emit, unit, &inst->Src[0]); - - /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */ begin_emit_instruction(emit); - if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) { - opcode = VGPU10_OPCODE_SAMPLE_L; + emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode), + inst->Instruction.Saturate, + inst->Instruction.Precise); + for (i = 0; i < op->num_dst; i++) { + if (dbl_dst) { + check_double_dst_writemask(inst); + } + emit_dst_register(emit, &inst->Dst[i]); } - else { - opcode = VGPU10_OPCODE_SAMPLE_B; + for (i = 0; i < op->num_src; i++) { + if (dbl_src) { + check_double_src_swizzle(&inst->Src[i]); + } + emit_src_register(emit, &inst->Src[i]); } - emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); - emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); - emit_src_register(emit, &coord); - emit_resource_register(emit, unit); - emit_sampler_register(emit, unit); - emit_src_register(emit, &lod_bias); end_emit_instruction(emit); - end_tex_swizzle(emit, &swz_info); - - 
free_temp_indexes(emit); - return TRUE; } /** - * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array. + * Emit MSB instruction (like IMSB, UMSB). + * + * GLSL returns the index starting from the LSB; + * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB. + * To get correct location as per glsl from SM5 device, we should + * return (31 - index) if returned index is not -1. */ static boolean -emit_txl2(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_msb(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - unsigned target = inst->Texture.Texture; - unsigned opcode, unit; - int offsets[3]; - struct tgsi_full_src_register coord, lod; - struct tex_swizzle_info swz_info; + const struct tgsi_full_dst_register *index_dst = &inst->Dst[0]; - assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2); + assert(index_dst->Register.File != TGSI_FILE_OUTPUT); - lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); - unit = inst->Src[2].Register.Index; + struct tgsi_full_src_register index_src = + make_src_reg(index_dst->Register.File, index_dst->Register.Index); + struct tgsi_full_src_register imm31 = + make_immediate_reg_int(emit, 31); + imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X); + struct tgsi_full_src_register neg_one = + make_immediate_reg_int(emit, -1); + neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X); + unsigned tmp = get_temp_index(emit); + const struct tgsi_full_dst_register tmp_dst = + make_dst_temp_reg(tmp); + const struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + const struct tgsi_full_src_register tmp_src_x = + make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X); + int writemask = TGSI_WRITEMASK_X; + int src_swizzle = TGSI_SWIZZLE_X; + int dst_writemask = index_dst->Register.WriteMask; - begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), - &swz_info); + emit_simple(emit, inst); - get_texel_offsets(emit, inst, offsets); + /* index conversion from SM5 to GLSL */ + while (writemask & dst_writemask) { + struct tgsi_full_src_register index_src_comp = + scalar_src(&index_src, src_swizzle); + struct tgsi_full_dst_register index_dst_comp = + writemask_dst(index_dst, writemask); - coord = setup_texcoord(emit, unit, &inst->Src[0]); + /* check if index_src_comp != -1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_INE, + &tmp_dst_x, &index_src_comp, &neg_one); - /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */ - begin_emit_instruction(emit); - opcode = VGPU10_OPCODE_SAMPLE_L; - emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); - emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); - emit_src_register(emit, &coord); - emit_resource_register(emit, unit); - emit_sampler_register(emit, unit); - emit_src_register(emit, &lod); - end_emit_instruction(emit); + /* if */ + emit_if(emit, &tmp_src_x); - end_tex_swizzle(emit, &swz_info); + index_src_comp = negate_src(&index_src_comp); + /* SUB DST, IMM{31}, DST */ + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, + &index_dst_comp, &imm31, &index_src_comp); - free_temp_indexes(emit); + /* endif */ + emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); + writemask = writemask << 1; + src_swizzle = src_swizzle + 1; + } + free_temp_indexes(emit); return TRUE; } /** - * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. + * Emit a BFE instruction (like UBFE, IBFE). 
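Scalar sketch of the per-component fix-up that the INE/IF/IADD sequence above performs (the helper is illustrative only):

static int
fixup_firstbit_hi(int sm5_index)
{
   /* SM5 firstbit_hi/shi counts from the MSB, GLSL findMSB counts from
    * the LSB; -1 ("no bit set") must pass through unchanged.
    */
   return (sm5_index != -1) ? 31 - sm5_index : sm5_index;
}
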
+ * tgsi representation: + * U/IBFE dst, value, offset, width + * SM5 representation: + * U/IBFE dst, width, offset, value + * Note: SM5 has width & offset range (0-31); + * whereas GLSL has width & offset range (0-32) */ static boolean -emit_txq(struct svga_shader_emitter_v10 *emit, +emit_bfe(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[1].Register.Index; + const enum tgsi_opcode opcode = inst->Instruction.Opcode; + struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32); + imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X); + struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0); + zero = scalar_src(&zero, TGSI_SWIZZLE_X); - if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) { - /* RESINFO does not support querying texture buffers, so we instead - * store texture buffer sizes in shader constants, then copy them to - * implement TXQ instead of emitting RESINFO. - * MOV dst, const[texture_buffer_size_index[unit]] - */ - struct tgsi_full_src_register size_src = - make_src_const_reg(emit->texture_buffer_size_index[unit]); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src, - FALSE); - } else { - /* RESINFO dst, srcMipLevel, resource */ - begin_emit_instruction(emit); - emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); - emit_dst_register(emit, &inst->Dst[0]); - emit_src_register(emit, &inst->Src[0]); - emit_resource_register(emit, unit); - end_emit_instruction(emit); - } + unsigned tmp1 = get_temp_index(emit); + const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1); + const struct tgsi_full_dst_register cond1_dst_x = + writemask_dst(&cond1_dst, TGSI_WRITEMASK_X); + const struct tgsi_full_src_register cond1_src_x = + make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X); - free_temp_indexes(emit); + unsigned tmp2 = get_temp_index(emit); + const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2); + const struct tgsi_full_dst_register cond2_dst_x = + writemask_dst(&cond2_dst, TGSI_WRITEMASK_X); + const struct tgsi_full_src_register cond2_src_x = + make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X); + + /** + * In SM5, when width = 32 and offset = 0, it returns 0. + * On the other hand GLSL, expects value to be copied as it is, to dst. + */ + + /* cond1 = width ! = 32 */ + emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, + &cond1_dst_x, &inst->Src[2], &imm32); + + /* cond2 = offset ! = 0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, + &cond2_dst_x, &inst->Src[1], &zero); + + /* cond 2 = cond1 & cond 2 */ + emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x, + &cond2_src_x, + &cond1_src_x); + /* IF */ + emit_if(emit, &cond2_src_x); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &inst->Src[0]); + + /* ELSE */ + emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); + + /* U/IBFE dst, width, offset, value */ + emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0], + &inst->Src[2], &inst->Src[1], &inst->Src[0]); + /* ENDIF */ + emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); + + free_temp_indexes(emit); return TRUE; } /** - * Emit a simple instruction (like ADD, MUL, MIN, etc). 
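For reference, the GLSL-level semantics that the UBFE/IBFE lowering above targets can be sketched in plain C (unsigned variant; offset + width <= 32 is assumed, matching GLSL's defined range):

static unsigned
bitfield_extract_ref(unsigned value, unsigned offset, unsigned width)
{
   if (width == 32 && offset == 0)
      return value;     /* SM5 U/IBFE yields 0 here, hence the IF/ELSE above */
   return (value >> offset) & ((1u << width) - 1u);
}
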
+ * Emit BFI instruction + * tgsi representation: + * BFI dst, base, insert, offset, width + * SM5 representation: + * BFI dst, width, offset, insert, base + * Note: SM5 has width & offset range (0-31); + * whereas GLSL has width & offset range (0-32) */ static boolean -emit_simple(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +emit_bfi(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { const enum tgsi_opcode opcode = inst->Instruction.Opcode; - const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); - unsigned i; + struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32); + imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X); + + struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0); + zero = scalar_src(&zero, TGSI_SWIZZLE_X); + + unsigned tmp1 = get_temp_index(emit); + const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1); + const struct tgsi_full_dst_register cond1_dst_x = + writemask_dst(&cond1_dst, TGSI_WRITEMASK_X); + const struct tgsi_full_src_register cond1_src_x = + make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X); + + unsigned tmp2 = get_temp_index(emit); + const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2); + const struct tgsi_full_dst_register cond2_dst_x = + writemask_dst(&cond2_dst, TGSI_WRITEMASK_X); + const struct tgsi_full_src_register cond2_src_x = + make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X); + + /** + * In SM5, when width = 32 and offset = 0, it returns 0. + * On the other hand GLSL, expects insert to be copied as it is, to dst. + */ + /* cond1 = width == 32 */ + emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, + &cond1_dst_x, &inst->Src[3], &imm32); + + /* cond1 = offset == 0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, + &cond2_dst_x, &inst->Src[2], &zero); + + /* cond2 = cond1 & cond2 */ + emit_instruction_op2(emit, VGPU10_OPCODE_AND, + &cond2_dst_x, &cond2_src_x, &cond1_src_x); + + /* if */ + emit_if(emit, &cond2_src_x); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &inst->Src[1]); + + /* else */ + emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); + + /* BFI dst, width, offset, insert, base */ begin_emit_instruction(emit); emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate); - for (i = 0; i < op->num_dst; i++) { - emit_dst_register(emit, &inst->Dst[i]); - } - for (i = 0; i < op->num_src; i++) { - emit_src_register(emit, &inst->Src[i]); - } + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &inst->Src[3]); + emit_src_register(emit, &inst->Src[2]); + emit_src_register(emit, &inst->Src[1]); + emit_src_register(emit, &inst->Src[0]); end_emit_instruction(emit); + /* endif */ + emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); + + free_temp_indexes(emit); return TRUE; } @@ -5803,6 +9008,56 @@ emit_simple_1dst(struct svga_shader_emitter_v10 *emit, } +/** + * Emit a vmware specific VGPU10 instruction. 
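The matching reference for the BFI lowering above, again as a plain C sketch with offset + width <= 32 assumed:

static unsigned
bitfield_insert_ref(unsigned base, unsigned insert, unsigned offset,
                    unsigned width)
{
   if (width == 32 && offset == 0)
      return insert;    /* SM5 BFI yields 0 here, hence the IF/ELSE above */
   unsigned mask = ((1u << width) - 1u) << offset;
   return (base & ~mask) | ((insert << offset) & mask);
}
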
+ */ +static boolean +emit_vmware(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, + VGPU10_VMWARE_OPCODE_TYPE subopcode) +{ + VGPU10OpcodeToken0 token0; + const enum tgsi_opcode opcode = inst->Instruction.Opcode; + const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); + const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode); + const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode); + + unsigned i; + + begin_emit_instruction(emit); + + assert((subopcode > 0 && emit->version >= 50) || subopcode == 0); + + token0.value = 0; + token0.opcodeType = VGPU10_OPCODE_VMWARE; + token0.vmwareOpcodeType = subopcode; + emit_dword(emit, token0.value); + + if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) { + /* IDIV only uses the first dest register. */ + emit_dst_register(emit, &inst->Dst[0]); + emit_null_dst_register(emit); + } else { + for (i = 0; i < op->num_dst; i++) { + if (dbl_dst) { + check_double_dst_writemask(inst); + } + emit_dst_register(emit, &inst->Dst[i]); + } + } + + for (i = 0; i < op->num_src; i++) { + if (dbl_src) { + check_double_src_swizzle(&inst->Src[i]); + } + emit_src_register(emit, &inst->Src[i]); + } + end_emit_instruction(emit); + + return TRUE; +} + + /** * Translate a single TGSI instruction to VGPU10. */ @@ -5813,6 +9068,9 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, { const enum tgsi_opcode opcode = inst->Instruction.Opcode; + if (emit->skip_instruction) + return TRUE; + switch (opcode) { case TGSI_OPCODE_ADD: case TGSI_OPCODE_AND: @@ -5852,7 +9110,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, case TGSI_OPCODE_NOP: case TGSI_OPCODE_NOT: case TGSI_OPCODE_OR: - case TGSI_OPCODE_RET: case TGSI_OPCODE_UADD: case TGSI_OPCODE_USEQ: case TGSI_OPCODE_USGE: @@ -5869,9 +9126,41 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, case TGSI_OPCODE_USHR: case TGSI_OPCODE_USNE: case TGSI_OPCODE_XOR: + /* Begin SM5 opcodes */ + case TGSI_OPCODE_F2D: + case TGSI_OPCODE_D2F: + case TGSI_OPCODE_DADD: + case TGSI_OPCODE_DMUL: + case TGSI_OPCODE_DMAX: + case TGSI_OPCODE_DMIN: + case TGSI_OPCODE_DSGE: + case TGSI_OPCODE_DSLT: + case TGSI_OPCODE_DSEQ: + case TGSI_OPCODE_DSNE: + case TGSI_OPCODE_BREV: + case TGSI_OPCODE_POPC: + case TGSI_OPCODE_LSB: + case TGSI_OPCODE_INTERP_CENTROID: + case TGSI_OPCODE_INTERP_SAMPLE: /* simple instructions */ return emit_simple(emit, inst); + case TGSI_OPCODE_RET: + if (emit->unit == PIPE_SHADER_TESS_CTRL && + !emit->tcs.control_point_phase) { + + /* store the tessellation levels in the patch constant phase only */ + store_tesslevels(emit); + } + return emit_simple(emit, inst); + case TGSI_OPCODE_IMSB: + case TGSI_OPCODE_UMSB: + return emit_msb(emit, inst); + case TGSI_OPCODE_IBFE: + case TGSI_OPCODE_UBFE: + return emit_bfe(emit, inst); + case TGSI_OPCODE_BFI: + return emit_bfi(emit, inst); case TGSI_OPCODE_MOV: return emit_mov(emit, inst); case TGSI_OPCODE_EMIT: @@ -5900,7 +9189,7 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, case TGSI_OPCODE_EXP: return emit_exp(emit, inst); case TGSI_OPCODE_IF: - return emit_if(emit, inst); + return emit_if(emit, &inst->Src[0]); case TGSI_OPCODE_KILL: return emit_kill(emit, inst); case TGSI_OPCODE_KILL_IF: @@ -5962,18 +9251,90 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, case TGSI_OPCODE_TXQ: return emit_txq(emit, inst); case TGSI_OPCODE_UIF: - return emit_if(emit, inst); + return emit_if(emit, &inst->Src[0]); case TGSI_OPCODE_UMUL_HI: case TGSI_OPCODE_IMUL_HI: case 
TGSI_OPCODE_UDIV: - case TGSI_OPCODE_IDIV: /* These cases use only the FIRST of two destination registers */ return emit_simple_1dst(emit, inst, 2, 0); + case TGSI_OPCODE_IDIV: + return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV); case TGSI_OPCODE_UMUL: case TGSI_OPCODE_UMOD: case TGSI_OPCODE_MOD: /* These cases use only the SECOND of two destination registers */ return emit_simple_1dst(emit, inst, 2, 1); + + /* Begin SM5 opcodes */ + case TGSI_OPCODE_DABS: + return emit_dabs(emit, inst); + case TGSI_OPCODE_DNEG: + return emit_dneg(emit, inst); + case TGSI_OPCODE_DRCP: + return emit_simple(emit, inst); + case TGSI_OPCODE_DSQRT: + return emit_dsqrt(emit, inst); + case TGSI_OPCODE_DMAD: + return emit_dmad(emit, inst); + case TGSI_OPCODE_DFRAC: + return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC); + case TGSI_OPCODE_D2I: + case TGSI_OPCODE_D2U: + return emit_simple(emit, inst); + case TGSI_OPCODE_I2D: + case TGSI_OPCODE_U2D: + return emit_simple(emit, inst); + case TGSI_OPCODE_DRSQ: + return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]); + case TGSI_OPCODE_DDIV: + return emit_simple(emit, inst); + case TGSI_OPCODE_INTERP_OFFSET: + return emit_interp_offset(emit, inst); + + /* The following opcodes should never be seen here. We return zero + * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED, + * FMA_SUPPORTED, LDEXP_SUPPORTED queries. + */ + case TGSI_OPCODE_FMA: + case TGSI_OPCODE_LDEXP: + case TGSI_OPCODE_DSSG: + case TGSI_OPCODE_DFRACEXP: + case TGSI_OPCODE_DLDEXP: + case TGSI_OPCODE_DTRUNC: + case TGSI_OPCODE_DCEIL: + case TGSI_OPCODE_DFLR: + debug_printf("Unexpected TGSI opcode %s. " + "Should have been translated away by the GLSL compiler.\n", + tgsi_get_opcode_name(opcode)); + return FALSE; + + case TGSI_OPCODE_LOAD: + case TGSI_OPCODE_STORE: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMXOR: + return FALSE; + case TGSI_OPCODE_BARRIER: + if (emit->unit == PIPE_SHADER_TESS_CTRL) { + /* SM5 device doesn't support BARRIER in tcs . If barrier is used + * in shader, don't do anything for this opcode and continue rest + * of shader translation + */ + pipe_debug_message(&emit->svga_debug_callback, INFO, + "barrier instruction is not supported in tessellation control shader\n"); + return TRUE; + } + else { + return emit_simple(emit, inst); + } + case TGSI_OPCODE_END: if (!emit_post_helpers(emit)) return FALSE; @@ -5998,11 +9359,11 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, * \param vs_pos_tmp_index which temporary register contains the vertex pos. 
*/ static void -emit_vpos_instructions(struct svga_shader_emitter_v10 *emit, - unsigned vs_pos_tmp_index) +emit_vpos_instructions(struct svga_shader_emitter_v10 *emit) { struct tgsi_full_src_register tmp_pos_src; struct tgsi_full_dst_register pos_dst; + const unsigned vs_pos_tmp_index = emit->vposition.tmp_index; /* Don't bother to emit any extra vertex instructions if vertex position is * not written out @@ -6010,6 +9371,12 @@ emit_vpos_instructions(struct svga_shader_emitter_v10 *emit, if (emit->vposition.out_index == INVALID_INDEX) return; + /** + * Reset the temporary vertex position register index + * so that emit_dst_register() will use the real vertex position output + */ + emit->vposition.tmp_index = INVALID_INDEX; + tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index); pos_dst = make_dst_output_reg(emit->vposition.out_index); @@ -6023,8 +9390,7 @@ emit_vpos_instructions(struct svga_shader_emitter_v10 *emit, make_dst_output_reg(emit->vposition.so_index); /* MOV pos_so, tmp_pos */ - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, - &tmp_pos_src, FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src); } if (emit->vposition.need_prescale) { @@ -6045,17 +9411,17 @@ emit_vpos_instructions(struct svga_shader_emitter_v10 *emit, writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ); struct tgsi_full_src_register prescale_scale = - make_src_const_reg(emit->vposition.prescale_scale_index); + make_src_temp_reg(emit->vposition.prescale_scale_index); struct tgsi_full_src_register prescale_trans = - make_src_const_reg(emit->vposition.prescale_trans_index); + make_src_temp_reg(emit->vposition.prescale_trans_index); /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz, - &tmp_pos_src, &prescale_scale, FALSE); + &tmp_pos_src, &prescale_scale); /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */ emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w, - &prescale_trans, &tmp_pos_src, FALSE); + &prescale_trans, &tmp_pos_src); } else if (emit->key.vs.undo_viewport) { /* This code computes the final vertex position from the temporary @@ -6090,19 +9456,18 @@ emit_vpos_instructions(struct svga_shader_emitter_v10 *emit, /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */ emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy, - &tmp_pos_src, &vp_zwww, FALSE); + &tmp_pos_src, &vp_zwww); /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy, - &tmp_pos_src, &vp_xyzw, FALSE); + &tmp_pos_src, &vp_xyzw); /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz, - &tmp_pos_src, &tmp_pos_src_wwww, FALSE); + &tmp_pos_src, &tmp_pos_src_wwww); /* MOV pos.w, tmp_pos.w */ - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, - &tmp_pos_src, FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src); } else if (vs_pos_tmp_index != INVALID_INDEX) { /* This code is to handle the case where the temporary vertex @@ -6120,6 +9485,11 @@ emit_vpos_instructions(struct svga_shader_emitter_v10 *emit, emit_src_register(emit, &tmp_pos_src); end_emit_instruction(emit); } + + /* Restore original vposition.tmp_index value for the next GS vertex. + * It doesn't matter for VS. 
+ */ + emit->vposition.tmp_index = vs_pos_tmp_index; } static void @@ -6129,7 +9499,8 @@ emit_clipping_instructions(struct svga_shader_emitter_v10 *emit) /* Copy from copy distance temporary to CLIPDIST & the shadow copy */ emit_clip_distance_instructions(emit); - } else if (emit->clip_mode == CLIP_VERTEX) { + } else if (emit->clip_mode == CLIP_VERTEX && + emit->key.last_vertex_stage) { /* Convert TGSI CLIPVERTEX to CLIPDIST */ emit_clip_vertex_instructions(emit); } @@ -6150,7 +9521,7 @@ emit_clipping_instructions(struct svga_shader_emitter_v10 *emit) * emit_vpos_instructions() call since the later function will change * the TEMP[vs_pos_tmp_index] value. */ - if (emit->clip_mode == CLIP_LEGACY) { + if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) { /* Emit CLIPDIST for legacy user defined clip planes */ emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index); } @@ -6165,26 +9536,14 @@ emit_clipping_instructions(struct svga_shader_emitter_v10 *emit) static void emit_vertex_instructions(struct svga_shader_emitter_v10 *emit) { - const unsigned vs_pos_tmp_index = emit->vposition.tmp_index; - /* Emit clipping instructions based on clipping mode */ emit_clipping_instructions(emit); - /** - * Reset the temporary vertex position register index - * so that emit_dst_register() will use the real vertex position output - */ - emit->vposition.tmp_index = INVALID_INDEX; - /* Emit vertex position instructions */ - emit_vpos_instructions(emit, vs_pos_tmp_index); - - /* Restore original vposition.tmp_index value for the next GS vertex. - * It doesn't matter for VS. - */ - emit->vposition.tmp_index = vs_pos_tmp_index; + emit_vpos_instructions(emit); } + /** * Translate the TGSI_OPCODE_EMIT GS instruction. */ @@ -6196,13 +9555,66 @@ emit_vertex(struct svga_shader_emitter_v10 *emit, assert(emit->unit == PIPE_SHADER_GEOMETRY); - emit_vertex_instructions(emit); + /** + * Emit the viewport array index for the first vertex. + */ + if (emit->gs.viewport_index_out_index != INVALID_INDEX) { + struct tgsi_full_dst_register viewport_index_out = + make_dst_output_reg(emit->gs.viewport_index_out_index); + struct tgsi_full_dst_register viewport_index_out_x = + writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X); + struct tgsi_full_src_register viewport_index_tmp = + make_src_temp_reg(emit->gs.viewport_index_tmp_index); + + /* Set the out index to INVALID_INDEX, so it will not + * be assigned to a temp again in emit_dst_register, and + * the viewport index will not be assigned again in the + * subsequent vertices. + */ + emit->gs.viewport_index_out_index = INVALID_INDEX; + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &viewport_index_out_x, &viewport_index_tmp); + } + + /** + * Find the stream index associated with this emit vertex instruction. + */ + assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE); + unsigned streamIndex = find_stream_index(emit, &inst->Src[0]); + + /** + * According to the ARB_gpu_shader5 spec, the built-in geometry shader + * outputs are always associated with vertex stream zero. + * So emit the extra vertex instructions for position or clip distance + * for stream zero only. + */ + if (streamIndex == 0) { + /** + * Before emitting vertex instructions, emit the temporaries for + * the prescale constants based on the viewport index if needed. 
+ */ + if (emit->vposition.need_prescale && !emit->vposition.have_prescale) + emit_temp_prescale_instructions(emit); + + emit_vertex_instructions(emit); + } - /* We can't use emit_simple() because the TGSI instruction has one - * operand (vertex stream number) which we must ignore for VGPU10. - */ begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); + if (emit->version >= 50) { + if (emit->info.num_stream_output_components[streamIndex] == 0) { + /** + * If there is no output for this stream, discard this instruction. + */ + emit->discard_instruction = TRUE; + } + else { + emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE); + emit_stream_register(emit, streamIndex); + } + } + else { + emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); + } end_emit_instruction(emit); return ret; @@ -6399,11 +9811,11 @@ emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit) /* ITOF/UTOF/MOV tmp, input[index] */ if (save_itof_mask & (1 << index)) { emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, - &tmp_dst, &input_src, FALSE); + &tmp_dst, &input_src); } else if (save_utof_mask & (1 << index)) { emit_instruction_op1(emit, VGPU10_OPCODE_UTOF, - &tmp_dst, &input_src, FALSE); + &tmp_dst, &input_src); } else if (save_puint_to_snorm_mask & (1 << index)) { emit_puint_to_snorm(emit, &tmp_dst, &input_src); @@ -6417,7 +9829,7 @@ emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit) else { assert((save_w_1_mask | save_is_bgra_mask) & (1 << index)); emit_instruction_op1(emit, VGPU10_OPCODE_MOV, - &tmp_dst, &input_src, FALSE); + &tmp_dst, &input_src); } if (save_is_bgra_mask & (1 << index)) { @@ -6428,11 +9840,11 @@ emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit) /* MOV tmp.w, 1.0 */ if (emit->key.vs.attrib_is_pure_int & (1 << index)) { emit_instruction_op1(emit, VGPU10_OPCODE_MOV, - &tmp_dst_w, &one_int, FALSE); + &tmp_dst_w, &one_int); } else { emit_instruction_op1(emit, VGPU10_OPCODE_MOV, - &tmp_dst_w, &one, FALSE); + &tmp_dst_w, &one); } } } @@ -6448,58 +9860,281 @@ emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit) } -/** - * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed - * to implement some instructions. We pre-allocate those values here - * in the immediate constant buffer. 
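The EMIT lowering above reduces to a small decision, sketched here in C (the enum and function name are illustrative, not driver types):

enum gs_emit_kind { GS_EMIT_SKIP, GS_EMIT, GS_EMIT_STREAM };

static enum gs_emit_kind
classify_gs_emit(unsigned version, unsigned stream,
                 const unsigned *num_stream_output_components)
{
   if (version < 50)
      return GS_EMIT;          /* SM4: single implicit stream */
   if (num_stream_output_components[stream] == 0)
      return GS_EMIT_SKIP;     /* nothing is bound to this stream */
   return GS_EMIT_STREAM;      /* emit_stream m[stream] */
}
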
- */ +/* Find zero-value immedate for default layer index */ static void -alloc_common_immediates(struct svga_shader_emitter_v10 *emit) +emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit) { - unsigned n = 0; + assert(emit->unit == PIPE_SHADER_FRAGMENT); - emit->common_immediate_pos[n++] = - alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f); + /* immediate for default layer index 0 */ + if (emit->fs.layer_input_index != INVALID_INDEX) { + union tgsi_immediate_data imm; + imm.Int = 0; + emit->fs.layer_imm_index = find_immediate(emit, imm, 0); + } +} - if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) { - emit->common_immediate_pos[n++] = - alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f); + +static void +emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit, + unsigned cbuf_index, + struct tgsi_full_dst_register *scale, + struct tgsi_full_dst_register *translate) +{ + struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index); + struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf); +} + + +/** + * A recursive helper function to find the prescale from the constant buffer + */ +static void +find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit, + unsigned index, unsigned num_prescale, + struct tgsi_full_src_register *vp_index, + struct tgsi_full_dst_register *scale, + struct tgsi_full_dst_register *translate, + struct tgsi_full_src_register *tmp_src, + struct tgsi_full_dst_register *tmp_dst) +{ + if (num_prescale == 0) + return; + + if (index > 0) { + /* ELSE */ + emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); } - emit->common_immediate_pos[n++] = - alloc_immediate_int4(emit, 0, 1, 0, -1); + struct tgsi_full_src_register index_src = + make_immediate_reg_int(emit, index); - if (emit->key.vs.attrib_puint_to_snorm) { - emit->common_immediate_pos[n++] = - alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f); + if (index == 0) { + /* GE tmp, vp_index, index */ + emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst, + vp_index, &index_src); + } else { + /* EQ tmp, vp_index, index */ + emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst, + vp_index, &index_src); } - if (emit->key.vs.attrib_puint_to_uscaled) { - emit->common_immediate_pos[n++] = - alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f); + /* IF tmp */ + emit_if(emit, tmp_src); + emit_temp_prescale_from_cbuf(emit, + emit->vposition.prescale_cbuf_index + 2 * index, + scale, translate); + + find_prescale_from_cbuf(emit, index+1, num_prescale-1, + vp_index, scale, translate, + tmp_src, tmp_dst); + + /* ENDIF */ + emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); +} + + +/** + * This helper function emits instructions to set the prescale + * and translate temporaries to the correct constants from the + * constant buffer according to the designated viewport. 
+ */ +static void +emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit) +{ + struct tgsi_full_dst_register prescale_scale = + make_dst_temp_reg(emit->vposition.prescale_scale_index); + struct tgsi_full_dst_register prescale_translate = + make_dst_temp_reg(emit->vposition.prescale_trans_index); + + unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index; + + if (emit->vposition.num_prescale == 1) { + emit_temp_prescale_from_cbuf(emit, + prescale_cbuf_index, + &prescale_scale, &prescale_translate); + } else { + /** + * Since SM5 device does not support dynamic indexing, we need + * to do the if-else to find the prescale constants for the + * specified viewport. + */ + struct tgsi_full_src_register vp_index_src = + make_src_temp_reg(emit->gs.viewport_index_tmp_index); + + struct tgsi_full_src_register vp_index_src_x = + scalar_src(&vp_index_src, TGSI_SWIZZLE_X); + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_src_register tmp_src_x = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale, + &vp_index_src_x, + &prescale_scale, &prescale_translate, + &tmp_src_x, &tmp_dst); } - if (emit->key.vs.attrib_puint_to_sscaled) { - emit->common_immediate_pos[n++] = - alloc_immediate_int4(emit, 22, 12, 2, 0); + /* Mark prescale temporaries are emitted */ + emit->vposition.have_prescale = 1; +} - emit->common_immediate_pos[n++] = - alloc_immediate_int4(emit, 22, 30, 0, 0); + +/** + * Hull Shader must have control point outputs. But tessellation + * control shader can return without writing to control point output. + * In this case, the control point output is assumed to be passthrough + * from the control point input. + * This helper function is to write out a control point output first in case + * the tessellation control shader returns before writing a + * control point output. 
+ */ +static void +emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit) +{ + assert(emit->unit == PIPE_SHADER_TESS_CTRL); + assert(emit->tcs.control_point_phase); + assert(emit->tcs.control_point_input_index != INVALID_INDEX); + assert(emit->tcs.control_point_out_index != INVALID_INDEX); + assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX); + + /* UARL ADDR[INDEX].x INVOCATION.xxxx */ + + struct tgsi_full_src_register invocation_src; + struct tgsi_full_dst_register addr_dst; + struct tgsi_full_dst_register addr_dst_x; + unsigned addr_tmp; + + addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index]; + addr_dst = make_dst_temp_reg(addr_tmp); + addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X); + + invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE, + emit->tcs.invocation_id_sys_index); + + begin_emit_instruction(emit); + emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); + emit_dst_register(emit, &addr_dst_x); + emit_src_register(emit, &invocation_src); + end_emit_instruction(emit); + + + /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */ + + struct tgsi_full_src_register input_control_point; + struct tgsi_full_dst_register output_control_point; + + input_control_point = make_src_reg(TGSI_FILE_INPUT, + emit->tcs.control_point_input_index); + input_control_point.Register.Dimension = 1; + input_control_point.Dimension.Indirect = 1; + input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS; + input_control_point.DimIndirect.Index = emit->tcs.control_point_addr_index; + output_control_point = + make_dst_output_reg(emit->tcs.control_point_out_index); + + begin_emit_instruction(emit); + emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); + emit_dst_register(emit, &output_control_point); + emit_src_register(emit, &input_control_point); + end_emit_instruction(emit); +} + +/** + * This functions constructs temporary tessfactor from VGPU10*_TESSFACTOR + * values in domain shader. SM5 has tessfactors as floating point values where + * as tgsi emit them as vector. This function allows to construct temp + * tessfactor vector similar to TGSI_SEMANTIC_TESSINNER/OUTER filled with + * values from VGPU10*_TESSFACTOR. Use this constructed vector whenever + * TGSI_SEMANTIC_TESSINNER/OUTER is used in shader. + */ +static void +emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit) +{ + struct tgsi_full_src_register src; + struct tgsi_full_dst_register dst; + + if (emit->tes.inner.tgsi_index != INVALID_INDEX) { + dst = make_dst_temp_reg(emit->tes.inner.temp_index); + + switch (emit->tes.prim_mode) { + case PIPE_PRIM_QUADS: + src = make_src_scalar_reg(TGSI_FILE_INPUT, + emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X); + dst = writemask_dst(&dst, TGSI_WRITEMASK_Y); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); + case PIPE_PRIM_TRIANGLES: + src = make_src_scalar_reg(TGSI_FILE_INPUT, + emit->tes.inner.in_index, TGSI_SWIZZLE_X); + dst = writemask_dst(&dst, TGSI_WRITEMASK_X); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); + break; + case PIPE_PRIM_LINES: + /** + * As per SM5 spec, InsideTessFactor for isolines are unused. 
+ * In fact glsl tessInnerLevel for isolines doesn't mean anything but if + * any application try to read tessInnerLevel in TES when primitive type + * is isolines, then instead of driver throwing segfault for accesing it, + * return atleast vec(1.0f) + */ + src = make_immediate_reg_float(emit, 1.0f); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); + break; + default: + break; + } } - unsigned i; + if (emit->tes.outer.tgsi_index != INVALID_INDEX) { + dst = make_dst_temp_reg(emit->tes.outer.temp_index); + + switch (emit->tes.prim_mode) { + case PIPE_PRIM_QUADS: + src = make_src_scalar_reg(TGSI_FILE_INPUT, + emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X); + dst = writemask_dst(&dst, TGSI_WRITEMASK_W); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); + case PIPE_PRIM_TRIANGLES: + src = make_src_scalar_reg(TGSI_FILE_INPUT, + emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X); + dst = writemask_dst(&dst, TGSI_WRITEMASK_Z); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); + case PIPE_PRIM_LINES: + src = make_src_scalar_reg(TGSI_FILE_INPUT, + emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X); + dst = writemask_dst(&dst, TGSI_WRITEMASK_Y); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); + + src = make_src_scalar_reg(TGSI_FILE_INPUT, + emit->tes.outer.in_index , TGSI_SWIZZLE_X); + dst = writemask_dst(&dst, TGSI_WRITEMASK_X); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - if (emit->key.tex[i].texel_bias) { - /* Replace 0.0f if more immediate float value is needed */ - emit->common_immediate_pos[n++] = - alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f); + break; + default: break; } } +} - assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); - emit->num_common_immediates = n; + +static void +emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit) +{ + struct tgsi_full_src_register src; + struct tgsi_full_dst_register dst; + unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY, + emit->initialize_temp_index); + src = make_immediate_reg_float(emit, 0.0f); + dst = make_dst_temp_reg(vgpu10_temp_index); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); + emit->temp_map[emit->initialize_temp_index].initialized = TRUE; + emit->initialize_temp_index = INVALID_INDEX; } @@ -6513,6 +10148,25 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) /* Properties */ if (emit->unit == PIPE_SHADER_GEOMETRY) emit_property_instructions(emit); + else if (emit->unit == PIPE_SHADER_TESS_CTRL) { + emit_hull_shader_declarations(emit); + + /* Save the position of the first instruction token so that we can + * do a second pass of the instructions for the patch constant phase. 
+ */ + emit->tcs.instruction_token_pos = emit->cur_tgsi_token; + + if (!emit_hull_shader_control_point_phase(emit)) { + emit->skip_instruction = TRUE; + return TRUE; + } + + /* Set the current tcs phase to control point phase */ + emit->tcs.control_point_phase = TRUE; + } + else if (emit->unit == PIPE_SHADER_TESS_EVAL) { + emit_domain_shader_declarations(emit); + } /* Declare inputs */ if (!emit_input_declarations(emit)) @@ -6525,20 +10179,30 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) /* Declare temporary registers */ emit_temporaries_declaration(emit); - /* Declare constant registers */ - emit_constant_declaration(emit); + /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates + * will already be declared in hs_decls (emit_hull_shader_declarations) + */ + if (emit->unit != PIPE_SHADER_TESS_CTRL) { + /* Declare constant registers */ + emit_constant_declaration(emit); - /* Declare samplers and resources */ - emit_sampler_declarations(emit); - emit_resource_declarations(emit); + /* Declare samplers and resources */ + emit_sampler_declarations(emit); + emit_resource_declarations(emit); - /* Declare clip distance output registers */ - if (emit->unit == PIPE_SHADER_VERTEX || - emit->unit == PIPE_SHADER_GEOMETRY) { - emit_clip_distance_declarations(emit); + alloc_common_immediates(emit); + /* Now, emit the constant block containing all the immediates + * declared by shader, as well as the extra ones seen above. + */ } - alloc_common_immediates(emit); + if (emit->unit != PIPE_SHADER_FRAGMENT) { + /* + * Declare clip distance output registers for ClipVertex or + * user defined planes + */ + emit_clip_distance_declarations(emit); + } if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { @@ -6547,19 +10211,36 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) alloc_immediate_float4(emit, alpha, alpha, alpha, alpha); } - /* Now, emit the constant block containing all the immediates - * declared by shader, as well as the extra ones seen above. - */ - emit_vgpu10_immediates_block(emit); + if (emit->unit != PIPE_SHADER_TESS_CTRL) { + /** + * For PIPE_SHADER_TESS_CTRL, immediates are already declared in + * hs_decls + */ + emit_vgpu10_immediates_block(emit); + } + else { + emit_tcs_default_control_point_output(emit); + } if (emit->unit == PIPE_SHADER_FRAGMENT) { emit_frontface_instructions(emit); emit_fragcoord_instructions(emit); emit_sample_position_instructions(emit); + emit_default_layer_instructions(emit); } else if (emit->unit == PIPE_SHADER_VERTEX) { emit_vertex_attrib_instructions(emit); } + else if (emit->unit == PIPE_SHADER_TESS_EVAL) { + emit_temp_tessfactor_instructions(emit); + } + + /** + * For geometry shader that writes to viewport index, the prescale + * temporaries will be done at the first vertex emission. + */ + if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1) + emit_temp_prescale_instructions(emit); return TRUE; } @@ -6601,7 +10282,7 @@ emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit, color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W); - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one, FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one); } } @@ -6646,8 +10327,7 @@ emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit, */ if (emit->key.fs.write_color0_to_n_cbufs <= 1) { /* MOV output.color, tempcolor */ - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, - &color_src, FALSE); /* XXX saturate? 
*/ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src); } free_temp_indexes(emit); @@ -6694,8 +10374,7 @@ emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit, emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR; /* MOV output.color[i], tempcolor */ - emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, - &color_src, FALSE); /* XXX saturate? */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src); } } @@ -6734,6 +10413,18 @@ emit_post_helpers(struct svga_shader_emitter_v10 *emit) emit_broadcast_color_instructions(emit, fs_color_tmp_index); } } + else if (emit->unit == PIPE_SHADER_TESS_CTRL) { + if (!emit->tcs.control_point_phase) { + /* store the tessellation levels in the patch constant phase only */ + store_tesslevels(emit); + } + else { + emit_clipping_instructions(emit); + } + } + else if (emit->unit == PIPE_SHADER_TESS_EVAL) { + emit_vertex_instructions(emit); + } return TRUE; } @@ -6754,6 +10445,10 @@ emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, tgsi_parse_init(&parse, tokens); while (!tgsi_parse_end_of_tokens(&parse)) { + + /* Save the current tgsi token starting position */ + emit->cur_tgsi_token = parse.Position; + tgsi_parse_token(&parse); switch (parse.FullToken.Token.Type) { @@ -6778,6 +10473,24 @@ emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, } ret = emit_vgpu10_instruction(emit, inst_number++, &parse.FullToken.FullInstruction); + + /* Usually this applies to TCS only. If shader is reading control + * point outputs in control point phase, we should reemit all + * instructions which are writting into control point output in + * control phase to store results into temporaries. + */ + if (emit->reemit_instruction) { + assert(emit->unit == PIPE_SHADER_TESS_CTRL); + ret = emit_vgpu10_instruction(emit, inst_number, + &parse.FullToken.FullInstruction); + } + else if (emit->initialize_temp_index != INVALID_INDEX) { + emit_initialize_temp_instruction(emit); + emit->initialize_temp_index = INVALID_INDEX; + ret = emit_vgpu10_instruction(emit, inst_number - 1, + &parse.FullToken.FullInstruction); + } + if (!ret) goto done; break; @@ -6793,6 +10506,10 @@ emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, } } + if (emit->unit == PIPE_SHADER_TESS_CTRL) { + ret = emit_hull_shader_patch_constant_phase(emit, &parse); + } + done: tgsi_parse_free(&parse); return ret; @@ -6808,6 +10525,7 @@ emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) VGPU10ProgramToken ptoken; /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */ + ptoken.value = 0; /* init whole token to zero */ ptoken.majorVersion = emit->version / 10; ptoken.minorVersion = emit->version % 10; ptoken.programType = translate_shader_type(emit->unit); @@ -6817,7 +10535,49 @@ emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) /* Second token: total length of shader, in tokens. We can't fill this * in until we're all done. Emit zero for now. */ - return emit_dword(emit, 0); + if (!emit_dword(emit, 0)) + return FALSE; + + if (emit->version >= 50) { + VGPU10OpcodeToken0 token; + + if (emit->unit == PIPE_SHADER_TESS_CTRL) { + /* For hull shader, we need to start the declarations phase first before + * emitting any declarations including the global flags. 
+ */ + token.value = 0; + token.opcodeType = VGPU10_OPCODE_HS_DECLS; + begin_emit_instruction(emit); + emit_dword(emit, token.value); + end_emit_instruction(emit); + } + + /* Emit global flags */ + token.value = 0; /* init whole token to zero */ + token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS; + token.enableDoublePrecisionFloatOps = 1; /* set bit */ + token.instructionLength = 1; + if (!emit_dword(emit, token.value)) + return FALSE; + } + + if (emit->version >= 40) { + VGPU10OpcodeToken0 token; + + /* Reserved for global flag such as refactoringAllowed. + * If the shader does not use the precise qualifier, we will set the + * refactoringAllowed global flag; otherwise, we will leave the reserved + * token to NOP. + */ + emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0); + token.value = 0; + token.opcodeType = VGPU10_OPCODE_NOP; + token.instructionLength = 1; + if (!emit_dword(emit, token.value)) + return FALSE; + } + + return TRUE; } @@ -6830,6 +10590,16 @@ emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit) tokens = (VGPU10ProgramToken *) emit->buf; tokens[1].value = emit_get_num_tokens(emit); + if (emit->version >= 40 && !emit->uses_precise_qualifier) { + /* Replace the reserved token with the RefactoringAllowed global flag */ + VGPU10OpcodeToken0 *ptoken; + + ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token]; + assert(ptoken->opcodeType == VGPU10_OPCODE_NOP); + ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS; + ptoken->refactoringAllowed = 1; + } + return TRUE; } @@ -6908,6 +10678,97 @@ transform_fs_aapoint(const struct tgsi_token *tokens, return tokens; } + +/** + * A helper function to determine the shader in the previous stage and + * then call the linker function to determine the input mapping for this + * shader to match the output indices from the shader in the previous stage. + */ +static void +compute_input_mapping(struct svga_context *svga, + struct svga_shader_emitter_v10 *emit, + enum pipe_shader_type unit) +{ + struct svga_shader *prevShader = NULL; /* shader in the previous stage */ + + if (unit == PIPE_SHADER_FRAGMENT) { + prevShader = svga->curr.gs ? + &svga->curr.gs->base : (svga->curr.tes ? + &svga->curr.tes->base : &svga->curr.vs->base); + } else if (unit == PIPE_SHADER_GEOMETRY) { + prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base; + } else if (unit == PIPE_SHADER_TESS_EVAL) { + assert(svga->curr.tcs); + prevShader = &svga->curr.tcs->base; + } else if (unit == PIPE_SHADER_TESS_CTRL) { + assert(svga->curr.vs); + prevShader = &svga->curr.vs->base; + } + + if (prevShader != NULL) { + svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage); + } + else { + /** + * Since vertex shader does not need to go through the linker to + * establish the input map, we need to make sure the highest index + * of input registers is set properly here. 
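The reserved-token handling above (a NOP emitted in the header, rewritten in emit_vgpu10_tail) is the usual reserve-and-backpatch pattern; a self-contained sketch with a made-up buffer type:

#include <stddef.h>
#include <stdint.h>

struct token_buf { uint32_t tokens[1024]; size_t len; };

/* Emit a placeholder token and remember where it lives. */
static size_t
reserve_token(struct token_buf *b)
{
   b->tokens[b->len] = 0;       /* NOP for now */
   return b->len++;
}

/* Rewrite the placeholder once the final value is known, e.g. turn the NOP
 * into DCL_GLOBAL_FLAGS with refactoringAllowed set when no instruction in
 * the shader used the precise qualifier.
 */
static void
patch_token(struct token_buf *b, size_t pos, uint32_t value)
{
   b->tokens[pos] = value;
}
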
+
+/**
+ * A helper function to determine the shader in the previous stage and
+ * then call the linker function to determine the input mapping for this
+ * shader to match the output indices from the shader in the previous stage.
+ */
+static void
+compute_input_mapping(struct svga_context *svga,
+                      struct svga_shader_emitter_v10 *emit,
+                      enum pipe_shader_type unit)
+{
+   struct svga_shader *prevShader = NULL;   /* shader in the previous stage */
+
+   if (unit == PIPE_SHADER_FRAGMENT) {
+      prevShader = svga->curr.gs ?
+         &svga->curr.gs->base : (svga->curr.tes ?
+         &svga->curr.tes->base : &svga->curr.vs->base);
+   } else if (unit == PIPE_SHADER_GEOMETRY) {
+      prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
+   } else if (unit == PIPE_SHADER_TESS_EVAL) {
+      assert(svga->curr.tcs);
+      prevShader = &svga->curr.tcs->base;
+   } else if (unit == PIPE_SHADER_TESS_CTRL) {
+      assert(svga->curr.vs);
+      prevShader = &svga->curr.vs->base;
+   }
+
+   if (prevShader != NULL) {
+      svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage);
+   }
+   else {
+      /**
+       * Since vertex shader does not need to go through the linker to
+       * establish the input map, we need to make sure the highest index
+       * of input registers is set properly here.
+       */
+      emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
+                                         emit->info.file_max[TGSI_FILE_INPUT]);
+   }
+}
+
+
+/**
+ * Copies the shader signature info to the shader variant
+ */
+static void
+copy_shader_signature(struct svga_shader_signature *sgn,
+                      struct svga_shader_variant *variant)
+{
+   SVGA3dDXShaderSignatureHeader *header = &sgn->header;
+
+   /* Calculate the signature length */
+   variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
+                           (header->numInputSignatures +
+                            header->numOutputSignatures +
+                            header->numPatchConstantSignatures) *
+                           sizeof(SVGA3dDXShaderSignatureEntry);
+
+   /* Allocate buffer for the signature info */
+   variant->signature =
+      (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
+
+   char *sgnBuf = (char *)variant->signature;
+   unsigned sgnLen;
+
+   /* Copy the signature info to the shader variant structure */
+   memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
+   sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
+
+   if (header->numInputSignatures) {
+      sgnLen =
+         header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
+      memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
+      sgnBuf += sgnLen;
+   }
+
+   if (header->numOutputSignatures) {
+      sgnLen =
+         header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
+      memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
+      sgnBuf += sgnLen;
+   }
+
+   if (header->numPatchConstantSignatures) {
+      sgnLen =
+         header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
+      memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
+   }
+}
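copy_shader_signature() flattens the signature into a single allocation: the SVGA3dDXShaderSignatureHeader, immediately followed by the input, output and patch-constant SVGA3dDXShaderSignatureEntry arrays, in that order. As a worked example with purely illustrative counts, a tessellation control shader with 4 input, 2 output and 6 patch-constant entries ends up with signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) + (4 + 2 + 6) * sizeof(SVGA3dDXShaderSignatureEntry), and the three conditional memcpy() blocks place the entry arrays back to back right after the header in that same allocation.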
+
+
 /**
  * This is the main entrypoint for the TGSI -> VPGU10 translator.
  */
@@ -6920,12 +10781,15 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
    struct svga_shader_variant *variant = NULL;
    struct svga_shader_emitter_v10 *emit;
    const struct tgsi_token *tokens = shader->tokens;
-   struct svga_vertex_shader *vs = svga->curr.vs;
-   struct svga_geometry_shader *gs = svga->curr.gs;
+
+   (void) make_immediate_reg_double;   /* unused at this time */
 
    assert(unit == PIPE_SHADER_VERTEX ||
          unit == PIPE_SHADER_GEOMETRY ||
-         unit == PIPE_SHADER_FRAGMENT);
+         unit == PIPE_SHADER_FRAGMENT ||
+         unit == PIPE_SHADER_TESS_CTRL ||
+         unit == PIPE_SHADER_TESS_EVAL ||
+         unit == PIPE_SHADER_COMPUTE);
 
    /* These two flags cannot be used together */
    assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
@@ -6939,12 +10803,29 @@
      goto done;
 
    emit->unit = unit;
-   emit->version = svga_have_sm4_1(svga) ? 41 : 40;
+   if (svga_have_sm5(svga)) {
+      emit->version = 50;
+   } else if (svga_have_sm4_1(svga)) {
+      emit->version = 41;
+   } else {
+      emit->version = 40;
+   }
+
+   emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
 
    emit->key = *key;
 
    emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
-                                    emit->key.gs.need_prescale);
+                                    emit->key.gs.need_prescale ||
+                                    emit->key.tes.need_prescale);
+
+   /* Determine how many prescale factors are in the constant buffer */
+   emit->vposition.num_prescale = 1;
+   if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
+      assert(emit->unit == PIPE_SHADER_GEOMETRY);
+      emit->vposition.num_prescale = emit->key.gs.num_prescale;
+   }
+
    emit->vposition.tmp_index = INVALID_INDEX;
    emit->vposition.so_index = INVALID_INDEX;
    emit->vposition.out_index = INVALID_INDEX;
@@ -6954,13 +10835,60 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
    emit->fs.fragcoord_input_index = INVALID_INDEX;
    emit->fs.sample_id_sys_index = INVALID_INDEX;
    emit->fs.sample_pos_sys_index = INVALID_INDEX;
+   emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
+   emit->fs.layer_input_index = INVALID_INDEX;
+   emit->fs.layer_imm_index = INVALID_INDEX;
 
    emit->gs.prim_id_index = INVALID_INDEX;
+   emit->gs.invocation_id_sys_index = INVALID_INDEX;
+   emit->gs.viewport_index_out_index = INVALID_INDEX;
+   emit->gs.viewport_index_tmp_index = INVALID_INDEX;
+
+   emit->tcs.vertices_per_patch_index = INVALID_INDEX;
+   emit->tcs.invocation_id_sys_index = INVALID_INDEX;
+   emit->tcs.control_point_input_index = INVALID_INDEX;
+   emit->tcs.control_point_addr_index = INVALID_INDEX;
+   emit->tcs.control_point_out_index = INVALID_INDEX;
+   emit->tcs.control_point_tmp_index = INVALID_INDEX;
+   emit->tcs.control_point_out_count = 0;
+   emit->tcs.inner.out_index = INVALID_INDEX;
+   emit->tcs.inner.temp_index = INVALID_INDEX;
+   emit->tcs.inner.tgsi_index = INVALID_INDEX;
+   emit->tcs.outer.out_index = INVALID_INDEX;
+   emit->tcs.outer.temp_index = INVALID_INDEX;
+   emit->tcs.outer.tgsi_index = INVALID_INDEX;
+   emit->tcs.patch_generic_out_count = 0;
+   emit->tcs.patch_generic_out_index = INVALID_INDEX;
+   emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
+   emit->tcs.prim_id_index = INVALID_INDEX;
+
+   emit->tes.tesscoord_sys_index = INVALID_INDEX;
+   emit->tes.inner.in_index = INVALID_INDEX;
+   emit->tes.inner.temp_index = INVALID_INDEX;
+   emit->tes.inner.tgsi_index = INVALID_INDEX;
+   emit->tes.outer.in_index = INVALID_INDEX;
+   emit->tes.outer.temp_index = INVALID_INDEX;
+   emit->tes.outer.tgsi_index = INVALID_INDEX;
+   emit->tes.prim_id_index = INVALID_INDEX;
 
    emit->clip_dist_out_index = INVALID_INDEX;
    emit->clip_dist_tmp_index = INVALID_INDEX;
    emit->clip_dist_so_index = INVALID_INDEX;
    emit->clip_vertex_out_index = INVALID_INDEX;
+   emit->clip_vertex_tmp_index = INVALID_INDEX;
+   emit->svga_debug_callback = svga->debug.callback;
+
+   emit->index_range.start_index = INVALID_INDEX;
+   emit->index_range.count = 0;
+   emit->index_range.required = FALSE;
+   emit->index_range.operandType = VGPU10_NUM_OPERANDS;
+   emit->index_range.dim = 0;
+   emit->index_range.size = 0;
+
+   emit->current_loop_depth = 0;
+
+   emit->initialize_temp_index = INVALID_INDEX;
 
    if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
       emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
@@ -7002,34 +10930,21 @@
    emit->num_outputs = emit->info.num_outputs;
 
-   if (unit == PIPE_SHADER_FRAGMENT) {
-      /* Compute FS input remapping to match the output from VS/GS */
-      if (gs) {
-         svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage);
-      } else {
-         assert(vs);
-         svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
-      }
-   } else if (unit == PIPE_SHADER_GEOMETRY) {
-      assert(vs);
-      svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
-   }
-
-   /* Since vertex shader does not need to go through the linker to
-    * establish the input map, we need to make sure the highest index
-    * of input registers is set properly here.
+   /**
+    * Compute the input mapping to match the outputs from the shader
+    * in the previous stage.
     */
-   emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
-                                      emit->info.file_max[TGSI_FILE_INPUT]);
+   compute_input_mapping(svga, emit, unit);
 
    determine_clipping_mode(emit);
 
-   if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
+   if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
+       unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
      if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
         /* if there is stream output declarations associated
          * with this shader or the shader writes to ClipDistance
          * then reserve extra registers for the non-adjusted vertex position
-         * and the ClipDistance shadow copy
+         * and the ClipDistance shadow copy.
          */
         emit->vposition.so_index = emit->num_outputs++;
@@ -7073,6 +10988,12 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
    variant->shader = shader;
    variant->nr_tokens = emit_get_num_tokens(emit);
    variant->tokens = (const unsigned *)emit->buf;
+
+   /* Copy shader signature info to the shader variant */
+   if (svga_have_sm5(svga)) {
+      copy_shader_signature(&emit->signature, variant);
+   }
+
    emit->buf = NULL;  /* buffer is no longer owed by emitter context */
 
    memcpy(&variant->key, key, sizeof(*key));
    variant->id = UTIL_BITMASK_INVALID_INDEX;
@@ -7091,23 +11012,38 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
      variant->extra_const_start--;
    }
 
-   variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
+   if (unit == PIPE_SHADER_FRAGMENT) {
+      struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
 
-   /* If there was exactly one write to a fragment shader output register
-    * and it came from a constant buffer, we know all fragments will have
-    * the same color (except for blending).
-    */
-   variant->constant_color_output =
-      emit->constant_color_output && emit->num_output_writes == 1;
+      fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
 
-   /** keep track in the variant if flat interpolation is used
-    * for any of the varyings.
-    */
-   variant->uses_flat_interp = emit->uses_flat_interp;
+      /* If there was exactly one write to a fragment shader output register
+       * and it came from a constant buffer, we know all fragments will have
+       * the same color (except for blending).
+       */
+      fs_variant->constant_color_output =
+         emit->constant_color_output && emit->num_output_writes == 1;
+
+      /** keep track in the variant if flat interpolation is used
+       * for any of the varyings.
+       */
+      fs_variant->uses_flat_interp = emit->uses_flat_interp;
 
-   variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
+      fs_variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
+   }
+   else if (unit == PIPE_SHADER_TESS_EVAL) {
+      struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
+
+      /* Keep track of some of the layout parameters in the tes variant.
+       * These parameters will be referenced by the tcs to emit
+       * the necessary declarations for the hull shader.
+       */
+      tes_variant->prim_mode = emit->tes.prim_mode;
+      tes_variant->spacing = emit->tes.spacing;
+      tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
+      tes_variant->point_mode = emit->tes.point_mode;
+   }
 
-   variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
 
    if (tokens != shader->tokens) {
       tgsi_free_tokens(tokens);
diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h
index 88c1c6c7983..717e56caccf 100644
--- a/src/gallium/drivers/svga/svga_winsys.h
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -427,7 +427,9 @@ struct svga_winsys_context
                             uint32 shaderId,
                             SVGA3dShaderType shaderType,
                             const uint32 *bytecode,
-                            uint32 bytecodeLen);
+                            uint32 bytecodeLen,
+                            const SVGA3dDXShaderSignatureHeader *sgnInfo,
+                            uint32 sgnLen);
 
    /**
    * Destroy a DX GB shader.
@@ -457,7 +459,13 @@ struct svga_winsys_context
 
    /** For HUD queries */
    uint64_t num_commands;
+   uint64_t num_command_buffers;
    uint64_t num_draw_commands;
+   uint64_t num_shader_reloc;
+   uint64_t num_surf_reloc;
+
+   /* Whether we are in retry processing */
+   unsigned int in_retry;
 };
diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c
index 432f9afcd1f..da7506e7797 100644
--- a/src/gallium/winsys/svga/drm/vmw_context.c
+++ b/src/gallium/winsys/svga/drm/vmw_context.c
@@ -65,6 +65,7 @@
 
 #define VMW_MAX_SURF_MEM_FACTOR 2
 
+
 struct vmw_buffer_relocation
 {
    struct pb_buffer *buffer;
@@ -701,20 +702,19 @@ vmw_svga_winsys_vgpu10_shader_create(struct svga_winsys_context *swc,
                                     uint32 shaderId,
                                     SVGA3dShaderType shaderType,
                                     const uint32 *bytecode,
-                                    uint32 bytecodeLen)
+                                    uint32 bytecodeLen,
+                                    const SVGA3dDXShaderSignatureHeader *sgnInfo,
+                                    uint32 sgnLen)
 {
    struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
    struct vmw_svga_winsys_shader *shader;
-   struct svga_winsys_gb_shader *gb_shader =
-      vmw_svga_winsys_shader_create(&vswc->vws->base, shaderType, bytecode,
-                                    bytecodeLen);
-   if (!gb_shader)
+
+   shader = vmw_svga_shader_create(&vswc->vws->base, shaderType, bytecode,
+                                   bytecodeLen, sgnInfo, sgnLen);
+   if (!shader)
      return NULL;
 
-   shader = vmw_svga_winsys_shader(gb_shader);
    shader->shid = shaderId;
-
-   return gb_shader;
+   return svga_winsys_shader(shader);
 }
 
 /**
diff --git a/src/gallium/winsys/svga/drm/vmw_shader.c b/src/gallium/winsys/svga/drm/vmw_shader.c
index 56ffdd16f79..dbf63c59234 100644
--- a/src/gallium/winsys/svga/drm/vmw_shader.c
+++ b/src/gallium/winsys/svga/drm/vmw_shader.c
@@ -28,7 +28,9 @@
 #include "util/u_debug.h"
 #include "util/u_memory.h"
 
+#include "vmw_context.h"
 #include "vmw_shader.h"
+#include "vmw_buffer.h"
 #include "vmw_screen.h"
 
 void
@@ -63,3 +65,54 @@ vmw_svga_winsys_shader_reference(struct vmw_svga_winsys_shader **pdst,
 
    *pdst = src;
 }
+
+
+/**
+ * A helper function to create a shader object and upload the
+ * shader bytecode and signature if specified to the shader memory.
+ */
+struct vmw_svga_winsys_shader *
+vmw_svga_shader_create(struct svga_winsys_screen *sws,
+                       SVGA3dShaderType type,
+                       const uint32 *bytecode,
+                       uint32 bytecodeLen,
+                       const SVGA3dDXShaderSignatureHeader *sgnInfo,
+                       uint32 sgnLen)
+{
+   struct vmw_svga_winsys_shader *shader;
+   void *map;
+
+   shader = CALLOC_STRUCT(vmw_svga_winsys_shader);
+   if (!shader)
+      return NULL;
+
+   pipe_reference_init(&shader->refcnt, 1);
+   p_atomic_set(&shader->validated, 0);
+   shader->screen = vmw_winsys_screen(sws);
+   shader->buf = sws->buffer_create(sws, 64,
+                                    SVGA_BUFFER_USAGE_SHADER,
+                                    bytecodeLen + sgnLen);
+   if (!shader->buf) {
+      FREE(shader);
+      return NULL;
+   }
+
+   map = sws->buffer_map(sws, shader->buf, PIPE_TRANSFER_WRITE);
+   if (!map) {
+      sws->buffer_destroy(sws, shader->buf);   /* don't leak the bytecode buffer */
+      FREE(shader);
+      return NULL;
+   }
+
+   /* copy the shader bytecode */
+   memcpy(map, bytecode, bytecodeLen);
+
+   /* if a shader signature is specified, append it to the bytecode. */
+   if (sgnLen) {
+      assert(sws->have_sm5);
+      map = (char *)map + bytecodeLen;
+      memcpy(map, sgnInfo, sgnLen);
+   }
+   sws->buffer_unmap(sws, shader->buf);
+
+   return shader;
+}
diff --git a/src/gallium/winsys/svga/drm/vmw_shader.h b/src/gallium/winsys/svga/drm/vmw_shader.h
index ae557bcc8e4..a62a814471d 100644
--- a/src/gallium/winsys/svga/drm/vmw_shader.h
+++ b/src/gallium/winsys/svga/drm/vmw_shader.h
@@ -65,4 +65,12 @@ void
 vmw_svga_winsys_shader_reference(struct vmw_svga_winsys_shader **pdst,
                                  struct vmw_svga_winsys_shader *src);
 
+struct vmw_svga_winsys_shader *
+vmw_svga_shader_create(struct svga_winsys_screen *sws,
+                       SVGA3dShaderType type,
+                       const uint32 *bytecode,
+                       uint32 bytecodeLen,
+                       const SVGA3dDXShaderSignatureHeader *sgnInfo,
+                       uint32 sgnLen);
+
 #endif /* VMW_SHADER_H_ */
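To tie the pieces together: the signature captured by copy_shader_signature() is expected to travel through the widened winsys hook into vmw_svga_shader_create(), which sizes its buffer as bytecodeLen + sgnLen (both in bytes) and copies the two blocks back to back. The sketch below is a hypothetical call site, assuming the modified hook is the context's shader_create() callback and that it still returns a struct svga_winsys_gb_shader pointer; only the parameters visible in this patch and the variant fields set above (tokens, nr_tokens, signature, signatureLen) are taken from the source.

   /* Hypothetical helper illustrating how a shader variant's bytecode and
    * signature line up with the new shader_create() parameters.
    */
   static struct svga_winsys_gb_shader *
   define_variant_shader(struct svga_winsys_context *swc,
                         uint32 shaderId,
                         SVGA3dShaderType shaderType,
                         const struct svga_shader_variant *variant)
   {
      /* bytecodeLen is in bytes: nr_tokens VGPU10 tokens of 4 bytes each. */
      return swc->shader_create(swc, shaderId, shaderType,
                                variant->tokens,
                                variant->nr_tokens * sizeof(uint32),
                                variant->signature,
                                variant->signatureLen);
   }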