From ac2927335bc7cd4994d2fc0906eb328773b1f923 Mon Sep 17 00:00:00 2001 From: Patrick Rudolph Date: Wed, 26 Oct 2016 17:11:00 +0200 Subject: [PATCH] st/nine: Implement gallium nine CSMT Use an offloading thread for all nine_context functions. Macros are used to ease the reading of the code. Signed-off-by: Patrick Rudolph Signed-off-by: Axel Davy --- src/gallium/auxiliary/os/os_thread.h | 11 + .../state_trackers/nine/Makefile.sources | 1 + src/gallium/state_trackers/nine/adapter9.h | 1 + src/gallium/state_trackers/nine/device9.c | 53 +- src/gallium/state_trackers/nine/device9.h | 5 + .../state_trackers/nine/nine_csmt_helper.h | 377 ++++++++++++ src/gallium/state_trackers/nine/nine_queue.c | 15 + src/gallium/state_trackers/nine/nine_queue.h | 3 + src/gallium/state_trackers/nine/nine_state.c | 560 ++++++++++++------ src/gallium/state_trackers/nine/nine_state.h | 34 +- .../state_trackers/nine/pixelshader9.c | 2 +- src/gallium/state_trackers/nine/surface9.c | 1 + src/gallium/state_trackers/nine/swapchain9.c | 2 + .../state_trackers/nine/vertexshader9.c | 2 +- src/gallium/targets/d3dadapter9/drm.c | 6 + .../drivers/dri/common/xmlpool/t_options.h | 5 + 16 files changed, 899 insertions(+), 179 deletions(-) create mode 100644 src/gallium/state_trackers/nine/nine_csmt_helper.h diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h index ec8adbc75bb..21faf4b3beb 100644 --- a/src/gallium/auxiliary/os/os_thread.h +++ b/src/gallium/auxiliary/os/os_thread.h @@ -97,6 +97,17 @@ static inline void pipe_thread_setname( const char *name ) } +static inline int pipe_thread_is_self( pipe_thread thread ) +{ +#if defined(HAVE_PTHREAD) +# if defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \ + (__GLIBC__ >= 3 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 12)) + return pthread_equal(pthread_self(), thread); +# endif +#endif + return 0; +} + /* pipe_mutex */ typedef mtx_t pipe_mutex; diff --git 
a/src/gallium/state_trackers/nine/Makefile.sources b/src/gallium/state_trackers/nine/Makefile.sources index 12649087e7d..2bb08a26122 100644 --- a/src/gallium/state_trackers/nine/Makefile.sources +++ b/src/gallium/state_trackers/nine/Makefile.sources @@ -23,6 +23,7 @@ C_SOURCES := \ indexbuffer9.h \ iunknown.c \ iunknown.h \ + nine_csmt_helper.h \ nine_debug.c \ nine_debug.h \ nine_defines.h \ diff --git a/src/gallium/state_trackers/nine/adapter9.h b/src/gallium/state_trackers/nine/adapter9.h index 4a71540ab4a..60be056f892 100644 --- a/src/gallium/state_trackers/nine/adapter9.h +++ b/src/gallium/state_trackers/nine/adapter9.h @@ -41,6 +41,7 @@ struct d3dadapter9_context BOOL thread_submit; BOOL discard_delayed_release; BOOL tearfree_discard; + int csmt_force; void (*destroy)( struct d3dadapter9_context *ctx ); }; diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index dce908e7f8f..7d4a2a9c4b0 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -263,6 +263,24 @@ NineDevice9_ctor( struct NineDevice9 *This, nine_bind(&This->context.rt[i], This->state.rt[i]); } + /* Initialize CSMT */ + if (pCTX->csmt_force == 1) + This->csmt_active = true; + else if (pCTX->csmt_force == 0) + This->csmt_active = false; + else + /* r600 and radeonsi are thread safe. */ + This->csmt_active = strstr(pScreen->get_name(pScreen), "AMD") != NULL; + + if (This->csmt_active) { + This->csmt_ctx = nine_csmt_create(This); + if (!This->csmt_ctx) + return E_OUTOFMEMORY; + } + + if (This->csmt_active) + DBG("\033[1;32mCSMT is active\033[0m\n"); + /* Initialize a dummy VBO to be used when a vertex declaration does not * specify all the inputs needed by vertex shader, on win default behavior * is to pass 0,0,0,0 to the shader */ @@ -444,8 +462,8 @@ NineDevice9_ctor( struct NineDevice9 *This, /* Allocate upload helper for drivers that suck (from st pov ;). 
*/ - This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS); - This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS); + This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS) && !This->csmt_active; + This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS) && !This->csmt_active; This->driver_caps.user_cbufs = GET_PCAP(USER_CONSTANT_BUFFERS); This->driver_caps.user_sw_vbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_VERTEX_BUFFERS); This->driver_caps.user_sw_cbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_CONSTANT_BUFFERS); @@ -487,6 +505,8 @@ NineDevice9_ctor( struct NineDevice9 *This, nine_state_init_sw(This); ID3DPresentGroup_Release(This->present); + if (This->csmt_active) + nine_csmt_process(This); return D3D_OK; } @@ -499,6 +519,13 @@ NineDevice9_dtor( struct NineDevice9 *This ) DBG("This=%p\n", This); + /* Do not call nine_csmt_process here. The device is dead! */ + if (This->csmt_active && This->csmt_ctx) { + nine_csmt_destroy(This, This->csmt_ctx); + This->csmt_active = FALSE; + This->csmt_ctx = NULL; + } + nine_ff_fini(This); nine_state_destroy_sw(This); nine_state_clear(&This->state, TRUE); @@ -564,7 +591,7 @@ NineDevice9_GetScreen( struct NineDevice9 *This ) struct pipe_context * NineDevice9_GetPipe( struct NineDevice9 *This ) { - return This->context.pipe; + return nine_context_get_pipe(This); } const D3DCAPS9 * @@ -3251,7 +3278,9 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This, pConstantData, Vector4fCount * 4 * sizeof(state->vs_const_f[0])); - nine_context_set_vertex_shader_constant_f(This, StartRegister, pConstantData, Vector4fCount); + nine_context_set_vertex_shader_constant_f(This, StartRegister, pConstantData, + Vector4fCount * 4 * sizeof(state->vs_const_f[0]), + Vector4fCount); return D3D_OK; } @@ -3317,7 +3346,8 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This, &This->range_pool); state->changed.group |= NINE_STATE_VS_CONST; } else - 
nine_context_set_vertex_shader_constant_i(This, StartRegister, pConstantData, Vector4iCount); + nine_context_set_vertex_shader_constant_i(This, StartRegister, pConstantData, + Vector4iCount * sizeof(int[4]), Vector4iCount); return D3D_OK; } @@ -3391,7 +3421,8 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This, &This->range_pool); state->changed.group |= NINE_STATE_VS_CONST; } else - nine_context_set_vertex_shader_constant_b(This, StartRegister, pConstantData, BoolCount); + nine_context_set_vertex_shader_constant_b(This, StartRegister, pConstantData, + sizeof(BOOL) * BoolCount, BoolCount); return D3D_OK; } @@ -3655,7 +3686,9 @@ NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This, pConstantData, Vector4fCount * 4 * sizeof(state->ps_const_f[0])); - nine_context_set_pixel_shader_constant_f(This, StartRegister, pConstantData, Vector4fCount); + nine_context_set_pixel_shader_constant_f(This, StartRegister, pConstantData, + Vector4fCount * 4 * sizeof(state->ps_const_f[0]), + Vector4fCount); return D3D_OK; } @@ -3717,7 +3750,8 @@ NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This, state->changed.ps_const_i |= ((1 << Vector4iCount) - 1) << StartRegister; state->changed.group |= NINE_STATE_PS_CONST; } else - nine_context_set_pixel_shader_constant_i(This, StartRegister, pConstantData, Vector4iCount); + nine_context_set_pixel_shader_constant_i(This, StartRegister, pConstantData, + sizeof(state->ps_const_i[0]) * Vector4iCount, Vector4iCount); return D3D_OK; } @@ -3785,7 +3819,8 @@ NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This, state->changed.ps_const_b |= ((1 << BoolCount) - 1) << StartRegister; state->changed.group |= NINE_STATE_PS_CONST; } else - nine_context_set_pixel_shader_constant_b(This, StartRegister, pConstantData, BoolCount); + nine_context_set_pixel_shader_constant_b(This, StartRegister, pConstantData, + sizeof(BOOL) * BoolCount, BoolCount); return D3D_OK; } diff --git a/src/gallium/state_trackers/nine/device9.h 
b/src/gallium/state_trackers/nine/device9.h index 21e045c69dc..4539cda18a6 100644 --- a/src/gallium/state_trackers/nine/device9.h +++ b/src/gallium/state_trackers/nine/device9.h @@ -38,6 +38,7 @@ struct pipe_context; struct cso_context; struct hud_context; struct u_upload_mgr; +struct csmt_context; struct NineSwapChain9; struct NineStateBlock9; @@ -56,6 +57,10 @@ struct NineDevice9 struct pipe_context *pipe_sw; struct cso_context *cso_sw; + /* CSMT context */ + struct csmt_context *csmt_ctx; + BOOL csmt_active; + /* creation parameters */ D3DCAPS9 caps; D3DDEVICE_CREATION_PARAMETERS params; diff --git a/src/gallium/state_trackers/nine/nine_csmt_helper.h b/src/gallium/state_trackers/nine/nine_csmt_helper.h new file mode 100644 index 00000000000..92e5a9f18c9 --- /dev/null +++ b/src/gallium/state_trackers/nine/nine_csmt_helper.h @@ -0,0 +1,377 @@ +/* + * Copyright 2016 Patrick Rudolph + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/* get number of arguments with __NARG__ */ +#define __NARG__(...) __NARG_I_(__VA_ARGS__,__RSEQ_N()) +#define __NARG_I_(...) __ARG_N(__VA_ARGS__) +#define __ARG_N( \ + _1, _2, _3, _4, _5, _6, _7, _8, _9,_10, \ + _11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \ + _21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \ + _31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \ + _41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \ + _51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \ + _61,_62,_63,N,...) N +#define __RSEQ_N() \ + 63,62,61,60, \ + 59,58,57,56,55,54,53,52,51,50, \ + 49,48,47,46,45,44,43,42,41,40, \ + 39,38,37,36,35,34,33,32,31,30, \ + 29,28,27,26,25,24,23,22,21,20, \ + 19,18,17,16,15,14,13,12,11,10, \ + 9,8,7,6,5,4,3,2,1,0 + + +#define _args_for_bypass_1(a) a +#define _args_for_bypass_7(a, b, c, d, e, f, g) ,g +#define _args_for_bypass_14(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_7(__VA_ARGS__) +#define _args_for_bypass_21(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_14(__VA_ARGS__) +#define _args_for_bypass_28(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_21(__VA_ARGS__) +#define _args_for_bypass_35(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_28(__VA_ARGS__) +#define _args_for_bypass_42(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_35(__VA_ARGS__) +#define _args_for_bypass_49(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_42(__VA_ARGS__) +#define _args_for_bypass_56(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_49(__VA_ARGS__) +#define _args_for_bypass_63(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_56(__VA_ARGS__) +#define _args_for_bypass_70(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_63(__VA_ARGS__) + +#define _GFUNC_(n) _args_for_bypass_##n +#define _GFUNC(n) _GFUNC_(n) + +#define ARGS_FOR_BYPASS(...)
_GFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__) + +#define _args_for_mem_1(a) a; +#define _args_for_mem_7(a, b, c, d, e, f, g) f; +#define _args_for_mem_14(a, b, c, d, e, f, g, ...) f; _args_for_mem_7(__VA_ARGS__) +#define _args_for_mem_21(a, b, c, d, e, f, g, ...) f; _args_for_mem_14(__VA_ARGS__) +#define _args_for_mem_28(a, b, c, d, e, f, g, ...) f; _args_for_mem_21(__VA_ARGS__) +#define _args_for_mem_35(a, b, c, d, e, f, g, ...) f; _args_for_mem_28(__VA_ARGS__) +#define _args_for_mem_42(a, b, c, d, e, f, g, ...) f; _args_for_mem_35(__VA_ARGS__) +#define _args_for_mem_49(a, b, c, d, e, f, g, ...) f; _args_for_mem_42(__VA_ARGS__) +#define _args_for_mem_56(a, b, c, d, e, f, g, ...) f; _args_for_mem_49(__VA_ARGS__) +#define _args_for_mem_63(a, b, c, d, e, f, g, ...) f; _args_for_mem_56(__VA_ARGS__) +#define _args_for_mem_70(a, b, c, d, e, f, g, ...) f; _args_for_mem_63(__VA_ARGS__) + +#define _FFUNC_(n) _args_for_mem_##n +#define _FFUNC(n) _FFUNC_(n) + +#define ARGS_FOR_MEM(...) _FFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__) + +#define _args_for_unbind_1(a) a; +#define _args_for_unbind_7(a, b, c, d, e, f, g) e; +#define _args_for_unbind_14(a, b, c, d, e, f, g, ...) e; _args_for_unbind_7(__VA_ARGS__) +#define _args_for_unbind_21(a, b, c, d, e, f, g, ...) e; _args_for_unbind_14(__VA_ARGS__) +#define _args_for_unbind_28(a, b, c, d, e, f, g, ...) e; _args_for_unbind_21(__VA_ARGS__) +#define _args_for_unbind_35(a, b, c, d, e, f, g, ...) e; _args_for_unbind_28(__VA_ARGS__) +#define _args_for_unbind_42(a, b, c, d, e, f, g, ...) e; _args_for_unbind_35(__VA_ARGS__) +#define _args_for_unbind_49(a, b, c, d, e, f, g, ...) e; _args_for_unbind_42(__VA_ARGS__) +#define _args_for_unbind_56(a, b, c, d, e, f, g, ...) e; _args_for_unbind_49(__VA_ARGS__) +#define _args_for_unbind_63(a, b, c, d, e, f, g, ...) e; _args_for_unbind_56(__VA_ARGS__) +#define _args_for_unbind_70(a, b, c, d, e, f, g, ...) 
e; _args_for_unbind_63(__VA_ARGS__) + +#define _EFUNC_(n) _args_for_unbind_##n +#define _EFUNC(n) _EFUNC_(n) + +#define ARGS_FOR_UNBIND(...) _EFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__) + +#define _args_for_call_1(a) a +#define _args_for_call_7(a, b, c, d, e, f, g) ,d +#define _args_for_call_14(a, b, c, d, e, f, g, ...) ,d _args_for_call_7(__VA_ARGS__) +#define _args_for_call_21(a, b, c, d, e, f, g, ...) ,d _args_for_call_14(__VA_ARGS__) +#define _args_for_call_28(a, b, c, d, e, f, g, ...) ,d _args_for_call_21(__VA_ARGS__) +#define _args_for_call_35(a, b, c, d, e, f, g, ...) ,d _args_for_call_28(__VA_ARGS__) +#define _args_for_call_42(a, b, c, d, e, f, g, ...) ,d _args_for_call_35(__VA_ARGS__) +#define _args_for_call_49(a, b, c, d, e, f, g, ...) ,d _args_for_call_42(__VA_ARGS__) +#define _args_for_call_56(a, b, c, d, e, f, g, ...) ,d _args_for_call_49(__VA_ARGS__) +#define _args_for_call_63(a, b, c, d, e, f, g, ...) ,d _args_for_call_56(__VA_ARGS__) +#define _args_for_call_70(a, b, c, d, e, f, g, ...) ,d _args_for_call_63(__VA_ARGS__) + +#define _DFUNC_(n) _args_for_call_##n +#define _DFUNC(n) _DFUNC_(n) + +#define ARGS_FOR_CALL(...) _DFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__) + +#define _args_for_decl_1(a) a +#define _args_for_decl_7(a, b, c, d, e, f, g) ,c +#define _args_for_decl_14(a, b, c, d, e, f, g, ...) ,c _args_for_decl_7(__VA_ARGS__) +#define _args_for_decl_21(a, b, c, d, e, f, g, ...) ,c _args_for_decl_14(__VA_ARGS__) +#define _args_for_decl_28(a, b, c, d, e, f, g, ...) ,c _args_for_decl_21(__VA_ARGS__) +#define _args_for_decl_35(a, b, c, d, e, f, g, ...) ,c _args_for_decl_28(__VA_ARGS__) +#define _args_for_decl_42(a, b, c, d, e, f, g, ...) ,c _args_for_decl_35(__VA_ARGS__) +#define _args_for_decl_49(a, b, c, d, e, f, g, ...) ,c _args_for_decl_42(__VA_ARGS__) +#define _args_for_decl_56(a, b, c, d, e, f, g, ...) ,c _args_for_decl_49(__VA_ARGS__) +#define _args_for_decl_63(a, b, c, d, e, f, g, ...) 
,c _args_for_decl_56(__VA_ARGS__) +#define _args_for_decl_70(a, b, c, d, e, f, g, ...) ,c _args_for_decl_63(__VA_ARGS__) + +#define _CFUNC_(n) _args_for_decl_##n +#define _CFUNC(n) _CFUNC_(n) + +#define ARGS_FOR_DECLARATION(...) _CFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__) + +#define _args_for_assign_1(a) a +#define _args_for_assign_7(a, b, c, d, e, f, g) b; +#define _args_for_assign_14(a, b, c, d, e, f, g, ...) b; _args_for_assign_7(__VA_ARGS__) +#define _args_for_assign_21(a, b, c, d, e, f, g, ...) b; _args_for_assign_14(__VA_ARGS__) +#define _args_for_assign_28(a, b, c, d, e, f, g, ...) b; _args_for_assign_21(__VA_ARGS__) +#define _args_for_assign_35(a, b, c, d, e, f, g, ...) b; _args_for_assign_28(__VA_ARGS__) +#define _args_for_assign_42(a, b, c, d, e, f, g, ...) b; _args_for_assign_35(__VA_ARGS__) +#define _args_for_assign_49(a, b, c, d, e, f, g, ...) b; _args_for_assign_42(__VA_ARGS__) +#define _args_for_assign_56(a, b, c, d, e, f, g, ...) b; _args_for_assign_49(__VA_ARGS__) +#define _args_for_assign_63(a, b, c, d, e, f, g, ...) b; _args_for_assign_56(__VA_ARGS__) +#define _args_for_assign_70(a, b, c, d, e, f, g, ...) b; _args_for_assign_63(__VA_ARGS__) + +#define _BFUNC_(n) _args_for_assign_##n +#define _BFUNC(n) _BFUNC_(n) + +#define ARGS_FOR_ASSIGN(...) _BFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__) + +#define _args_for_struct_1(a) a; +#define _args_for_struct_7(a, b, c, d, e, f, g) a; +#define _args_for_struct_14(a, b, c, d, e, f, g, ...) a; _args_for_struct_7(__VA_ARGS__) +#define _args_for_struct_21(a, b, c, d, e, f, g, ...) a; _args_for_struct_14(__VA_ARGS__) +#define _args_for_struct_28(a, b, c, d, e, f, g, ...) a; _args_for_struct_21(__VA_ARGS__) +#define _args_for_struct_35(a, b, c, d, e, f, g, ...) a; _args_for_struct_28(__VA_ARGS__) +#define _args_for_struct_42(a, b, c, d, e, f, g, ...) a; _args_for_struct_35(__VA_ARGS__) +#define _args_for_struct_49(a, b, c, d, e, f, g, ...) 
a; _args_for_struct_42(__VA_ARGS__) +#define _args_for_struct_56(a, b, c, d, e, f, g, ...) a; _args_for_struct_49(__VA_ARGS__) +#define _args_for_struct_63(a, b, c, d, e, f, g, ...) a; _args_for_struct_56(__VA_ARGS__) +#define _args_for_struct_70(a, b, c, d, e, f, g, ...) a; _args_for_struct_63(__VA_ARGS__) + +#define _AFUNC_(n) _args_for_struct_##n +#define _AFUNC(n) _AFUNC_(n) + +#define ARGS_FOR_STRUCT(...) _AFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__) + +/* Serialization and deserialization */ + +#define CSMT_ITEM_NO_WAIT(name, ...) \ +\ +struct s_##name##_private { \ + struct csmt_instruction instr; \ + ARGS_FOR_STRUCT( __VA_ARGS__ ) \ +}; \ +\ +static void \ +name##_priv( struct NineDevice9 *device ARGS_FOR_DECLARATION( __VA_ARGS__ ) ); \ +\ +static int \ +name##_rx( struct NineDevice9 *device, struct csmt_instruction *instr ) \ +{ \ + struct csmt_context *ctx = device->csmt_ctx; \ + struct s_##name##_private *args = (struct s_##name##_private *)instr; \ + \ + (void) args; \ + (void) ctx; \ + name##_priv( \ + device ARGS_FOR_CALL( __VA_ARGS__ ) \ + ); \ + ARGS_FOR_UNBIND( __VA_ARGS__ ) \ + return 0; \ +} \ +\ +void \ +name( struct NineDevice9 *device ARGS_FOR_DECLARATION( __VA_ARGS__ ) ) \ +{ \ + struct csmt_context *ctx = device->csmt_ctx; \ + struct s_##name##_private *args; \ + unsigned memsize = sizeof(struct s_##name##_private); \ + unsigned memsize2 = 0; \ + \ + if (!device->csmt_active) { \ + name##_priv( \ + device ARGS_FOR_BYPASS( __VA_ARGS__ ) \ + ); \ + return; \ + } \ + ARGS_FOR_MEM ( __VA_ARGS__ ) \ + args = nine_queue_alloc(ctx->pool, memsize + memsize2); \ + assert(args); \ + args->instr.func = &name##_rx; \ + ARGS_FOR_ASSIGN( __VA_ARGS__ ) \ +} \ +\ +static void \ +name##_priv( struct NineDevice9 *device ARGS_FOR_DECLARATION( __VA_ARGS__ ) ) + +#define CSMT_ITEM_DO_WAIT(name, ...) 
\ +\ +struct s_##name##_private { \ + struct csmt_instruction instr; \ + ARGS_FOR_STRUCT( __VA_ARGS__ ) \ +}; \ +static void \ +name##_priv( struct NineDevice9 *device ARGS_FOR_DECLARATION( __VA_ARGS__ ) ); \ +\ +static int \ +name##_rx( struct NineDevice9 *device, struct csmt_instruction *instr) \ +{ \ + struct csmt_context *ctx = device->csmt_ctx; \ + struct s_##name##_private *args = (struct s_##name##_private *)instr; \ + \ + (void) args; \ + (void) ctx; \ + name##_priv( \ + device ARGS_FOR_CALL( __VA_ARGS__ ) \ + ); \ + ARGS_FOR_UNBIND( __VA_ARGS__ ) \ + return 1; \ +} \ +\ +void \ +name( struct NineDevice9 *device ARGS_FOR_DECLARATION( __VA_ARGS__ ) ) \ +{ \ + struct csmt_context *ctx = device->csmt_ctx; \ + struct s_##name##_private *args; \ + unsigned memsize = sizeof(struct s_##name##_private); \ + unsigned memsize2 = 0; \ + \ + if (!device->csmt_active) { \ + name##_priv( \ + device ARGS_FOR_BYPASS( __VA_ARGS__ ) \ + ); \ + return; \ + } \ + ARGS_FOR_MEM ( __VA_ARGS__ ) \ + args = nine_queue_alloc(ctx->pool, memsize + memsize2); \ + assert(args); \ + args->instr.func = &name##_rx; \ + ARGS_FOR_ASSIGN( __VA_ARGS__ ) \ + ctx->processed = FALSE; \ + nine_queue_flush(ctx->pool); \ + nine_csmt_wait_processed(ctx); \ +} \ +\ +static void \ +name##_priv( struct NineDevice9 *device ARGS_FOR_DECLARATION( __VA_ARGS__ ) ) + +/* ARGS_FOR_STRUCT, ARGS_FOR_ASSIGN, ARGS_FOR_DECLARATION, ARGS_FOR_CALL, ARGS_FOR_UNBIND, ARGS_FOR_MEM, ARGS_FOR_BYPASS */ +#define ARG_VAL(x, y) \ + x _##y ; ,\ + args->_##y = y ; ,\ + x y ,\ + args->_##y ,\ + ,\ + ,\ + y + +#define ARG_REF(x, y) \ + x* _##y ; ,\ + args->_##y = y; ,\ + x *y ,\ + args->_##y ,\ + ,\ + ,\ + y + +#define ARG_COPY_REF(x, y) \ + x * _##y ; x __##y ; ,\ + if ( y ) { args->_##y = &args->__##y ; args->__##y = *y ; } else { args->_##y = NULL; } ,\ + const x *y ,\ + (const x *)args->_##y ,\ + ,\ + ,\ + (const x *)y + +#define ARG_BIND_REF(x, y) \ + x * _##y ,\ + args->_##y = NULL; \ + if (args->_##y != y && args->_##y) \ 
+ NineUnknown_Unbind((void *)(args->_##y)); \ + if ( args->_##y != y && y ) \ + NineUnknown_Bind( (void *)y ); \ + if ( args->_##y != y ) \ + args->_##y = y ; ,\ + x *y ,\ + args->_##y,\ + if (args->_##y != NULL && args->_##y) \ + NineUnknown_Unbind((void *)(args->_##y)); \ + args->_##y = NULL; ,\ + ,\ + y + +#define ARG_BIND_RES(x, y) \ + x * _##y ,\ + args->_##y = NULL; \ + if (y) \ + pipe_resource_reference(&args->_##y, y); ,\ + x *y ,\ + args->_##y ,\ + if (args->_##y) \ + pipe_resource_reference(&args->_##y, NULL); ,\ + ,\ + y + +#define ARG_MEM(x, y) \ + x * _##y ,\ + args->_##y = (void *)args + memsize;\ + memcpy(args->_##y, y, memsize2); ,\ + const x *y ,\ + (const x *)args->_##y ,\ + ,\ + ,\ + (const x *)y + +#define ARG_MEM_SIZE(x, y) \ + x _##y ,\ + args->_##y = y; ,\ + x y ,\ + args->_##y ,\ + ,\ + memsize2 = y, \ + y + +#define ARG_BIND_BLIT(x, y) \ + x _##y ,\ + memcpy(&args->_##y , y, sizeof(x)); \ + args->_##y.src.resource = NULL; \ + args->_##y.dst.resource = NULL; \ + pipe_resource_reference(&args->_##y.src.resource, y->src.resource); \ + pipe_resource_reference(&args->_##y.dst.resource, y->dst.resource);,\ + x *y ,\ + &args->_##y ,\ + pipe_resource_reference(&args->_##y.src.resource, NULL); \ + pipe_resource_reference(&args->_##y.dst.resource, NULL);,\ + ,\ + y + +#define ARG_BIND_BUF(x, y) \ + x _##y ,\ + memcpy(&args->_##y , y, sizeof(x)); \ + args->_##y.buffer = NULL; \ + pipe_resource_reference(&args->_##y.buffer, y->buffer); ,\ + x *y ,\ + &args->_##y ,\ + pipe_resource_reference(&args->_##y.buffer, NULL); ,\ + ,\ + y + +#define ARG_BIND_VIEW(x, y) \ + x * _##y ,\ + args->_##y = NULL; \ + if (y) \ + pipe_sampler_view_reference(&args->_##y, y); ,\ + x *y ,\ + args->_##y ,\ + if (args->_##y) \ + pipe_sampler_view_reference(&args->_##y, NULL); ,\ + ,\ + y + diff --git a/src/gallium/state_trackers/nine/nine_queue.c b/src/gallium/state_trackers/nine/nine_queue.c index 31f9ce77ca8..885b0ab6873 100644 --- 
a/src/gallium/state_trackers/nine/nine_queue.c +++ b/src/gallium/state_trackers/nine/nine_queue.c @@ -143,6 +143,10 @@ nine_queue_flush(struct nine_queue_pool* ctx) DBG("flushing cmdbuf=%p instr=%d size=%d\n", cmdbuf, cmdbuf->num_instr, cmdbuf->offset); + /* Nothing to flush */ + if (!cmdbuf->num_instr) + return; + /* signal waiting worker */ pipe_mutex_lock(ctx->mutex_push); cmdbuf->full = 1; @@ -200,6 +204,17 @@ nine_queue_alloc(struct nine_queue_pool* ctx, unsigned space) return cmdbuf->mem_pool + offset; } +/* Returns the current queue empty state. + * TRUE no instructions queued. + * FALSE one or more instructions queued. */ +bool +nine_queue_isempty(struct nine_queue_pool* ctx) +{ + struct nine_cmdbuf *cmdbuf = &ctx->pool[ctx->head]; + + return (ctx->tail == ctx->head) && !cmdbuf->num_instr; +} + struct nine_queue_pool* nine_queue_create(void) { diff --git a/src/gallium/state_trackers/nine/nine_queue.h b/src/gallium/state_trackers/nine/nine_queue.h index 259978e842a..cc15bd853b4 100644 --- a/src/gallium/state_trackers/nine/nine_queue.h +++ b/src/gallium/state_trackers/nine/nine_queue.h @@ -39,6 +39,9 @@ nine_queue_flush(struct nine_queue_pool* ctx); void * nine_queue_alloc(struct nine_queue_pool* ctx, unsigned space); +bool +nine_queue_isempty(struct nine_queue_pool* ctx); + struct nine_queue_pool* nine_queue_create(void); diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index d18435f1aef..745b9d8c65b 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -21,7 +21,10 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ +#define NINE_STATE + #include "device9.h" +#include "swapchain9.h" #include "basetexture9.h" #include "buffer9.h" #include "indexbuffer9.h" @@ -36,13 +39,213 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "cso_cache/cso_context.h" +#include "util/u_atomic.h" #include "util/u_upload_mgr.h" #include "util/u_math.h" #include "util/u_box.h" #include "util/u_simple_shaders.h" +/* CSMT headers */ +#include "nine_queue.h" +#include "nine_csmt_helper.h" +#include "os/os_thread.h" + #define DBG_CHANNEL DBG_DEVICE +/* Nine CSMT */ + +struct csmt_instruction { + int (* func)(struct NineDevice9 *This, struct csmt_instruction *instr); +}; + +struct csmt_context { + pipe_thread worker; + struct nine_queue_pool* pool; + BOOL terminate; + pipe_condvar event_processed; + pipe_mutex mutex_processed; + struct NineDevice9 *device; + BOOL processed; +}; + +/* Wait for instruction to be processed. + * Caller has to ensure that only one thread waits at a time. + */ +static void +nine_csmt_wait_processed(struct csmt_context *ctx) +{ + pipe_mutex_lock(ctx->mutex_processed); + while (!p_atomic_read(&ctx->processed)) { + pipe_condvar_wait(ctx->event_processed, ctx->mutex_processed); + } + pipe_mutex_unlock(ctx->mutex_processed); +} + +/* CSMT worker thread */ +static +PIPE_THREAD_ROUTINE(nine_csmt_worker, arg) +{ + struct csmt_context *ctx = arg; + struct csmt_instruction *instr; + DBG("CSMT worker spawned\n"); + + pipe_thread_setname("CSMT-Worker"); + + while (1) { + nine_queue_wait_flush(ctx->pool); + + /* Get instruction. NULL on empty cmdbuf.
*/ + while (!p_atomic_read(&ctx->terminate) && + (instr = (struct csmt_instruction *)nine_queue_get(ctx->pool))) { + + /* decode */ + if (instr->func(ctx->device, instr)) { + pipe_mutex_lock(ctx->mutex_processed); + p_atomic_set(&ctx->processed, TRUE); + pipe_condvar_signal(ctx->event_processed); + pipe_mutex_unlock(ctx->mutex_processed); + } + } + if (p_atomic_read(&ctx->terminate)) { + pipe_mutex_lock(ctx->mutex_processed); + p_atomic_set(&ctx->processed, TRUE); + pipe_condvar_signal(ctx->event_processed); + pipe_mutex_unlock(ctx->mutex_processed); + break; + } + } + + DBG("CSMT worker destroyed\n"); + return 0; +} + +/* Create a CSMT context. + * Spawns a worker thread. + */ +struct csmt_context * +nine_csmt_create( struct NineDevice9 *This ) +{ + struct csmt_context *ctx; + + ctx = CALLOC_STRUCT(csmt_context); + if (!ctx) + return NULL; + + ctx->pool = nine_queue_create(); + if (!ctx->pool) { + FREE(ctx); + return NULL; + } + pipe_condvar_init(ctx->event_processed); + pipe_mutex_init(ctx->mutex_processed); + +#if DEBUG + pipe_thread_setname("Main thread"); +#endif + + ctx->device = This; + + ctx->worker = pipe_thread_create(nine_csmt_worker, ctx); + if (!ctx->worker) { + nine_queue_delete(ctx->pool); + FREE(ctx); + return NULL; + } + + DBG("Returning context %p\n", ctx); + + return ctx; +} + +static int +nop_func( struct NineDevice9 *This, struct csmt_instruction *instr ) +{ + (void) This; + (void) instr; + + return 1; +} + +/* Push nop instruction and flush the queue. + * Waits for the worker to complete. 
*/ +void +nine_csmt_process( struct NineDevice9 *device ) +{ + struct csmt_instruction* instr; + struct csmt_context *ctx = device->csmt_ctx; + + if (!device->csmt_active) + return; + + if (nine_queue_isempty(ctx->pool)) + return; + + DBG("device=%p\n", device); + + /* NOP */ + instr = nine_queue_alloc(ctx->pool, sizeof(struct csmt_instruction)); + assert(instr); + instr->func = nop_func; + + p_atomic_set(&ctx->processed, FALSE); + nine_queue_flush(ctx->pool); + + nine_csmt_wait_processed(ctx); +} + +/* Destroys a CSMT context. + * Waits for the worker thread to terminate. + */ +void +nine_csmt_destroy( struct NineDevice9 *device, struct csmt_context *ctx ) +{ + struct csmt_instruction* instr; + pipe_thread render_thread = ctx->worker; + + DBG("device=%p ctx=%p\n", device, ctx); + + /* Push nop and flush the queue. */ + instr = nine_queue_alloc(ctx->pool, sizeof(struct csmt_instruction)); + assert(instr); + instr->func = nop_func; + + p_atomic_set(&ctx->processed, FALSE); + /* Signal worker to terminate. 
*/ + p_atomic_set(&ctx->terminate, TRUE); + nine_queue_flush(ctx->pool); + + nine_csmt_wait_processed(ctx); + nine_queue_delete(ctx->pool); + pipe_mutex_destroy(ctx->mutex_processed); + + FREE(ctx); + + pipe_thread_wait(render_thread); +} + +struct pipe_context * +nine_context_get_pipe( struct NineDevice9 *device ) +{ + if (device->csmt_active) + nine_csmt_process(device); + return device->context.pipe; +} + +struct pipe_context * +nine_context_get_pipe_multithread( struct NineDevice9 *device ) +{ + struct csmt_context *ctx = device->csmt_ctx; + + if (!device->csmt_active) + return device->context.pipe; + + if (!pipe_thread_is_self(ctx->worker)) + nine_csmt_process(device); + + return device->context.pipe; +} +/* Nine state functions */ + /* Check if some states need to be set dirty */ static inline DWORD @@ -1094,11 +1297,44 @@ NineDevice9_ResolveZ( struct NineDevice9 *device ) #define ALPHA_TO_COVERAGE_ENABLE MAKEFOURCC('A', '2', 'M', '1') #define ALPHA_TO_COVERAGE_DISABLE MAKEFOURCC('A', '2', 'M', '0') +/* Nine_context functions. + * Serialized through CSMT macros. 
+ */ -void -nine_context_set_render_state(struct NineDevice9 *device, - D3DRENDERSTATETYPE State, - DWORD Value) +static void +nine_context_set_texture_apply(struct NineDevice9 *device, + DWORD stage, + BOOL enabled, + BOOL shadow, + DWORD lod, + D3DRESOURCETYPE type, + uint8_t pstype, + struct pipe_resource *res, + struct pipe_sampler_view *view0, + struct pipe_sampler_view *view1); +static void +nine_context_set_stream_source_apply(struct NineDevice9 *device, + UINT StreamNumber, + struct pipe_resource *res, + UINT OffsetInBytes, + UINT Stride); + +static void +nine_context_set_indices_apply(struct NineDevice9 *device, + struct pipe_resource *res, + UINT IndexSize, + UINT OffsetInBytes); + +static void +nine_context_set_pixel_shader_constant_i_transformed(struct NineDevice9 *device, + UINT StartRegister, + const int *pConstantData, + unsigned pConstantData_size, + UINT Vector4iCount); + +CSMT_ITEM_NO_WAIT(nine_context_set_render_state, + ARG_VAL(D3DRENDERSTATETYPE, State), + ARG_VAL(DWORD, Value)) { struct nine_context *context = &device->context; @@ -1137,17 +1373,16 @@ nine_context_set_render_state(struct NineDevice9 *device, context->changed.group |= nine_render_state_group[State]; } -static void -nine_context_set_texture_apply(struct NineDevice9 *device, - DWORD stage, - BOOL enabled, - BOOL shadow, - DWORD lod, - D3DRESOURCETYPE type, - uint8_t pstype, - struct pipe_resource *res, - struct pipe_sampler_view *view0, - struct pipe_sampler_view *view1) +CSMT_ITEM_NO_WAIT(nine_context_set_texture_apply, + ARG_VAL(DWORD, stage), + ARG_VAL(BOOL, enabled), + ARG_VAL(BOOL, shadow), + ARG_VAL(DWORD, lod), + ARG_VAL(D3DRESOURCETYPE, type), + ARG_VAL(uint8_t, pstype), + ARG_BIND_RES(struct pipe_resource, res), + ARG_BIND_VIEW(struct pipe_sampler_view, view0), + ARG_BIND_VIEW(struct pipe_sampler_view, view1)) { struct nine_context *context = &device->context; @@ -1197,11 +1432,10 @@ nine_context_set_texture(struct NineDevice9 *device, res, view0, view1); } -void 
-nine_context_set_sampler_state(struct NineDevice9 *device, - DWORD Sampler, - D3DSAMPLERSTATETYPE Type, - DWORD Value) +CSMT_ITEM_NO_WAIT(nine_context_set_sampler_state, + ARG_VAL(DWORD, Sampler), + ARG_VAL(D3DSAMPLERSTATETYPE, Type), + ARG_VAL(DWORD, Value)) { struct nine_context *context = &device->context; @@ -1213,12 +1447,11 @@ nine_context_set_sampler_state(struct NineDevice9 *device, context->changed.sampler[Sampler] |= 1 << Type; } -static void -nine_context_set_stream_source_apply(struct NineDevice9 *device, - UINT StreamNumber, - struct pipe_resource *res, - UINT OffsetInBytes, - UINT Stride) +CSMT_ITEM_NO_WAIT(nine_context_set_stream_source_apply, + ARG_VAL(UINT, StreamNumber), + ARG_BIND_RES(struct pipe_resource, res), + ARG_VAL(UINT, OffsetInBytes), + ARG_VAL(UINT, Stride)) { struct nine_context *context = &device->context; const unsigned i = StreamNumber; @@ -1249,10 +1482,9 @@ nine_context_set_stream_source(struct NineDevice9 *device, Stride); } -void -nine_context_set_stream_source_freq(struct NineDevice9 *device, - UINT StreamNumber, - UINT Setting) +CSMT_ITEM_NO_WAIT(nine_context_set_stream_source_freq, + ARG_VAL(UINT, StreamNumber), + ARG_VAL(UINT, Setting)) { struct nine_context *context = &device->context; @@ -1267,11 +1499,10 @@ nine_context_set_stream_source_freq(struct NineDevice9 *device, context->changed.group |= NINE_STATE_STREAMFREQ; } -static void -nine_context_set_indices_apply(struct NineDevice9 *device, - struct pipe_resource *res, - UINT IndexSize, - UINT OffsetInBytes) +CSMT_ITEM_NO_WAIT(nine_context_set_indices_apply, + ARG_BIND_RES(struct pipe_resource, res), + ARG_VAL(UINT, IndexSize), + ARG_VAL(UINT, OffsetInBytes)) { struct nine_context *context = &device->context; @@ -1302,9 +1533,8 @@ nine_context_set_indices(struct NineDevice9 *device, nine_context_set_indices_apply(device, res, IndexSize, OffsetInBytes); } -void -nine_context_set_vertex_declaration(struct NineDevice9 *device, - struct NineVertexDeclaration9 *vdecl) 
+CSMT_ITEM_NO_WAIT(nine_context_set_vertex_declaration, + ARG_BIND_REF(struct NineVertexDeclaration9, vdecl)) { struct nine_context *context = &device->context; BOOL was_programmable_vs = context->programmable_vs; @@ -1320,9 +1550,8 @@ nine_context_set_vertex_declaration(struct NineDevice9 *device, context->changed.group |= NINE_STATE_VDECL; } -void -nine_context_set_vertex_shader(struct NineDevice9 *device, - struct NineVertexShader9 *pShader) +CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader, + ARG_BIND_REF(struct NineVertexShader9, pShader)) { struct nine_context *context = &device->context; BOOL was_programmable_vs = context->programmable_vs; @@ -1338,18 +1567,18 @@ nine_context_set_vertex_shader(struct NineDevice9 *device, context->changed.group |= NINE_STATE_VS; } -void -nine_context_set_vertex_shader_constant_f(struct NineDevice9 *device, - UINT StartRegister, - const float *pConstantData, - UINT Vector4fCount) +CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_f, + ARG_VAL(UINT, StartRegister), + ARG_MEM(float, pConstantData), + ARG_MEM_SIZE(unsigned, pConstantData_size), + ARG_VAL(UINT, Vector4fCount)) { struct nine_context *context = &device->context; float *vs_const_f = device->may_swvp ? 
context->vs_const_f_swvp : context->vs_const_f; memcpy(&vs_const_f[StartRegister * 4], pConstantData, - Vector4fCount * 4 * sizeof(context->vs_const_f[0])); + pConstantData_size); if (device->may_swvp) { Vector4fCount = MIN2(StartRegister + Vector4fCount, NINE_MAX_CONST_F) - StartRegister; @@ -1363,12 +1592,11 @@ nine_context_set_vertex_shader_constant_f(struct NineDevice9 *device, context->changed.group |= NINE_STATE_VS_CONST; } - -void -nine_context_set_vertex_shader_constant_i(struct NineDevice9 *device, - UINT StartRegister, - const int *pConstantData, - UINT Vector4iCount) +CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_i, + ARG_VAL(UINT, StartRegister), + ARG_MEM(int, pConstantData), + ARG_MEM_SIZE(unsigned, pConstantData_size), + ARG_VAL(UINT, Vector4iCount)) { struct nine_context *context = &device->context; int i; @@ -1376,7 +1604,7 @@ nine_context_set_vertex_shader_constant_i(struct NineDevice9 *device, if (device->driver_caps.vs_integer) { memcpy(&context->vs_const_i[4 * StartRegister], pConstantData, - Vector4iCount * sizeof(int[4])); + pConstantData_size); } else { for (i = 0; i < Vector4iCount; i++) { context->vs_const_i[4 * (StartRegister + i)] = fui((float)(pConstantData[4 * i])); @@ -1390,16 +1618,18 @@ nine_context_set_vertex_shader_constant_i(struct NineDevice9 *device, context->changed.group |= NINE_STATE_VS_CONST; } -void -nine_context_set_vertex_shader_constant_b(struct NineDevice9 *device, - UINT StartRegister, - const BOOL *pConstantData, - UINT BoolCount) +CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_b, + ARG_VAL(UINT, StartRegister), + ARG_MEM(BOOL, pConstantData), + ARG_MEM_SIZE(unsigned, pConstantData_size), + ARG_VAL(UINT, BoolCount)) { struct nine_context *context = &device->context; int i; uint32_t bool_true = device->driver_caps.vs_integer ? 0xFFFFFFFF : fui(1.0f); + (void) pConstantData_size; + for (i = 0; i < BoolCount; i++) context->vs_const_b[StartRegister + i] = pConstantData[i] ? 
bool_true : 0; @@ -1407,9 +1637,8 @@ nine_context_set_vertex_shader_constant_b(struct NineDevice9 *device, context->changed.group |= NINE_STATE_VS_CONST; } -void -nine_context_set_pixel_shader(struct NineDevice9 *device, - struct NinePixelShader9* ps) +CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader, + ARG_BIND_REF(struct NinePixelShader9, ps)) { struct nine_context *context = &device->context; unsigned old_mask = context->ps ? context->ps->rt_mask : 1; @@ -1430,28 +1659,28 @@ nine_context_set_pixel_shader(struct NineDevice9 *device, context->changed.group |= NINE_STATE_FB; } -void -nine_context_set_pixel_shader_constant_f(struct NineDevice9 *device, - UINT StartRegister, - const float *pConstantData, - UINT Vector4fCount) +CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_f, + ARG_VAL(UINT, StartRegister), + ARG_MEM(float, pConstantData), + ARG_MEM_SIZE(unsigned, pConstantData_size), + ARG_VAL(UINT, Vector4fCount)) { struct nine_context *context = &device->context; memcpy(&context->ps_const_f[StartRegister * 4], pConstantData, - Vector4fCount * 4 * sizeof(context->ps_const_f[0])); + pConstantData_size); context->changed.ps_const_f = TRUE; context->changed.group |= NINE_STATE_PS_CONST; } /* For stateblocks */ -static void -nine_context_set_pixel_shader_constant_i_transformed(struct NineDevice9 *device, - UINT StartRegister, - const int *pConstantData, - UINT Vector4iCount) +CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_i_transformed, + ARG_VAL(UINT, StartRegister), + ARG_MEM(int, pConstantData), + ARG_MEM_SIZE(unsigned, pConstantData_size), + ARG_VAL(UINT, Vector4iCount)) { struct nine_context *context = &device->context; @@ -1463,11 +1692,11 @@ nine_context_set_pixel_shader_constant_i_transformed(struct NineDevice9 *device, context->changed.group |= NINE_STATE_PS_CONST; } -void -nine_context_set_pixel_shader_constant_i(struct NineDevice9 *device, - UINT StartRegister, - const int *pConstantData, - UINT Vector4iCount) 
+CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_i, + ARG_VAL(UINT, StartRegister), + ARG_MEM(int, pConstantData), + ARG_MEM_SIZE(unsigned, pConstantData_size), + ARG_VAL(UINT, Vector4iCount)) { struct nine_context *context = &device->context; int i; @@ -1475,7 +1704,7 @@ nine_context_set_pixel_shader_constant_i(struct NineDevice9 *device, if (device->driver_caps.ps_integer) { memcpy(&context->ps_const_i[StartRegister][0], pConstantData, - Vector4iCount * sizeof(context->ps_const_i[0])); + pConstantData_size); } else { for (i = 0; i < Vector4iCount; i++) { context->ps_const_i[StartRegister+i][0] = fui((float)(pConstantData[4*i])); @@ -1488,16 +1717,18 @@ nine_context_set_pixel_shader_constant_i(struct NineDevice9 *device, context->changed.group |= NINE_STATE_PS_CONST; } -void -nine_context_set_pixel_shader_constant_b(struct NineDevice9 *device, - UINT StartRegister, - const BOOL *pConstantData, - UINT BoolCount) +CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_b, + ARG_VAL(UINT, StartRegister), + ARG_MEM(BOOL, pConstantData), + ARG_MEM_SIZE(unsigned, pConstantData_size), + ARG_VAL(UINT, BoolCount)) { struct nine_context *context = &device->context; int i; uint32_t bool_true = device->driver_caps.ps_integer ? 0xFFFFFFFF : fui(1.0f); + (void) pConstantData_size; + for (i = 0; i < BoolCount; i++) context->ps_const_b[StartRegister + i] = pConstantData[i] ? 
bool_true : 0; @@ -1505,10 +1736,10 @@ nine_context_set_pixel_shader_constant_b(struct NineDevice9 *device, context->changed.group |= NINE_STATE_PS_CONST; } -void -nine_context_set_render_target(struct NineDevice9 *device, - DWORD RenderTargetIndex, - struct NineSurface9 *rt) +/* XXX: use resource, as resource might change */ +CSMT_ITEM_NO_WAIT(nine_context_set_render_target, + ARG_VAL(DWORD, RenderTargetIndex), + ARG_BIND_REF(struct NineSurface9, rt)) { struct nine_context *context = &device->context; const unsigned i = RenderTargetIndex; @@ -1540,9 +1771,9 @@ nine_context_set_render_target(struct NineDevice9 *device, } } -void -nine_context_set_depth_stencil(struct NineDevice9 *device, - struct NineSurface9 *ds) +/* XXX: use resource instead of ds, as resource might change */ +CSMT_ITEM_NO_WAIT(nine_context_set_depth_stencil, + ARG_BIND_REF(struct NineSurface9, ds)) { struct nine_context *context = &device->context; @@ -1550,9 +1781,8 @@ nine_context_set_depth_stencil(struct NineDevice9 *device, context->changed.group |= NINE_STATE_FB; } -void -nine_context_set_viewport(struct NineDevice9 *device, - const D3DVIEWPORT9 *viewport) +CSMT_ITEM_NO_WAIT(nine_context_set_viewport, + ARG_COPY_REF(D3DVIEWPORT9, viewport)) { struct nine_context *context = &device->context; @@ -1560,9 +1790,8 @@ nine_context_set_viewport(struct NineDevice9 *device, context->changed.group |= NINE_STATE_VIEWPORT; } -void -nine_context_set_scissor(struct NineDevice9 *device, - const struct pipe_scissor_state *scissor) +CSMT_ITEM_NO_WAIT(nine_context_set_scissor, + ARG_COPY_REF(struct pipe_scissor_state, scissor)) { struct nine_context *context = &device->context; @@ -1570,10 +1799,9 @@ nine_context_set_scissor(struct NineDevice9 *device, context->changed.group |= NINE_STATE_SCISSOR; } -void -nine_context_set_transform(struct NineDevice9 *device, - D3DTRANSFORMSTATETYPE State, - const D3DMATRIX *pMatrix) +CSMT_ITEM_NO_WAIT(nine_context_set_transform, + ARG_VAL(D3DTRANSFORMSTATETYPE, State), + 
ARG_COPY_REF(D3DMATRIX, pMatrix)) { struct nine_context *context = &device->context; D3DMATRIX *M = nine_state_access_transform(&context->ff, State, TRUE); @@ -1583,9 +1811,8 @@ nine_context_set_transform(struct NineDevice9 *device, context->changed.group |= NINE_STATE_FF; } -void -nine_context_set_material(struct NineDevice9 *device, - const D3DMATERIAL9 *pMaterial) +CSMT_ITEM_NO_WAIT(nine_context_set_material, + ARG_COPY_REF(D3DMATERIAL9, pMaterial)) { struct nine_context *context = &device->context; @@ -1593,10 +1820,9 @@ nine_context_set_material(struct NineDevice9 *device, context->changed.group |= NINE_STATE_FF_MATERIAL; } -void -nine_context_set_light(struct NineDevice9 *device, - DWORD Index, - const D3DLIGHT9 *pLight) +CSMT_ITEM_NO_WAIT(nine_context_set_light, + ARG_VAL(DWORD, Index), + ARG_COPY_REF(D3DLIGHT9, pLight)) { struct nine_context *context = &device->context; @@ -1613,26 +1839,26 @@ nine_context_light_enable_stateblock(struct NineDevice9 *device, { struct nine_context *context = &device->context; + if (device->csmt_active) /* TODO: fix */ + nine_csmt_process(device); memcpy(context->ff.active_light, active_light, NINE_MAX_LIGHTS_ACTIVE * sizeof(context->ff.active_light[0])); context->ff.num_lights_active = num_lights_active; context->changed.group |= NINE_STATE_FF_LIGHTING; } -void -nine_context_light_enable(struct NineDevice9 *device, - DWORD Index, - BOOL Enable) +CSMT_ITEM_NO_WAIT(nine_context_light_enable, + ARG_VAL(DWORD, Index), + ARG_VAL(BOOL, Enable)) { struct nine_context *context = &device->context; nine_state_light_enable(&context->ff, &context->changed.group, Index, Enable); } -void -nine_context_set_texture_stage_state(struct NineDevice9 *device, - DWORD Stage, - D3DTEXTURESTAGESTATETYPE Type, - DWORD Value) +CSMT_ITEM_NO_WAIT(nine_context_set_texture_stage_state, + ARG_VAL(DWORD, Stage), + ARG_VAL(D3DTEXTURESTAGESTATETYPE, Type), + ARG_VAL(DWORD, Value)) { struct nine_context *context = &device->context; int bumpmap_index = -1; @@ 
-1673,10 +1899,9 @@ nine_context_set_texture_stage_state(struct NineDevice9 *device, context->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32); } -void -nine_context_set_clip_plane(struct NineDevice9 *device, - DWORD Index, - struct nine_clipplane *pPlane) +CSMT_ITEM_NO_WAIT(nine_context_set_clip_plane, + ARG_VAL(DWORD, Index), + ARG_COPY_REF(struct nine_clipplane, pPlane)) { struct nine_context *context = &device->context; @@ -1684,9 +1909,8 @@ nine_context_set_clip_plane(struct NineDevice9 *device, context->changed.ucp = TRUE; } -void -nine_context_set_swvp(struct NineDevice9 *device, - boolean swvp) +CSMT_ITEM_NO_WAIT(nine_context_set_swvp, + ARG_VAL(boolean, swvp)) { struct nine_context *context = &device->context; @@ -2006,14 +2230,17 @@ nine_context_apply_stateblock(struct NineDevice9 *device, for (r = src->changed.vs_const_f; r; r = r->next) nine_context_set_vertex_shader_constant_f(device, r->bgn, &src->vs_const_f[r->bgn * 4], + sizeof(float[4]) * (r->end - r->bgn), r->end - r->bgn); for (r = src->changed.vs_const_i; r; r = r->next) nine_context_set_vertex_shader_constant_i(device, r->bgn, &src->vs_const_i[r->bgn * 4], + sizeof(int[4]) * (r->end - r->bgn), r->end - r->bgn); for (r = src->changed.vs_const_b; r; r = r->next) nine_context_set_vertex_shader_constant_b(device, r->bgn, &src->vs_const_b[r->bgn * 4], + sizeof(BOOL) * (r->end - r->bgn), r->end - r->bgn); } @@ -2023,20 +2250,21 @@ nine_context_apply_stateblock(struct NineDevice9 *device, for (r = src->changed.ps_const_f; r; r = r->next) nine_context_set_pixel_shader_constant_f(device, r->bgn, &src->ps_const_f[r->bgn * 4], + sizeof(float[4]) * (r->end - r->bgn), r->end - r->bgn); if (src->changed.ps_const_i) { uint16_t m = src->changed.ps_const_i; for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1) if (m & 1) nine_context_set_pixel_shader_constant_i_transformed(device, i, - src->ps_const_i[i], 1); + src->ps_const_i[i], sizeof(int[4]), 1); } if (src->changed.ps_const_b) { uint16_t m = 
src->changed.ps_const_b; for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1) if (m & 1) nine_context_set_pixel_shader_constant_b(device, i, - &src->ps_const_b[i], 1); + &src->ps_const_b[i], sizeof(BOOL), 1); } } @@ -2103,15 +2331,13 @@ nine_update_state_framebuffer_clear(struct NineDevice9 *device) update_framebuffer(device, TRUE); } -/* Checks were already done before the call */ -void -nine_context_clear_fb(struct NineDevice9 *device, - DWORD Count, - const D3DRECT *pRects, - DWORD Flags, - D3DCOLOR Color, - float Z, - DWORD Stencil) +CSMT_ITEM_NO_WAIT(nine_context_clear_fb, + ARG_VAL(DWORD, Count), + ARG_COPY_REF(D3DRECT, pRects), + ARG_VAL(DWORD, Flags), + ARG_VAL(D3DCOLOR, Color), + ARG_VAL(float, Z), + ARG_VAL(DWORD, Stencil)) { struct nine_context *context = &device->context; const int sRGB = context->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0; @@ -2266,11 +2492,10 @@ init_draw_info(struct pipe_draw_info *info, info->indirect_params = NULL; } -void -nine_context_draw_primitive(struct NineDevice9 *device, - D3DPRIMITIVETYPE PrimitiveType, - UINT StartVertex, - UINT PrimitiveCount) +CSMT_ITEM_NO_WAIT(nine_context_draw_primitive, + ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType), + ARG_VAL(UINT, StartVertex), + ARG_VAL(UINT, PrimitiveCount)) { struct nine_context *context = &device->context; struct pipe_draw_info info; @@ -2287,14 +2512,13 @@ nine_context_draw_primitive(struct NineDevice9 *device, context->pipe->draw_vbo(context->pipe, &info); } -void -nine_context_draw_indexed_primitive(struct NineDevice9 *device, - D3DPRIMITIVETYPE PrimitiveType, - INT BaseVertexIndex, - UINT MinVertexIndex, - UINT NumVertices, - UINT StartIndex, - UINT PrimitiveCount) +CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive, + ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType), + ARG_VAL(INT, BaseVertexIndex), + ARG_VAL(UINT, MinVertexIndex), + ARG_VAL(UINT, NumVertices), + ARG_VAL(UINT, StartIndex), + ARG_VAL(UINT, PrimitiveCount)) { struct nine_context *context = &device->context; struct pipe_draw_info 
info; @@ -2312,11 +2536,10 @@ nine_context_draw_indexed_primitive(struct NineDevice9 *device, context->pipe->draw_vbo(context->pipe, &info); } -void -nine_context_draw_primitive_from_vtxbuf(struct NineDevice9 *device, - D3DPRIMITIVETYPE PrimitiveType, - UINT PrimitiveCount, - struct pipe_vertex_buffer *vtxbuf) +CSMT_ITEM_NO_WAIT(nine_context_draw_primitive_from_vtxbuf, + ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType), + ARG_VAL(UINT, PrimitiveCount), + ARG_BIND_BUF(struct pipe_vertex_buffer, vtxbuf)) { struct nine_context *context = &device->context; struct pipe_draw_info info; @@ -2335,14 +2558,13 @@ nine_context_draw_primitive_from_vtxbuf(struct NineDevice9 *device, context->pipe->draw_vbo(context->pipe, &info); } -void -nine_context_draw_indexed_primitive_from_vtxbuf_idxbuf(struct NineDevice9 *device, - D3DPRIMITIVETYPE PrimitiveType, - UINT MinVertexIndex, - UINT NumVertices, - UINT PrimitiveCount, - struct pipe_vertex_buffer *vbuf, - struct pipe_index_buffer *ibuf) +CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive_from_vtxbuf_idxbuf, + ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType), + ARG_VAL(UINT, MinVertexIndex), + ARG_VAL(UINT, NumVertices), + ARG_VAL(UINT, PrimitiveCount), + ARG_BIND_BUF(struct pipe_vertex_buffer, vbuf), + ARG_BIND_BUF(struct pipe_index_buffer, ibuf)) { struct nine_context *context = &device->context; struct pipe_draw_info info; @@ -2366,27 +2588,29 @@ nine_context_create_query(struct NineDevice9 *device, unsigned query_type) { struct nine_context *context = &device->context; + if (device->csmt_active) + nine_csmt_process(device); return context->pipe->create_query(context->pipe, query_type, 0); } -void -nine_context_destroy_query(struct NineDevice9 *device, struct pipe_query *query) +CSMT_ITEM_DO_WAIT(nine_context_destroy_query, + ARG_REF(struct pipe_query, query)) { struct nine_context *context = &device->context; context->pipe->destroy_query(context->pipe, query); } -void -nine_context_begin_query(struct NineDevice9 *device, struct pipe_query 
*query) +CSMT_ITEM_NO_WAIT(nine_context_begin_query, + ARG_REF(struct pipe_query, query)) { struct nine_context *context = &device->context; (void) context->pipe->begin_query(context->pipe, query); } -void -nine_context_end_query(struct NineDevice9 *device, struct pipe_query *query) +CSMT_ITEM_NO_WAIT(nine_context_end_query, + ARG_REF(struct pipe_query, query)) { struct nine_context *context = &device->context; @@ -2401,6 +2625,8 @@ nine_context_get_query_result(struct NineDevice9 *device, struct pipe_query *que struct nine_context *context = &device->context; (void) flush; + if (device->csmt_active) + nine_csmt_process(device); return context->pipe->get_query_result(context->pipe, query, wait, result); } diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h index 9e863bcd350..ee88a943ccd 100644 --- a/src/gallium/state_trackers/nine/nine_state.h +++ b/src/gallium/state_trackers/nine/nine_state.h @@ -337,6 +337,10 @@ extern const uint32_t nine_render_states_vertex[(NINED3DRS_COUNT + 31) / 32]; struct NineDevice9; +/* Internal multithreading: When enabled, the nine_context functions + * will append work to a worker thread when possible. Only the worker + * thread can access struct nine_context. 
*/ + void nine_context_set_render_state(struct NineDevice9 *device, D3DRENDERSTATETYPE State, @@ -381,18 +385,21 @@ void nine_context_set_vertex_shader_constant_f(struct NineDevice9 *device, UINT StartRegister, const float *pConstantData, + const unsigned pConstantData_size, UINT Vector4fCount); void nine_context_set_vertex_shader_constant_i(struct NineDevice9 *device, UINT StartRegister, const int *pConstantData, + const unsigned pConstantData_size, UINT Vector4iCount); void nine_context_set_vertex_shader_constant_b(struct NineDevice9 *device, UINT StartRegister, const BOOL *pConstantData, + const unsigned pConstantData_size, UINT BoolCount); void @@ -403,18 +410,21 @@ void nine_context_set_pixel_shader_constant_f(struct NineDevice9 *device, UINT StartRegister, const float *pConstantData, + const unsigned pConstantData_size, UINT Vector4fCount); void nine_context_set_pixel_shader_constant_i(struct NineDevice9 *device, UINT StartRegister, const int *pConstantData, + const unsigned pConstantData_size, UINT Vector4iCount); void nine_context_set_pixel_shader_constant_b(struct NineDevice9 *device, UINT StartRegister, const BOOL *pConstantData, + const unsigned pConstantData_size, UINT BoolCount); void @@ -462,7 +472,7 @@ nine_context_set_depth_stencil(struct NineDevice9 *device, void nine_context_set_clip_plane(struct NineDevice9 *device, DWORD Index, - struct nine_clipplane *pPlane); + const struct nine_clipplane *pPlane); void nine_context_set_swvp(struct NineDevice9 *device, @@ -555,4 +565,26 @@ nine_state_light_enable(struct nine_ff_state *, uint32_t *, const char *nine_d3drs_to_string(DWORD State); +/* CSMT functions */ +struct csmt_context; + +struct csmt_context * +nine_csmt_create( struct NineDevice9 *This ); + +void +nine_csmt_destroy( struct NineDevice9 *This, struct csmt_context *ctx ); + +void +nine_csmt_process( struct NineDevice9 *This ); + + +/* Get the pipe_context (should not be called from the worker thread). 
+ * All the work in the worker thread is finished before returning. */ +struct pipe_context * +nine_context_get_pipe( struct NineDevice9 *device ); + +/* Can be called from all threads */ +struct pipe_context * +nine_context_get_pipe_multithread( struct NineDevice9 *device ); + #endif /* _NINE_STATE_H_ */ diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c index 92980afe5d6..cb970eae76b 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.c +++ b/src/gallium/state_trackers/nine/pixelshader9.c @@ -94,7 +94,7 @@ NinePixelShader9_dtor( struct NinePixelShader9 *This ) DBG("This=%p\n", This); if (This->base.device) { - struct pipe_context *pipe = NineDevice9_GetPipe(This->base.device); + struct pipe_context *pipe = nine_context_get_pipe_multithread(This->base.device); struct nine_shader_variant *var = &This->variant; do { diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c index 0ce41223511..f348f501a77 100644 --- a/src/gallium/state_trackers/nine/surface9.c +++ b/src/gallium/state_trackers/nine/surface9.c @@ -31,6 +31,7 @@ #include "nine_helpers.h" #include "nine_pipe.h" #include "nine_dump.h" +#include "nine_state.h" #include "pipe/p_context.h" #include "pipe/p_screen.h" diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c index 51aab066a56..9074f2b3ded 100644 --- a/src/gallium/state_trackers/nine/swapchain9.c +++ b/src/gallium/state_trackers/nine/swapchain9.c @@ -809,6 +809,8 @@ NineSwapChain9_Present( struct NineSwapChain9 *This, } } + nine_csmt_process(This->base.device); + hr = present(This, pSourceRect, pDestRect, hDestWindowOverride, pDirtyRegion, dwFlags); if (hr == D3DERR_WASSTILLDRAWING) diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c index 71a56f4c6a2..79a49d13b6c 100644 --- a/src/gallium/state_trackers/nine/vertexshader9.c +++ 
b/src/gallium/state_trackers/nine/vertexshader9.c @@ -112,7 +112,7 @@ NineVertexShader9_dtor( struct NineVertexShader9 *This ) DBG("This=%p\n", This); if (This->base.device) { - struct pipe_context *pipe = NineDevice9_GetPipe(This->base.device); + struct pipe_context *pipe = nine_context_get_pipe_multithread(This->base.device); struct nine_shader_variant *var = &This->variant; struct nine_shader_variant_so *var_so = &This->variant_so; diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c index e0c2e26c02f..6163734185a 100644 --- a/src/gallium/targets/d3dadapter9/drm.c +++ b/src/gallium/targets/d3dadapter9/drm.c @@ -58,6 +58,7 @@ DRI_CONF_BEGIN DRI_CONF_NINE_THREADSUBMIT("false") DRI_CONF_NINE_ALLOWDISCARDDELAYEDRELEASE("true") DRI_CONF_NINE_TEARFREEDISCARD("false") + DRI_CONF_NINE_CSMT(-1) DRI_CONF_SECTION_END DRI_CONF_END; @@ -301,6 +302,11 @@ drm_create_adapter( int fd, ctx->base.tearfree_discard = FALSE; } + if (driCheckOption(&userInitOptions, "csmt_force", DRI_INT)) + ctx->base.csmt_force = driQueryOptioni(&userInitOptions, "csmt_force"); + else + ctx->base.csmt_force = -1; + driDestroyOptionCache(&userInitOptions); driDestroyOptionInfo(&defaultInitOptions); diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h index 0ab2efcf117..a189bbedec6 100644 --- a/src/mesa/drivers/dri/common/xmlpool/t_options.h +++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h @@ -386,3 +386,8 @@ DRI_CONF_OPT_END DRI_CONF_OPT_BEGIN_B(tearfree_discard, def) \ DRI_CONF_DESC(en,gettext("Whether to make d3d's presentation mode DISCARD (games usually use that mode) Tear Free. If rendering above screen refresh, some frames will get skipped. false by default.")) \ DRI_CONF_OPT_END + +#define DRI_CONF_NINE_CSMT(def) \ +DRI_CONF_OPT_BEGIN(csmt_force, int, def) \ + DRI_CONF_DESC(en,gettext("If set to 1, force gallium nine CSMT. If set to 0, disable it. 
By default (-1) CSMT is enabled on known thread-safe drivers.")) \ +DRI_CONF_OPT_END -- 2.30.2