a8573cdf686e2aac696aa266eb1d676b9aab6d30
[mesa.git] / src / gallium / state_trackers / d3d1x / gd3d11 / d3d11_context.h
1 /**************************************************************************
2 *
3 * Copyright 2010 Luca Barbieri
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
/* Table of null pointers used to unbind things; it has 128 entries
 * because that many are needed for resources. */
static const void *zero_data[128];

/* Bit layout of update_flags: one dirty bit per shader stage for the
 * sampler views (starting at bit 0), one dirty bit per shader stage for
 * the sampler states (starting at bit D3D11_STAGES), followed by a single
 * dirty bit for the vertex buffers. */
#define UPDATE_VIEWS_SHIFT (D3D11_STAGES * 0)
#define UPDATE_SAMPLERS_SHIFT (D3D11_STAGES * 1)
#define UPDATE_VERTEX_BUFFERS (1 << (D3D11_STAGES * 2))
33
34 #if API >= 11
35 template<typename PtrTraits>
36 struct GalliumD3D11DeviceContext :
37 public GalliumD3D11DeviceChild<ID3D11DeviceContext>
38 {
39 #else
40 template<bool threadsafe>
41 struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
42 {
43 typedef simple_ptr_traits PtrTraits;
44 typedef GalliumD3D10Device GalliumD3D10DeviceContext;
45 #endif
46
47 refcnt_ptr<GalliumD3D11Shader<>, PtrTraits> shaders[D3D11_STAGES];
48 refcnt_ptr<GalliumD3D11InputLayout, PtrTraits> input_layout;
49 refcnt_ptr<GalliumD3D11Buffer, PtrTraits> index_buffer;
50 refcnt_ptr<GalliumD3D11RasterizerState, PtrTraits> rasterizer_state;
51 refcnt_ptr<GalliumD3D11DepthStencilState, PtrTraits> depth_stencil_state;
52 refcnt_ptr<GalliumD3D11BlendState, PtrTraits> blend_state;
53 refcnt_ptr<GalliumD3D11DepthStencilView, PtrTraits> depth_stencil_view;
54 refcnt_ptr<GalliumD3D11Predicate, PtrTraits> render_predicate;
55
56 refcnt_ptr<GalliumD3D11Buffer, PtrTraits> constant_buffers[D3D11_STAGES][D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
57 refcnt_ptr<GalliumD3D11ShaderResourceView, PtrTraits> shader_resource_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
58 refcnt_ptr<GalliumD3D11SamplerState, PtrTraits> samplers[D3D11_STAGES][D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
59 refcnt_ptr<GalliumD3D11Buffer, PtrTraits> input_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
60 refcnt_ptr<GalliumD3D11RenderTargetView, PtrTraits> render_target_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
61 refcnt_ptr<GalliumD3D11Buffer, PtrTraits> so_targets[D3D11_SO_BUFFER_SLOT_COUNT];
62
63 #if API >= 11
64 refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> cs_unordered_access_views[D3D11_PS_CS_UAV_REGISTER_COUNT];
65 refcnt_ptr<ID3D11UnorderedAccessView, PtrTraits> om_unordered_access_views[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
66 #endif
67
68 D3D11_VIEWPORT viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
69 D3D11_RECT scissor_rects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
70 unsigned so_offsets[D3D11_SO_BUFFER_SLOT_COUNT];
71 D3D11_PRIMITIVE_TOPOLOGY primitive_topology;
72 DXGI_FORMAT index_format;
73 unsigned index_offset;
74 BOOL render_predicate_value;
75 float blend_color[4];
76 unsigned sample_mask;
77 unsigned stencil_ref;
78 bool depth_clamp;
79
80 void* default_input_layout;
81 void* default_rasterizer;
82 void* default_depth_stencil;
83 void* default_blend;
84 void* default_sampler;
85 void* ld_sampler;
86 void * default_shaders[D3D11_STAGES];
87
88 // derived state
89 int primitive_mode;
90 struct pipe_vertex_buffer vertex_buffers[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
91 struct pipe_resource* so_buffers[D3D11_SO_BUFFER_SLOT_COUNT];
92 struct pipe_sampler_view* sampler_views[D3D11_STAGES][D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT];
93 struct
94 {
95 void* ld; // accessed with a -1 index from v
96 void* v[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT];
97 } sampler_csos[D3D11_STAGES];
98 struct pipe_resource * buffers[D3D11_SO_BUFFER_SLOT_COUNT];
99 unsigned num_shader_resource_views[D3D11_STAGES];
100 unsigned num_samplers[D3D11_STAGES];
101 unsigned num_vertex_buffers;
102 unsigned num_render_target_views;
103 unsigned num_viewports;
104 unsigned num_scissor_rects;
105 unsigned num_so_targets;
106
107 struct pipe_context* pipe;
108 unsigned update_flags;
109
110 bool owns_pipe;
111 unsigned context_flags;
112
113 GalliumD3D11Caps caps;
114
115 cso_context* cso_ctx;
116 gen_mipmap_state* gen_mipmap;
117
118 #if API >= 11
119 #define SYNCHRONIZED do {} while(0)
120
121 GalliumD3D11DeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, bool owns_pipe, unsigned context_flags = 0)
122 : GalliumD3D11DeviceChild(device), pipe(pipe), owns_pipe(owns_pipe), context_flags(context_flags)
123 {
124 caps = device->screen_caps;
125 init_context();
126 }
127
128 ~GalliumD3D11DeviceContext()
129 {
130 destroy_context();
131 }
132 #else
133 #define SYNCHRONIZED lock_t<maybe_mutex_t<threadsafe> > lock_(this->mutex)
134
135 GalliumD3D10Device(pipe_screen* screen, pipe_context* pipe, bool owns_pipe, unsigned creation_flags, IDXGIAdapter* adapter)
136 : GalliumD3D10ScreenImpl<threadsafe>(screen, pipe, owns_pipe, creation_flags, adapter), pipe(pipe), owns_pipe(owns_pipe), context_flags(0)
137 {
138 caps = this->screen_caps;
139 init_context();
140 }
141
142 ~GalliumD3D10Device()
143 {
144 destroy_context();
145 }
146 #endif
147
148 void init_context()
149 {
150 if(!pipe->begin_query)
151 caps.queries = false;
152 if(!pipe->render_condition)
153 caps.render_condition = false;
154 if(!pipe->bind_gs_state)
155 {
156 caps.gs = false;
157 caps.stages = 2;
158 }
159 if(!pipe->set_stream_output_buffers)
160 caps.so = false;
161
162 update_flags = 0;
163
164 // pipeline state
165 memset(viewports, 0, sizeof(viewports));
166 memset(scissor_rects, 0, sizeof(scissor_rects));
167 memset(so_offsets, 0, sizeof(so_offsets));
168 primitive_topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
169 index_format = DXGI_FORMAT_UNKNOWN;
170 index_offset = 0;
171 render_predicate_value = 0;
172 memset(blend_color, 0, sizeof(blend_color));
173 sample_mask = ~0;
174 stencil_ref = 0;
175 depth_clamp = 0;
176
177 // derived state
178 primitive_mode = 0;
179 memset(vertex_buffers, 0, sizeof(vertex_buffers));
180 memset(so_buffers, 0, sizeof(so_buffers));
181 memset(sampler_views, 0, sizeof(sampler_views));
182 memset(sampler_csos, 0, sizeof(sampler_csos));
183 memset(num_shader_resource_views, 0, sizeof(num_shader_resource_views));
184 memset(num_samplers, 0, sizeof(num_samplers));
185 num_vertex_buffers = 0;
186 num_render_target_views = 0;
187 num_viewports = 0;
188 num_scissor_rects = 0;
189 num_so_targets = 0;
190
191 default_input_layout = pipe->create_vertex_elements_state(pipe, 0, 0);
192
193 struct pipe_rasterizer_state rasterizerd;
194 memset(&rasterizerd, 0, sizeof(rasterizerd));
195 rasterizerd.gl_rasterization_rules = 1;
196 rasterizerd.cull_face = PIPE_FACE_BACK;
197 default_rasterizer = pipe->create_rasterizer_state(pipe, &rasterizerd);
198
199 struct pipe_depth_stencil_alpha_state depth_stencild;
200 memset(&depth_stencild, 0, sizeof(depth_stencild));
201 depth_stencild.depth.enabled = TRUE;
202 depth_stencild.depth.writemask = 1;
203 depth_stencild.depth.func = PIPE_FUNC_LESS;
204 default_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &depth_stencild);
205
206 struct pipe_blend_state blendd;
207 memset(&blendd, 0, sizeof(blendd));
208 blendd.rt[0].colormask = 0xf;
209 default_blend = pipe->create_blend_state(pipe, &blendd);
210
211 struct pipe_sampler_state samplerd;
212 memset(&samplerd, 0, sizeof(samplerd));
213 samplerd.normalized_coords = 1;
214 samplerd.min_img_filter = PIPE_TEX_FILTER_LINEAR;
215 samplerd.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
216 samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
217 samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
218 samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
219 samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
220 samplerd.border_color[0] = 1.0f;
221 samplerd.border_color[1] = 1.0f;
222 samplerd.border_color[2] = 1.0f;
223 samplerd.border_color[3] = 1.0f;
224 samplerd.min_lod = -FLT_MAX;
225 samplerd.max_lod = FLT_MAX;
226 samplerd.max_anisotropy = 1;
227 default_sampler = pipe->create_sampler_state(pipe, &samplerd);
228
229 memset(&samplerd, 0, sizeof(samplerd));
230 samplerd.normalized_coords = 0;
231 samplerd.min_img_filter = PIPE_TEX_FILTER_NEAREST;
232 samplerd.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
233 samplerd.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
234 samplerd.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
235 samplerd.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
236 samplerd.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
237 samplerd.min_lod = -FLT_MAX;
238 samplerd.max_lod = FLT_MAX;
239 samplerd.max_anisotropy = 1;
240 ld_sampler = pipe->create_sampler_state(pipe, &samplerd);
241
242 for(unsigned s = 0; s < D3D11_STAGES; ++s)
243 {
244 sampler_csos[s].ld = ld_sampler;
245 for(unsigned i = 0; i < D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT; ++i)
246 sampler_csos[s].v[i] = default_sampler;
247 }
248
249 // TODO: should this really be empty shaders, or should they be all-passthrough?
250 memset(default_shaders, 0, sizeof(default_shaders));
251 struct ureg_program *ureg;
252 ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
253 ureg_END(ureg);
254 default_shaders[PIPE_SHADER_FRAGMENT] = ureg_create_shader_and_destroy(ureg, pipe);
255
256 ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
257 ureg_END(ureg);
258 default_shaders[PIPE_SHADER_VERTEX] = ureg_create_shader_and_destroy(ureg, pipe);
259
260 cso_ctx = cso_create_context(pipe);
261 gen_mipmap = util_create_gen_mipmap(pipe, cso_ctx);
262
263 RestoreGalliumState();
264 }
265
266 void destroy_context()
267 {
268 util_destroy_gen_mipmap(gen_mipmap);
269 cso_destroy_context(cso_ctx);
270 pipe->delete_vertex_elements_state(pipe, default_input_layout);
271 pipe->delete_rasterizer_state(pipe, default_rasterizer);
272 pipe->delete_depth_stencil_alpha_state(pipe, default_depth_stencil);
273 pipe->delete_blend_state(pipe, default_blend);
274 pipe->delete_sampler_state(pipe, default_sampler);
275 pipe->delete_sampler_state(pipe, ld_sampler);
276 pipe->delete_fs_state(pipe, default_shaders[PIPE_SHADER_FRAGMENT]);
277 pipe->delete_vs_state(pipe, default_shaders[PIPE_SHADER_VERTEX]);
278 if(owns_pipe)
279 pipe->destroy(pipe);
280 }
281
282 virtual unsigned STDMETHODCALLTYPE GetContextFlags(void)
283 {
284 return context_flags;
285 }
286 #if API >= 11
287 #define SET_SHADER_EXTRA_ARGS , \
288 __in_ecount_opt(NumClassInstances) ID3D11ClassInstance *const *ppClassInstances, \
289 unsigned NumClassInstances
290 #define GET_SHADER_EXTRA_ARGS , \
291 __out_ecount_opt(*pNumClassInstances) ID3D11ClassInstance **ppClassInstances, \
292 __inout_opt unsigned *pNumClassInstances
293 #else
294 #define SET_SHADER_EXTRA_ARGS
295 #define GET_SHADER_EXTRA_ARGS
296 #endif
297
298 /* On Windows D3D11, SetConstantBuffers and SetShaderResources crash if passed a null pointer.
299 * Instead, you have to pass a pointer to nulls to unbind things.
300 * We do the same.
301 * TODO: is D3D10 the same?
302 */
303 template<unsigned s>
304 void xs_set_shader(GalliumD3D11Shader<>* shader)
305 {
306 if(shader != shaders[s].p)
307 {
308 shaders[s] = shader;
309 void* shader_cso = shader ? shader->object : default_shaders[s];
310 switch(s)
311 {
312 case PIPE_SHADER_VERTEX:
313 pipe->bind_vs_state(pipe, shader_cso);
314 break;
315 case PIPE_SHADER_FRAGMENT:
316 pipe->bind_fs_state(pipe, shader_cso);
317 break;
318 case PIPE_SHADER_GEOMETRY:
319 pipe->bind_gs_state(pipe, shader_cso);
320 break;
321 }
322 update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s)) | (1 << (UPDATE_VIEWS_SHIFT + s));
323 }
324 }
325
326 template<unsigned s>
327 void xs_set_constant_buffers(unsigned start, unsigned count, GalliumD3D11Buffer *const *constbufs)
328 {
329 for(unsigned i = 0; i < count; ++i)
330 {
331 if(constbufs[i] != constant_buffers[s][i].p)
332 {
333 constant_buffers[s][i] = constbufs[i];
334 if(s < caps.stages && start + i < caps.constant_buffers[s])
335 pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL);
336 }
337 }
338 }
339
340 template<unsigned s>
341 void xs_set_shader_resources(unsigned start, unsigned count, GalliumD3D11ShaderResourceView *const *srvs)
342 {
343 int last_different = -1;
344 for(unsigned i = 0; i < count; ++i)
345 {
346 if(shader_resource_views[s][start + i].p != srvs[i])
347 {
348 shader_resource_views[s][start + i] = srvs[i];
349 sampler_views[s][start + i] = srvs[i] ? srvs[i]->object : 0;
350 last_different = i;
351 }
352 }
353 if(last_different >= 0)
354 {
355 num_shader_resource_views[s] = std::max(num_shader_resource_views[s], start + last_different + 1);
356 update_flags |= 1 << (UPDATE_VIEWS_SHIFT + s);
357 }
358 }
359
360 template<unsigned s>
361 void xs_set_samplers(unsigned start, unsigned count, GalliumD3D11SamplerState *const *samps)
362 {
363 int last_different = -1;
364 for(unsigned i = 0; i < count; ++i)
365 {
366 if(samplers[s][start + i].p != samps[i])
367 {
368 samplers[s][start + i] = samps[i];
369 sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler;
370 }
371 if(last_different >= 0)
372 {
373 num_samplers[s] = std::max(num_samplers[s], start + last_different + 1);
374 update_flags |= (UPDATE_SAMPLERS_SHIFT + s);
375 }
376 }
377 }
378
/* Generates the eight per-stage entry points of one shader stage:
 * Set/Get for the shader, the constant buffers, the shader resources and
 * the samplers.
 *   XS    - method prefix; also selects the stage through D3D11_STAGE_<XS>
 *   Stage - name used to form the ID3D11<Stage>Shader interface type
 * The setters forward to the xs_set_* templates; the getters return the
 * stored objects through their .ref() accessor. */
#define IMPLEMENT_SHADER_STAGE(XS, Stage) \
	virtual void STDMETHODCALLTYPE XS##SetShader( \
		__in_opt ID3D11##Stage##Shader *pShader \
		SET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		xs_set_shader<D3D11_STAGE_##XS>((GalliumD3D11Shader<>*)pShader); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShader( \
		__out ID3D11##Stage##Shader **ppShader \
		GET_SHADER_EXTRA_ARGS) \
	{ \
		SYNCHRONIZED; \
		*ppShader = (ID3D11##Stage##Shader*)shaders[D3D11_STAGE_##XS].ref(); \
	} \
	virtual void STDMETHODCALLTYPE XS##SetConstantBuffers( \
		__in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1) unsigned StartSlot, \
		__in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - StartSlot) unsigned NumBuffers, \
		__in_ecount(NumBuffers) ID3D11Buffer *const *ppConstantBuffers) \
	{ \
		SYNCHRONIZED; \
		xs_set_constant_buffers<D3D11_STAGE_##XS>(StartSlot, NumBuffers, (GalliumD3D11Buffer *const *)ppConstantBuffers); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetConstantBuffers( \
		__in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - 1) unsigned StartSlot, \
		__in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT - StartSlot) unsigned NumBuffers, \
		__out_ecount(NumBuffers) ID3D11Buffer **ppConstantBuffers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned n = 0; n < NumBuffers; ++n) \
		{ \
			ppConstantBuffers[n] = constant_buffers[D3D11_STAGE_##XS][StartSlot + n].ref(); \
		} \
	} \
	virtual void STDMETHODCALLTYPE XS##SetShaderResources( \
		__in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - 1) unsigned StartSlot, \
		__in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - StartSlot) unsigned NumViews, \
		__in_ecount(NumViews) ID3D11ShaderResourceView *const *ppShaderResourceViews) \
	{ \
		SYNCHRONIZED; \
		xs_set_shader_resources<D3D11_STAGE_##XS>(StartSlot, NumViews, (GalliumD3D11ShaderResourceView *const *)ppShaderResourceViews); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetShaderResources( \
		__in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - 1) unsigned StartSlot, \
		__in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT - StartSlot) unsigned NumViews, \
		__out_ecount(NumViews) ID3D11ShaderResourceView **ppShaderResourceViews) \
	{ \
		SYNCHRONIZED; \
		for(unsigned n = 0; n < NumViews; ++n) \
		{ \
			ppShaderResourceViews[n] = shader_resource_views[D3D11_STAGE_##XS][StartSlot + n].ref(); \
		} \
	} \
	virtual void STDMETHODCALLTYPE XS##SetSamplers( \
		__in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - 1) unsigned StartSlot, \
		__in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - StartSlot) unsigned NumSamplers, \
		__in_ecount(NumSamplers) ID3D11SamplerState *const *ppSamplers) \
	{ \
		SYNCHRONIZED; \
		xs_set_samplers<D3D11_STAGE_##XS>(StartSlot, NumSamplers, (GalliumD3D11SamplerState *const *)ppSamplers); \
	} \
	virtual void STDMETHODCALLTYPE XS##GetSamplers( \
		__in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - 1) unsigned StartSlot, \
		__in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT - StartSlot) unsigned NumSamplers, \
		__out_ecount(NumSamplers) ID3D11SamplerState **ppSamplers) \
	{ \
		SYNCHRONIZED; \
		for(unsigned n = 0; n < NumSamplers; ++n) \
		{ \
			ppSamplers[n] = samplers[D3D11_STAGE_##XS][StartSlot + n].ref(); \
		} \
	}
445
446 #define DO_VS(x) x
447 #define DO_GS(x) do {if(caps.gs) {x;}} while(0)
448 #define DO_PS(x) x
449 #define DO_HS(x)
450 #define DO_DS(x)
451 #define DO_CS(x)
452 IMPLEMENT_SHADER_STAGE(VS, Vertex)
453 IMPLEMENT_SHADER_STAGE(GS, Geometry)
454 IMPLEMENT_SHADER_STAGE(PS, Pixel)
455
456 #if API >= 11
457 IMPLEMENT_SHADER_STAGE(HS, Hull)
458 IMPLEMENT_SHADER_STAGE(DS, Domain)
459 IMPLEMENT_SHADER_STAGE(CS, Compute)
460
461 virtual void STDMETHODCALLTYPE CSSetUnorderedAccessViews(
462 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1) unsigned StartSlot,
463 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - StartSlot) unsigned NumUAVs,
464 __in_ecount(NumUAVs) ID3D11UnorderedAccessView *const *ppUnorderedAccessViews,
465 __in_ecount(NumUAVs) const unsigned *pUAVInitialCounts)
466 {
467 SYNCHRONIZED;
468 for(unsigned i = 0; i < NumUAVs; ++i)
469 cs_unordered_access_views[StartSlot + i] = ppUnorderedAccessViews[i];
470 }
471
472 virtual void STDMETHODCALLTYPE CSGetUnorderedAccessViews(
473 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1) unsigned StartSlot,
474 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - StartSlot) unsigned NumUAVs,
475 __out_ecount(NumUAVs) ID3D11UnorderedAccessView **ppUnorderedAccessViews)
476 {
477 SYNCHRONIZED;
478 for(unsigned i = 0; i < NumUAVs; ++i)
479 ppUnorderedAccessViews[i] = cs_unordered_access_views[StartSlot + i].ref();
480 }
481 #endif
482
483 template<unsigned s>
484 void update_stage()
485 {
486 if(update_flags & (1 << (UPDATE_VIEWS_SHIFT + s)))
487 {
488 while(num_shader_resource_views[s] && !sampler_views[s][num_shader_resource_views[s] - 1]) \
489 --num_shader_resource_views[s];
490 if(s < caps.stages)
491 {
492 struct pipe_sampler_view* views_to_bind[PIPE_MAX_SAMPLERS];
493 unsigned num_views_to_bind = shaders[s] ? shaders[s]->slot_to_resource.size() : 0;
494 for(unsigned i = 0; i < num_views_to_bind; ++i)
495 {
496 views_to_bind[i] = sampler_views[s][shaders[s]->slot_to_resource[i]];
497 }
498 switch(s)
499 {
500 case PIPE_SHADER_VERTEX:
501 pipe->set_vertex_sampler_views(pipe, num_views_to_bind, views_to_bind);
502 break;
503 case PIPE_SHADER_FRAGMENT:
504 pipe->set_fragment_sampler_views(pipe, num_views_to_bind, views_to_bind);
505 break;
506 case PIPE_SHADER_GEOMETRY:
507 pipe->set_geometry_sampler_views(pipe, num_views_to_bind, views_to_bind);
508 break;
509 }
510 }
511 }
512
513 if(update_flags & (1 << (UPDATE_SAMPLERS_SHIFT + s)))
514 {
515 while(num_samplers[s] && !sampler_csos[s].v[num_samplers[s] - 1])
516 --num_samplers[s];
517 if(s < caps.stages)
518 {
519 void* samplers_to_bind[PIPE_MAX_SAMPLERS];
520 unsigned num_samplers_to_bind = shaders[s] ? shaders[s]->slot_to_sampler.size() : 0;
521 for(unsigned i = 0; i < num_samplers_to_bind; ++i)
522 {
523 // index can be -1 to access sampler_csos[s].ld
524 samplers_to_bind[i] = *(sampler_csos[s].v + shaders[s]->slot_to_sampler[i]);
525 }
526 switch(s)
527 {
528 case PIPE_SHADER_VERTEX:
529 pipe->bind_vertex_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
530 break;
531 case PIPE_SHADER_FRAGMENT:
532 pipe->bind_fragment_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
533 break;
534 case PIPE_SHADER_GEOMETRY:
535 pipe->bind_geometry_sampler_states(pipe, num_samplers_to_bind, samplers_to_bind);
536 break;
537 }
538 }
539 }
540 }
541
542 void update_state()
543 {
544 update_stage<D3D11_STAGE_PS>();
545 update_stage<D3D11_STAGE_VS>();
546 update_stage<D3D11_STAGE_GS>();
547 #if API >= 11
548 update_stage<D3D11_STAGE_HS>();
549 update_stage<D3D11_STAGE_DS>();
550 update_stage<D3D11_STAGE_CS>();
551 #endif
552
553 if(update_flags & UPDATE_VERTEX_BUFFERS)
554 {
555 while(num_vertex_buffers && !vertex_buffers[num_vertex_buffers - 1].buffer)
556 --num_vertex_buffers;
557 pipe->set_vertex_buffers(pipe, num_vertex_buffers, vertex_buffers);
558 }
559
560 update_flags = 0;
561 }
562
563 virtual void STDMETHODCALLTYPE IASetInputLayout(
564 __in_opt ID3D11InputLayout *pInputLayout)
565 {
566 SYNCHRONIZED;
567 if(pInputLayout != input_layout.p)
568 {
569 input_layout = pInputLayout;
570 pipe->bind_vertex_elements_state(pipe, pInputLayout ? ((GalliumD3D11InputLayout*)pInputLayout)->object : default_input_layout);
571 }
572 }
573
574 virtual void STDMETHODCALLTYPE IAGetInputLayout(
575 __out ID3D11InputLayout **ppInputLayout)
576 {
577 SYNCHRONIZED;
578 *ppInputLayout = input_layout.ref();
579 }
580
581 virtual void STDMETHODCALLTYPE IASetVertexBuffers(
582 __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - 1) unsigned StartSlot,
583 __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - StartSlot) unsigned NumBuffers,
584 __in_ecount(NumBuffers) ID3D11Buffer *const *ppVertexBuffers,
585 __in_ecount(NumBuffers) const unsigned *pStrides,
586 __in_ecount(NumBuffers) const unsigned *pOffsets)
587 {
588 SYNCHRONIZED;
589 int last_different = -1;
590 for(unsigned i = 0; i < NumBuffers; ++i)
591 {
592 ID3D11Buffer* buffer = ppVertexBuffers[i];
593 if(buffer != input_buffers[StartSlot + i].p
594 || vertex_buffers[StartSlot + i].buffer_offset != pOffsets[i]
595 || vertex_buffers[StartSlot + i].stride != pOffsets[i]
596 )
597 {
598 input_buffers[StartSlot + i] = buffer;
599 vertex_buffers[StartSlot + i].buffer = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
600 vertex_buffers[StartSlot + i].buffer_offset = pOffsets[i];
601 vertex_buffers[StartSlot + i].stride = pStrides[i];
602 vertex_buffers[StartSlot + i].max_index = ~0;
603 last_different = i;
604 }
605 }
606 if(last_different >= 0)
607 {
608 num_vertex_buffers = std::max(num_vertex_buffers, StartSlot + NumBuffers);
609 update_flags |= UPDATE_VERTEX_BUFFERS;
610 }
611 }
612
613 virtual void STDMETHODCALLTYPE IAGetVertexBuffers(
614 __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - 1) unsigned StartSlot,
615 __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - StartSlot) unsigned NumBuffers,
616 __out_ecount_opt(NumBuffers) ID3D11Buffer **ppVertexBuffers,
617 __out_ecount_opt(NumBuffers) unsigned *pStrides,
618 __out_ecount_opt(NumBuffers) unsigned *pOffsets)
619 {
620 SYNCHRONIZED;
621 if(ppVertexBuffers)
622 {
623 for(unsigned i = 0; i < NumBuffers; ++i)
624 ppVertexBuffers[i] = input_buffers[StartSlot + i].ref();
625 }
626
627 if(pOffsets)
628 {
629 for(unsigned i = 0; i < NumBuffers; ++i)
630 pOffsets[i] = vertex_buffers[StartSlot + i].buffer_offset;
631 }
632
633 if(pStrides)
634 {
635 for(unsigned i = 0; i < NumBuffers; ++i)
636 pStrides[i] = vertex_buffers[StartSlot + i].stride;
637 }
638 }
639
640 void set_index_buffer()
641 {
642 pipe_index_buffer ib;
643 if(!index_buffer)
644 {
645 memset(&ib, 0, sizeof(ib));
646 }
647 else
648 {
649 if(index_format == DXGI_FORMAT_R32_UINT)
650 ib.index_size = 4;
651 else if(index_format == DXGI_FORMAT_R16_UINT)
652 ib.index_size = 2;
653 else
654 ib.index_size = 1;
655 ib.offset = index_offset;
656 ib.buffer = index_buffer ? ((GalliumD3D11Buffer*)index_buffer.p)->resource : 0;
657 }
658 pipe->set_index_buffer(pipe, &ib);
659 }
660
661 virtual void STDMETHODCALLTYPE IASetIndexBuffer(
662 __in_opt ID3D11Buffer *pIndexBuffer,
663 __in DXGI_FORMAT Format,
664 __in unsigned Offset)
665 {
666 SYNCHRONIZED;
667 if(index_buffer.p != pIndexBuffer || index_format != Format || index_offset != Offset)
668 {
669 index_buffer = pIndexBuffer;
670 index_format = Format;
671 index_offset = Offset;
672
673 set_index_buffer();
674 }
675 }
676
677 virtual void STDMETHODCALLTYPE IAGetIndexBuffer(
678 __out_opt ID3D11Buffer **pIndexBuffer,
679 __out_opt DXGI_FORMAT *Format,
680 __out_opt unsigned *Offset)
681 {
682 SYNCHRONIZED;
683 if(pIndexBuffer)
684 *pIndexBuffer = index_buffer.ref();
685 if(Format)
686 *Format = index_format;
687 if(Offset)
688 *Offset = index_offset;
689 }
690
691 virtual void STDMETHODCALLTYPE IASetPrimitiveTopology(
692 __in D3D11_PRIMITIVE_TOPOLOGY Topology)
693 {
694 SYNCHRONIZED;
695 if(primitive_topology != Topology)
696 {
697 if(Topology < D3D_PRIMITIVE_TOPOLOGY_COUNT)
698 primitive_mode = d3d_to_pipe_prim[Topology];
699 else
700 primitive_mode = 0;
701 primitive_topology = Topology;
702 }
703 }
704
705 virtual void STDMETHODCALLTYPE IAGetPrimitiveTopology(
706 __out D3D11_PRIMITIVE_TOPOLOGY *pTopology)
707 {
708 SYNCHRONIZED;
709 *pTopology = primitive_topology;
710 }
711
712 virtual void STDMETHODCALLTYPE DrawIndexed(
713 __in unsigned IndexCount,
714 __in unsigned StartIndexLocation,
715 __in int BaseVertexLocation)
716 {
717 SYNCHRONIZED;
718 if(update_flags)
719 update_state();
720
721 pipe_draw_info info;
722 info.mode = primitive_mode;
723 info.indexed = TRUE;
724 info.count = IndexCount;
725 info.start = StartIndexLocation;
726 info.index_bias = BaseVertexLocation;
727 info.min_index = 0;
728 info.max_index = ~0;
729 info.start_instance = 0;
730 info.instance_count = 1;
731
732 pipe->draw_vbo(pipe, &info);
733 }
734
735 virtual void STDMETHODCALLTYPE Draw(
736 __in unsigned VertexCount,
737 __in unsigned StartVertexLocation)
738 {
739 SYNCHRONIZED;
740 if(update_flags)
741 update_state();
742
743 pipe_draw_info info;
744 info.mode = primitive_mode;
745 info.indexed = FALSE;
746 info.count = VertexCount;
747 info.start = StartVertexLocation;
748 info.index_bias = 0;
749 info.min_index = 0;
750 info.max_index = ~0;
751 info.start_instance = 0;
752 info.instance_count = 1;
753
754 pipe->draw_vbo(pipe, &info);
755 }
756
757 virtual void STDMETHODCALLTYPE DrawIndexedInstanced(
758 __in unsigned IndexCountPerInstance,
759 __in unsigned InstanceCount,
760 __in unsigned StartIndexLocation,
761 __in int BaseVertexLocation,
762 __in unsigned StartInstanceLocation)
763 {
764 SYNCHRONIZED;
765 if(update_flags)
766 update_state();
767
768 pipe_draw_info info;
769 info.mode = primitive_mode;
770 info.indexed = TRUE;
771 info.count = IndexCountPerInstance;
772 info.start = StartIndexLocation;
773 info.index_bias = BaseVertexLocation;
774 info.min_index = 0;
775 info.max_index = ~0;
776 info.start_instance = StartInstanceLocation;
777 info.instance_count = InstanceCount;
778
779 pipe->draw_vbo(pipe, &info);
780 }
781
782 virtual void STDMETHODCALLTYPE DrawInstanced(
783 __in unsigned VertexCountPerInstance,
784 __in unsigned InstanceCount,
785 __in unsigned StartVertexLocation,
786 __in unsigned StartInstanceLocation)
787 {
788 SYNCHRONIZED;
789 if(update_flags)
790 update_state();
791
792 pipe_draw_info info;
793 info.mode = primitive_mode;
794 info.indexed = FALSE;
795 info.count = VertexCountPerInstance;
796 info.start = StartVertexLocation;
797 info.index_bias = 0;
798 info.min_index = 0;
799 info.max_index = ~0;
800 info.start_instance = StartInstanceLocation;
801 info.instance_count = InstanceCount;
802
803 pipe->draw_vbo(pipe, &info);
804 }
805
806 virtual void STDMETHODCALLTYPE DrawAuto(void)
807 {
808 if(!caps.so)
809 return;
810
811 SYNCHRONIZED;
812 if(update_flags)
813 update_state();
814
815 pipe->draw_stream_output(pipe, primitive_mode);
816 }
817
818 virtual void STDMETHODCALLTYPE DrawIndexedInstancedIndirect(
819 __in ID3D11Buffer *pBufferForArgs,
820 __in unsigned AlignedByteOffsetForArgs)
821 {
822 SYNCHRONIZED;
823 if(update_flags)
824 update_state();
825
826 struct {
827 unsigned count;
828 unsigned instance_count;
829 unsigned start;
830 unsigned index_bias;
831 } data;
832
833 pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)pBufferForArgs)->resource, AlignedByteOffsetForArgs, sizeof(data), &data);
834
835 pipe_draw_info info;
836 info.mode = primitive_mode;
837 info.indexed = TRUE;
838 info.start = data.start;
839 info.count = data.count;
840 info.index_bias = data.index_bias;
841 info.min_index = 0;
842 info.max_index = ~0;
843 info.start_instance = 0;
844 info.instance_count = data.instance_count;
845
846 pipe->draw_vbo(pipe, &info);
847 }
848
849 virtual void STDMETHODCALLTYPE DrawInstancedIndirect(
850 __in ID3D11Buffer *pBufferForArgs,
851 __in unsigned AlignedByteOffsetForArgs)
852 {
853 SYNCHRONIZED;
854 if(update_flags)
855 update_state();
856
857 struct {
858 unsigned count;
859 unsigned instance_count;
860 unsigned start;
861 } data;
862
863 pipe_buffer_read(pipe, ((GalliumD3D11Buffer*)pBufferForArgs)->resource, AlignedByteOffsetForArgs, sizeof(data), &data);
864
865 pipe_draw_info info;
866 info.mode = primitive_mode;
867 info.indexed = FALSE;
868 info.start = data.start;
869 info.count = data.count;
870 info.index_bias = 0;
871 info.min_index = 0;
872 info.max_index = ~0;
873 info.start_instance = 0;
874 info.instance_count = data.instance_count;
875
876 pipe->draw_vbo(pipe, &info);
877 }
878
879 #if API >= 11
880 virtual void STDMETHODCALLTYPE Dispatch(
881 __in unsigned ThreadGroupCountX,
882 __in unsigned ThreadGroupCountY,
883 __in unsigned ThreadGroupCountZ)
884 {
885 // uncomment this when this is implemented
886 // SYNCHRONIZED;
887 // if(update_flags)
888 // update_state();
889 }
890
891 virtual void STDMETHODCALLTYPE DispatchIndirect(
892 __in ID3D11Buffer *pBufferForArgs,
893 __in unsigned AlignedByteOffsetForArgs)
894 {
895 // uncomment this when this is implemented
896 // SYNCHRONIZED;
897 // if(update_flags)
898 // update_state();
899 }
900 #endif
901
902 void set_clip()
903 {
904 SYNCHRONIZED;
905 pipe_clip_state clip;
906 clip.nr = 0;
907 clip.depth_clamp = depth_clamp;
908 pipe->set_clip_state(pipe, &clip);
909 }
910
911 virtual void STDMETHODCALLTYPE RSSetState(
912 __in_opt ID3D11RasterizerState *pRasterizerState)
913 {
914 SYNCHRONIZED;
915 if(pRasterizerState != rasterizer_state.p)
916 {
917 rasterizer_state = pRasterizerState;
918 pipe->bind_rasterizer_state(pipe, pRasterizerState ? ((GalliumD3D11RasterizerState*)pRasterizerState)->object : default_rasterizer);
919 bool new_depth_clamp = pRasterizerState ? ((GalliumD3D11RasterizerState*)pRasterizerState)->depth_clamp : false;
920 if(depth_clamp != new_depth_clamp)
921 {
922 depth_clamp = new_depth_clamp;
923 set_clip();
924 }
925 }
926 }
927
928 virtual void STDMETHODCALLTYPE RSGetState(
929 __out ID3D11RasterizerState **ppRasterizerState)
930 {
931 SYNCHRONIZED;
932 *ppRasterizerState = rasterizer_state.ref();
933 }
934
935 void set_viewport()
936 {
937 // TODO: is depth correct? it seems D3D10/11 uses a [-1,1]x[-1,1]x[0,1] cube
938 pipe_viewport_state viewport;
939 float half_width = viewports[0].Width * 0.5f;
940 float half_height = viewports[0].Height * 0.5f;
941
942 viewport.scale[0] = half_width;
943 viewport.scale[1] = -half_height;
944 viewport.scale[2] = (viewports[0].MaxDepth - viewports[0].MinDepth);
945 viewport.scale[3] = 1.0f;
946 viewport.translate[0] = half_width + viewports[0].TopLeftX;
947 viewport.translate[1] = half_height + viewports[0].TopLeftY;
948 viewport.translate[2] = viewports[0].MinDepth;
949 viewport.translate[3] = 1.0f;
950 pipe->set_viewport_state(pipe, &viewport);
951 }
952
953 virtual void STDMETHODCALLTYPE RSSetViewports(
954 __in_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) unsigned NumViewports,
955 __in_ecount_opt(NumViewports) const D3D11_VIEWPORT *pViewports)
956 {
957 SYNCHRONIZED;
958 if(NumViewports)
959 {
960 if(memcmp(&viewports[0], &pViewports[0], sizeof(viewports[0])))
961 {
962 viewports[0] = pViewports[0];
963 set_viewport();
964 }
965 for(unsigned i = 1; i < NumViewports; ++i)
966 viewports[i] = pViewports[i];
967 }
968 else if(num_viewports)
969 {
970 // TODO: what should we do here?
971 memset(&viewports[0], 0, sizeof(viewports[0]));
972 set_viewport();
973 }
974 num_viewports = NumViewports;
975 }
976
977 virtual void STDMETHODCALLTYPE RSGetViewports(
978 __inout_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) unsigned *pNumViewports,
979 __out_ecount_opt(*pNumViewports) D3D11_VIEWPORT *pViewports)
980 {
981 SYNCHRONIZED;
982 if(pViewports)
983 {
984 unsigned i;
985 for(i = 0; i < std::min(*pNumViewports, num_viewports); ++i)
986 pViewports[i] = viewports[i];
987
988 memset(pViewports + i, 0, (*pNumViewports - i) * sizeof(D3D11_VIEWPORT));
989 }
990
991 *pNumViewports = num_viewports;
992 }
993
994 void set_scissor()
995 {
996 pipe_scissor_state scissor;
997 scissor.minx = scissor_rects[0].left;
998 scissor.miny = scissor_rects[0].top;
999 scissor.maxx = scissor_rects[0].right;
1000 scissor.maxy = scissor_rects[0].bottom;
1001 pipe->set_scissor_state(pipe, &scissor);
1002 }
1003
1004 virtual void STDMETHODCALLTYPE RSSetScissorRects(
1005 __in_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) unsigned NumRects,
1006 __in_ecount_opt(NumRects) const D3D11_RECT *pRects)
1007 {
1008 SYNCHRONIZED;
1009 if(NumRects)
1010 {
1011 if(memcmp(&scissor_rects[0], &pRects[0], sizeof(scissor_rects[0])))
1012 {
1013 scissor_rects[0] = pRects[0];
1014 set_scissor();
1015 }
1016 for(unsigned i = 1; i < NumRects; ++i)
1017 scissor_rects[i] = pRects[i];
1018 }
1019 else if(num_scissor_rects)
1020 {
1021 // TODO: what should we do here?
1022 memset(&scissor_rects[0], 0, sizeof(scissor_rects[0]));
1023 set_scissor();
1024 }
1025
1026 num_scissor_rects = NumRects;
1027 }
1028
1029 virtual void STDMETHODCALLTYPE RSGetScissorRects(
1030 __inout_range(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) unsigned *pNumRects,
1031 __out_ecount_opt(*pNumRects) D3D11_RECT *pRects)
1032 {
1033 SYNCHRONIZED;
1034 if(pRects)
1035 {
1036 unsigned i;
1037 for(i = 0; i < std::min(*pNumRects, num_scissor_rects); ++i)
1038 pRects[i] = scissor_rects[i];
1039
1040 memset(pRects + i, 0, (*pNumRects - i) * sizeof(D3D11_RECT));
1041 }
1042
1043 *pNumRects = num_scissor_rects;
1044 }
1045
1046 virtual void STDMETHODCALLTYPE OMSetBlendState(
1047 __in_opt ID3D11BlendState *pBlendState,
1048 __in_opt const float BlendFactor[ 4 ],
1049 __in unsigned SampleMask)
1050 {
1051 SYNCHRONIZED;
1052 float white[4] = {1.0f, 1.0f, 1.0f, 1.0f};
1053
1054 if(blend_state.p != pBlendState)
1055 {
1056 pipe->bind_blend_state(pipe, pBlendState ? ((GalliumD3D11BlendState*)pBlendState)->object : default_blend);
1057 blend_state = pBlendState;
1058 }
1059
1060 // Windows D3D11 does this, even though it's apparently undocumented
1061 if(!BlendFactor)
1062 BlendFactor = white;
1063
1064 if(memcmp(blend_color, BlendFactor, sizeof(blend_color)))
1065 {
1066 pipe->set_blend_color(pipe, (struct pipe_blend_color*)BlendFactor);
1067 memcpy(blend_color, BlendFactor, sizeof(blend_color));
1068 }
1069
1070 if(sample_mask != SampleMask)
1071 {
1072 pipe->set_sample_mask(pipe, sample_mask);
1073 sample_mask = SampleMask;
1074 }
1075 }
1076
1077 virtual void STDMETHODCALLTYPE OMGetBlendState(
1078 __out_opt ID3D11BlendState **ppBlendState,
1079 __out_opt float BlendFactor[ 4 ],
1080 __out_opt unsigned *pSampleMask)
1081 {
1082 SYNCHRONIZED;
1083 if(ppBlendState)
1084 *ppBlendState = blend_state.ref();
1085 if(BlendFactor)
1086 memcpy(BlendFactor, blend_color, sizeof(blend_color));
1087 if(pSampleMask)
1088 *pSampleMask = sample_mask;
1089 }
1090
1091 void set_stencil_ref()
1092 {
1093 struct pipe_stencil_ref sref;
1094 sref.ref_value[0] = stencil_ref;
1095 sref.ref_value[1] = stencil_ref;
1096 pipe->set_stencil_ref(pipe, &sref);
1097 }
1098
1099 virtual void STDMETHODCALLTYPE OMSetDepthStencilState(
1100 __in_opt ID3D11DepthStencilState *pDepthStencilState,
1101 __in unsigned StencilRef)
1102 {
1103 SYNCHRONIZED;
1104 if(pDepthStencilState != depth_stencil_state.p)
1105 {
1106 pipe->bind_depth_stencil_alpha_state(pipe, pDepthStencilState ? ((GalliumD3D11DepthStencilState*)pDepthStencilState)->object : default_depth_stencil);
1107 depth_stencil_state = pDepthStencilState;
1108 }
1109
1110 if(StencilRef != stencil_ref)
1111 {
1112 stencil_ref = StencilRef;
1113 set_stencil_ref();
1114 }
1115 }
1116
1117 virtual void STDMETHODCALLTYPE OMGetDepthStencilState(
1118 __out_opt ID3D11DepthStencilState **ppDepthStencilState,
1119 __out_opt unsigned *pStencilRef)
1120 {
1121 SYNCHRONIZED;
1122 if(*ppDepthStencilState)
1123 *ppDepthStencilState = depth_stencil_state.ref();
1124 if(pStencilRef)
1125 *pStencilRef = stencil_ref;
1126 }
1127
1128 void set_framebuffer()
1129 {
1130 struct pipe_framebuffer_state fb;
1131 memset(&fb, 0, sizeof(fb));
1132 if(depth_stencil_view)
1133 {
1134 struct pipe_surface* surf = ((GalliumD3D11DepthStencilView*)depth_stencil_view.p)->object;
1135 fb.zsbuf = surf;
1136 if(surf->width > fb.width)
1137 fb.width = surf->width;
1138 if(surf->height > fb.height)
1139 fb.height = surf->height;
1140 }
1141 fb.nr_cbufs = num_render_target_views;
1142 unsigned i;
1143 for(i = 0; i < num_render_target_views; ++i)
1144 {
1145 if(render_target_views[i])
1146 {
1147 struct pipe_surface* surf = ((GalliumD3D11RenderTargetView*)render_target_views[i].p)->object;
1148 fb.cbufs[i] = surf;
1149 if(surf->width > fb.width)
1150 fb.width = surf->width;
1151 if(surf->height > fb.height)
1152 fb.height = surf->height;
1153 }
1154 }
1155
1156 pipe->set_framebuffer_state(pipe, &fb);
1157 }
1158
1159 /* TODO: the docs say that we should unbind conflicting resources (e.g. those bound for read while we are binding them for write too), but we aren't.
1160 * Hopefully nobody relies on this happening
1161 */
1162
1163 virtual void STDMETHODCALLTYPE OMSetRenderTargets(
1164 __in_range(0, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT) unsigned NumViews,
1165 __in_ecount_opt(NumViews) ID3D11RenderTargetView *const *ppRenderTargetViews,
1166 __in_opt ID3D11DepthStencilView *pDepthStencilView)
1167 {
1168 SYNCHRONIZED;
1169 if(!ppRenderTargetViews)
1170 NumViews = 0;
1171 if(NumViews == num_render_target_views)
1172 {
1173 for(unsigned i = 0; i < NumViews; ++i)
1174 {
1175 if(ppRenderTargetViews[i] != render_target_views[i].p)
1176 goto changed;
1177 }
1178 return;
1179 }
1180 changed:
1181 depth_stencil_view = pDepthStencilView;
1182 unsigned i;
1183 for(i = 0; i < NumViews; ++i)
1184 {
1185 render_target_views[i] = ppRenderTargetViews[i];
1186 #if API >= 11
1187 om_unordered_access_views[i] = (ID3D11UnorderedAccessView*)NULL;
1188 #endif
1189 }
1190 for(; i < num_render_target_views; ++i)
1191 render_target_views[i] = (ID3D11RenderTargetView*)NULL;
1192 num_render_target_views = NumViews;
1193 set_framebuffer();
1194 }
1195
1196 virtual void STDMETHODCALLTYPE OMGetRenderTargets(
1197 __in_range(0, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT) unsigned NumViews,
1198 __out_ecount_opt(NumViews) ID3D11RenderTargetView **ppRenderTargetViews,
1199 __out_opt ID3D11DepthStencilView **ppDepthStencilView)
1200 {
1201 SYNCHRONIZED;
1202 if(ppRenderTargetViews)
1203 {
1204 unsigned i;
1205 for(i = 0; i < std::min(num_render_target_views, NumViews); ++i)
1206 ppRenderTargetViews[i] = render_target_views[i].ref();
1207
1208 for(; i < NumViews; ++i)
1209 ppRenderTargetViews[i] = 0;
1210 }
1211
1212 if(ppDepthStencilView)
1213 *ppDepthStencilView = depth_stencil_view.ref();
1214 }
1215
1216 #if API >= 11
1217 /* TODO: what is this supposed to do _exactly_? are we doing the right thing? */
1218 virtual void STDMETHODCALLTYPE OMSetRenderTargetsAndUnorderedAccessViews(
1219 __in unsigned NumRTVs,
1220 __in_ecount_opt(NumRTVs) ID3D11RenderTargetView *const *ppRenderTargetViews,
1221 __in_opt ID3D11DepthStencilView *pDepthStencilView,
1222 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1) unsigned UAVStartSlot,
1223 __in unsigned NumUAVs,
1224 __in_ecount_opt(NumUAVs) ID3D11UnorderedAccessView *const *ppUnorderedAccessViews,
1225 __in_ecount_opt(NumUAVs) const unsigned *pUAVInitialCounts)
1226 {
1227 SYNCHRONIZED;
1228 if(NumRTVs != D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)
1229 OMSetRenderTargets(NumRTVs, ppRenderTargetViews, pDepthStencilView);
1230
1231 if(NumUAVs != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)
1232 {
1233 for(unsigned i = 0; i < NumUAVs; ++i)
1234 {
1235 om_unordered_access_views[UAVStartSlot + i] = ppUnorderedAccessViews[i];
1236 render_target_views[UAVStartSlot + i] = (ID3D11RenderTargetView*)0;
1237 }
1238 }
1239 }
1240
1241 virtual void STDMETHODCALLTYPE OMGetRenderTargetsAndUnorderedAccessViews(
1242 __in_range(0, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT) unsigned NumRTVs,
1243 __out_ecount_opt(NumRTVs) ID3D11RenderTargetView **ppRenderTargetViews,
1244 __out_opt ID3D11DepthStencilView **ppDepthStencilView,
1245 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - 1) unsigned UAVStartSlot,
1246 __in_range(0, D3D11_PS_CS_UAV_REGISTER_COUNT - UAVStartSlot) unsigned NumUAVs,
1247 __out_ecount_opt(NumUAVs) ID3D11UnorderedAccessView **ppUnorderedAccessViews)
1248 {
1249 SYNCHRONIZED;
1250 if(ppRenderTargetViews)
1251 OMGetRenderTargets(NumRTVs, ppRenderTargetViews, ppDepthStencilView);
1252
1253 if(ppUnorderedAccessViews)
1254 {
1255 for(unsigned i = 0; i < NumUAVs; ++i)
1256 ppUnorderedAccessViews[i] = om_unordered_access_views[UAVStartSlot + i].ref();
1257 }
1258 }
1259 #endif
1260
1261 virtual void STDMETHODCALLTYPE SOSetTargets(
1262 __in_range(0, D3D11_SO_BUFFER_SLOT_COUNT) unsigned NumBuffers,
1263 __in_ecount_opt(NumBuffers) ID3D11Buffer *const *ppSOTargets,
1264 __in_ecount_opt(NumBuffers) const unsigned *pOffsets)
1265 {
1266 SYNCHRONIZED;
1267 unsigned i;
1268 if(!ppSOTargets)
1269 NumBuffers = 0;
1270 bool changed = false;
1271 for(i = 0; i < NumBuffers; ++i)
1272 {
1273 ID3D11Buffer* buffer = ppSOTargets[i];
1274 if(buffer != so_targets[i].p || pOffsets[i] != so_offsets[i])
1275 {
1276 so_buffers[i] = buffer ? ((GalliumD3D11Buffer*)buffer)->resource : 0;
1277 so_targets[i] = buffer;
1278 so_offsets[i] = pOffsets[i];
1279 changed = true;
1280 }
1281 }
1282 for(; i < D3D11_SO_BUFFER_SLOT_COUNT; ++i)
1283 {
1284 if(so_targets[i].p || so_offsets[i])
1285 {
1286 changed = true;
1287 so_targets[i] = (ID3D11Buffer*)0;
1288 so_offsets[i] = 0;
1289 }
1290 }
1291 num_so_targets = NumBuffers;
1292
1293 if(changed && caps.so)
1294 pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1295 }
1296
1297 virtual void STDMETHODCALLTYPE SOGetTargets(
1298 __in_range(0, D3D11_SO_BUFFER_SLOT_COUNT) unsigned NumBuffers,
1299 __out_ecount(NumBuffers) ID3D11Buffer **ppSOTargets
1300 #if API < 11
1301 , __out_ecount(NumBuffers) UINT *pOffsets
1302 #endif
1303 )
1304 {
1305 SYNCHRONIZED;
1306 for(unsigned i = 0; i < NumBuffers; ++i)
1307 {
1308 ppSOTargets[i] = so_targets[i].ref();
1309 #if API < 11
1310 pOffsets[i] = so_offsets[i];
1311 #endif
1312 }
1313 }
1314
1315 virtual void STDMETHODCALLTYPE Begin(
1316 __in ID3D11Asynchronous *pAsync)
1317 {
1318 SYNCHRONIZED;
1319 if(caps.queries)
1320 pipe->begin_query(pipe, ((GalliumD3D11Asynchronous<>*)pAsync)->query);
1321 }
1322
1323 virtual void STDMETHODCALLTYPE End(
1324 __in ID3D11Asynchronous *pAsync)
1325 {
1326 SYNCHRONIZED;
1327 if(caps.queries)
1328 pipe->end_query(pipe, ((GalliumD3D11Asynchronous<>*)pAsync)->query);
1329 }
1330
1331 virtual HRESULT STDMETHODCALLTYPE GetData(
1332 __in ID3D11Asynchronous *pAsync,
1333 __out_bcount_opt(DataSize) void *pData,
1334 __in unsigned DataSize,
1335 __in unsigned GetDataFlags)
1336 {
1337 SYNCHRONIZED;
1338 if(!caps.queries)
1339 return E_NOTIMPL;
1340
1341 GalliumD3D11Asynchronous<>* async = (GalliumD3D11Asynchronous<>*)pAsync;
1342 void* data = alloca(async->data_size);
1343 boolean ret = pipe->get_query_result(pipe, ((GalliumD3D11Asynchronous<>*)pAsync)->query, !(GetDataFlags & D3D11_ASYNC_GETDATA_DONOTFLUSH), data);
1344 if(pData)
1345 memcpy(pData, data, std::min(async->data_size, DataSize));
1346 return ret ? S_OK : S_FALSE;
1347 }
1348
1349 void set_render_condition()
1350 {
1351 if(caps.render_condition)
1352 {
1353 if(!render_predicate)
1354 pipe->render_condition(pipe, 0, 0);
1355 else
1356 {
1357 GalliumD3D11Predicate* predicate = (GalliumD3D11Predicate*)render_predicate.p;
1358 if(!render_predicate_value && predicate->desc.Query == D3D11_QUERY_OCCLUSION_PREDICATE)
1359 {
1360 unsigned mode = (predicate->desc.MiscFlags & D3D11_QUERY_MISC_PREDICATEHINT) ? PIPE_RENDER_COND_NO_WAIT : PIPE_RENDER_COND_WAIT;
1361 pipe->render_condition(pipe, predicate->query, mode);
1362 }
1363 else
1364 {
1365 /* TODO: add inverted predication to Gallium*/
1366 pipe->render_condition(pipe, 0, 0);
1367 }
1368 }
1369 }
1370 }
1371
1372 virtual void STDMETHODCALLTYPE SetPredication(
1373 __in_opt ID3D11Predicate *pPredicate,
1374 __in BOOL PredicateValue)
1375 {
1376 SYNCHRONIZED;
1377 if(render_predicate.p != pPredicate || render_predicate_value != PredicateValue)
1378 {
1379 render_predicate = pPredicate;
1380 render_predicate_value = PredicateValue;
1381 set_render_condition();
1382 }
1383 }
1384
1385 virtual void STDMETHODCALLTYPE GetPredication(
1386 __out_opt ID3D11Predicate **ppPredicate,
1387 __out_opt BOOL *pPredicateValue)
1388 {
1389 SYNCHRONIZED;
1390 if(ppPredicate)
1391 *ppPredicate = render_predicate.ref();
1392 if(pPredicateValue)
1393 *pPredicateValue = render_predicate_value;
1394 }
1395
1396 static pipe_subresource d3d11_to_pipe_subresource(struct pipe_resource* resource, unsigned subresource)
1397 {
1398 pipe_subresource sr;
1399 if(subresource <= resource->last_level)
1400 {
1401 sr.level = subresource;
1402 sr.face = 0;
1403 }
1404 else
1405 {
1406 unsigned levels = resource->last_level + 1;
1407 sr.level = subresource % levels;
1408 sr.face = subresource / levels;
1409 }
1410 return sr;
1411 }
1412
1413 virtual HRESULT STDMETHODCALLTYPE Map(
1414 __in ID3D11Resource *pResource,
1415 __in unsigned Subresource,
1416 __in D3D11_MAP MapType,
1417 __in unsigned MapFlags,
1418 __out D3D11_MAPPED_SUBRESOURCE *pMappedResource)
1419 {
1420 SYNCHRONIZED;
1421 GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource;
1422 if(resource->transfers.count(Subresource))
1423 return E_FAIL;
1424 pipe_subresource sr = d3d11_to_pipe_subresource(resource->resource, Subresource);
1425 pipe_box box;
1426 d3d11_to_pipe_box(resource->resource, sr.level, 0);
1427 unsigned usage = 0;
1428 if(MapType == D3D11_MAP_READ)
1429 usage = PIPE_TRANSFER_READ;
1430 else if(MapType == D3D11_MAP_WRITE)
1431 usage = PIPE_TRANSFER_WRITE;
1432 else if(MapType == D3D11_MAP_READ_WRITE)
1433 usage = PIPE_TRANSFER_READ_WRITE;
1434 else if(MapType == D3D11_MAP_WRITE_DISCARD)
1435 usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD;
1436 else if(MapType == D3D11_MAP_WRITE_NO_OVERWRITE)
1437 usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_NOOVERWRITE;
1438 else
1439 return E_INVALIDARG;
1440 if(MapType & D3D10_MAP_FLAG_DO_NOT_WAIT)
1441 usage |= PIPE_TRANSFER_DONTBLOCK;
1442 struct pipe_transfer* transfer = pipe->get_transfer(pipe, resource->resource, sr, usage, &box);
1443 if(!transfer) {
1444 if(MapType & D3D10_MAP_FLAG_DO_NOT_WAIT)
1445 return DXGI_ERROR_WAS_STILL_DRAWING;
1446 else
1447 return E_FAIL;
1448 }
1449 resource->transfers[Subresource] = transfer;
1450 pipe->transfer_map(pipe, transfer);
1451 pMappedResource->pData = transfer->data;
1452 pMappedResource->RowPitch = transfer->stride;
1453 pMappedResource->DepthPitch = transfer->slice_stride;
1454 return S_OK;
1455 }
1456
1457 virtual void STDMETHODCALLTYPE Unmap(
1458 __in ID3D11Resource *pResource,
1459 __in unsigned Subresource)
1460 {
1461 SYNCHRONIZED;
1462 GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource;
1463 std::unordered_map<unsigned, pipe_transfer*>::iterator i = resource->transfers.find(Subresource);
1464 if(i != resource->transfers.end())
1465 {
1466 pipe->transfer_unmap(pipe, i->second);
1467 pipe->transfer_destroy(pipe, i->second);
1468 resource->transfers.erase(i);
1469 }
1470 }
1471
1472 virtual void STDMETHODCALLTYPE CopySubresourceRegion(
1473 __in ID3D11Resource *pDstResource,
1474 __in unsigned DstSubresource,
1475 __in unsigned DstX,
1476 __in unsigned DstY,
1477 __in unsigned DstZ,
1478 __in ID3D11Resource *pSrcResource,
1479 __in unsigned SrcSubresource,
1480 __in_opt const D3D11_BOX *pSrcBox)
1481 {
1482 SYNCHRONIZED;
1483 GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource;
1484 GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)pSrcResource;
1485 pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, DstSubresource);
1486 pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, SrcSubresource);
1487 pipe_box box = d3d11_to_pipe_box(src->resource, subsrc.level, pSrcBox);
1488 for(unsigned i = 0; i < box.depth; ++i)
1489 {
1490 pipe->resource_copy_region(pipe,
1491 dst->resource, subdst, DstX, DstY, DstZ + i,
1492 src->resource, subsrc, box.x, box.y, box.z + i,
1493 box.width, box.height);
1494 }
1495 }
1496
1497 virtual void STDMETHODCALLTYPE CopyResource(
1498 __in ID3D11Resource *pDstResource,
1499 __in ID3D11Resource *pSrcResource)
1500 {
1501 SYNCHRONIZED;
1502 GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource;
1503 GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)pSrcResource;
1504 pipe_subresource sr;
1505 unsigned faces = dst->resource->target == PIPE_TEXTURE_CUBE ? 6 : 1;
1506
1507 for(sr.face = 0; sr.face < faces; ++sr.face)
1508 {
1509 for(sr.level = 0; sr.level <= dst->resource->last_level; ++sr.level)
1510 {
1511 unsigned w = u_minify(dst->resource->width0, sr.level);
1512 unsigned h = u_minify(dst->resource->height0, sr.level);
1513 unsigned d = u_minify(dst->resource->depth0, sr.level);
1514 for(unsigned i = 0; i < d; ++i)
1515 {
1516 pipe->resource_copy_region(pipe,
1517 dst->resource, sr, 0, 0, i,
1518 src->resource, sr, 0, 0, i,
1519 w, h);
1520 }
1521 }
1522 }
1523 }
1524
1525 virtual void STDMETHODCALLTYPE UpdateSubresource(
1526 __in ID3D11Resource *pDstResource,
1527 __in unsigned DstSubresource,
1528 __in_opt const D3D11_BOX *pDstBox,
1529 __in const void *pSrcData,
1530 __in unsigned SrcRowPitch,
1531 __in unsigned SrcDepthPitch)
1532 {
1533 SYNCHRONIZED;
1534 GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource;
1535 pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, DstSubresource);
1536 pipe_box box = d3d11_to_pipe_box(dst->resource, subdst.level, pDstBox);
1537 pipe->transfer_inline_write(pipe, dst->resource, subdst, PIPE_TRANSFER_WRITE, &box, pSrcData, SrcRowPitch, SrcDepthPitch);
1538 }
1539
1540 #if API >= 11
1541 virtual void STDMETHODCALLTYPE CopyStructureCount(
1542 __in ID3D11Buffer *pDstBuffer,
1543 __in unsigned DstAlignedByteOffset,
1544 __in ID3D11UnorderedAccessView *pSrcView)
1545 {
1546 SYNCHRONIZED;
1547 }
1548 #endif
1549
1550 virtual void STDMETHODCALLTYPE ClearRenderTargetView(
1551 __in ID3D11RenderTargetView *pRenderTargetView,
1552 __in const float ColorRGBA[4])
1553 {
1554 SYNCHRONIZED;
1555 GalliumD3D11RenderTargetView* view = ((GalliumD3D11RenderTargetView*)pRenderTargetView);
1556 pipe->clear_render_target(pipe, view->object, ColorRGBA, 0, 0, view->object->width, view->object->height);
1557 }
1558
1559 virtual void STDMETHODCALLTYPE ClearDepthStencilView(
1560 __in ID3D11DepthStencilView *pDepthStencilView,
1561 __in unsigned ClearFlags,
1562 __in float Depth,
1563 __in UINT8 Stencil)
1564 {
1565 SYNCHRONIZED;
1566 GalliumD3D11DepthStencilView* view = ((GalliumD3D11DepthStencilView*)pDepthStencilView);
1567 unsigned flags = 0;
1568 if(ClearFlags & D3D11_CLEAR_DEPTH)
1569 flags |= PIPE_CLEAR_DEPTH;
1570 if(ClearFlags & D3D11_CLEAR_STENCIL)
1571 flags |= PIPE_CLEAR_STENCIL;
1572 pipe->clear_depth_stencil(pipe, view->object, flags, Depth, Stencil, 0, 0, view->object->width, view->object->height);
1573 }
1574
1575 #if API >= 11
1576 virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewUint(
1577 __in ID3D11UnorderedAccessView *pUnorderedAccessView,
1578 __in const unsigned Values[ 4 ])
1579 {
1580 SYNCHRONIZED;
1581 }
1582
1583 virtual void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat(
1584 __in ID3D11UnorderedAccessView *pUnorderedAccessView,
1585 __in const float Values[ 4 ])
1586 {
1587 SYNCHRONIZED;
1588 }
1589 #endif
1590
1591 virtual void STDMETHODCALLTYPE RestoreGalliumStateBlitOnly()
1592 {
1593 pipe->bind_blend_state(pipe, blend_state.p ? blend_state.p->object : default_blend);
1594 pipe->bind_depth_stencil_alpha_state(pipe, depth_stencil_state.p ? depth_stencil_state.p->object : default_depth_stencil);
1595 pipe->bind_rasterizer_state(pipe, rasterizer_state.p ? rasterizer_state.p->object : default_rasterizer);
1596 pipe->bind_vertex_elements_state(pipe, input_layout.p ? input_layout.p->object : default_input_layout);
1597 pipe->bind_fs_state(pipe, shaders[D3D11_STAGE_PS].p ? shaders[D3D11_STAGE_PS].p->object : default_shaders[PIPE_SHADER_FRAGMENT]);
1598 pipe->bind_vs_state(pipe, shaders[D3D11_STAGE_VS].p ? shaders[D3D11_STAGE_VS].p->object : default_shaders[PIPE_SHADER_VERTEX]);
1599 if(caps.gs)
1600 pipe->bind_gs_state(pipe, shaders[D3D11_STAGE_GS].p ? shaders[D3D11_STAGE_GS].p->object : default_shaders[PIPE_SHADER_GEOMETRY]);
1601 set_framebuffer();
1602 set_viewport();
1603 set_clip();
1604 set_render_condition();
1605 // TODO: restore stream output
1606
1607 update_flags |= UPDATE_VERTEX_BUFFERS | (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_PS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_PS));
1608 }
1609
1610 virtual void STDMETHODCALLTYPE GenerateMips(
1611 __in ID3D11ShaderResourceView *pShaderResourceView)
1612 {
1613 SYNCHRONIZED;
1614
1615 GalliumD3D11ShaderResourceView* view = (GalliumD3D11ShaderResourceView*)pShaderResourceView;
1616 if(caps.gs)
1617 pipe->bind_gs_state(pipe, 0);
1618 if(caps.so)
1619 pipe->bind_stream_output_state(pipe, 0);
1620 if(pipe->render_condition)
1621 pipe->render_condition(pipe, 0, 0);
1622 util_gen_mipmap(gen_mipmap, view->object, 0, 0, view->object->texture->last_level, PIPE_TEX_FILTER_LINEAR);
1623 RestoreGalliumStateBlitOnly();
1624 }
1625
1626 virtual void STDMETHODCALLTYPE RestoreGalliumState()
1627 {
1628 SYNCHRONIZED;
1629 RestoreGalliumStateBlitOnly();
1630
1631 set_index_buffer();
1632 set_stencil_ref();
1633 pipe->set_blend_color(pipe, (struct pipe_blend_color*)blend_color);
1634 pipe->set_sample_mask(pipe, sample_mask);
1635
1636 for(unsigned s = 0; s < 3; ++s)
1637 {
1638 unsigned num = std::min(caps.constant_buffers[s], (unsigned)D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);
1639 for(unsigned i = 0; i < num; ++i)
1640 pipe->set_constant_buffer(pipe, s, i, constant_buffers[s][i].p ? constant_buffers[s][i].p->resource : 0);
1641 }
1642
1643 if(caps.so)
1644 pipe->set_stream_output_buffers(pipe, so_buffers, (int*)so_offsets, num_so_targets);
1645
1646 update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_VS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_VS));
1647 update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + D3D11_STAGE_GS)) | (1 << (UPDATE_VIEWS_SHIFT + D3D11_STAGE_GS));
1648
1649 set_scissor();
1650 }
1651
1652 #if API >= 11
1653 /* TODO: hack SRVs or sampler states to handle this, or add to Gallium */
1654 virtual void STDMETHODCALLTYPE SetResourceMinLOD(
1655 __in ID3D11Resource *pResource,
1656 float MinLOD)
1657 {
1658 SYNCHRONIZED;
1659 GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource;
1660 if(resource->min_lod != MinLOD)
1661 {
1662 // TODO: actually do anything?
1663 resource->min_lod = MinLOD;
1664 }
1665 }
1666
1667 virtual float STDMETHODCALLTYPE GetResourceMinLOD(
1668 __in ID3D11Resource *pResource)
1669 {
1670 SYNCHRONIZED;
1671 GalliumD3D11Resource<>* resource = (GalliumD3D11Resource<>*)pResource;
1672 return resource->min_lod;
1673 }
1674 #endif
1675
1676 virtual void STDMETHODCALLTYPE ResolveSubresource(
1677 __in ID3D11Resource *pDstResource,
1678 __in unsigned DstSubresource,
1679 __in ID3D11Resource *pSrcResource,
1680 __in unsigned SrcSubresource,
1681 __in DXGI_FORMAT Format)
1682 {
1683 SYNCHRONIZED;
1684 GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)pDstResource;
1685 GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)pSrcResource;
1686 pipe_subresource subdst = d3d11_to_pipe_subresource(dst->resource, DstSubresource);
1687 pipe_subresource subsrc = d3d11_to_pipe_subresource(src->resource, SrcSubresource);
1688 pipe->resource_resolve(pipe, dst->resource, subdst, src->resource, subsrc);
1689 }
1690
1691 #if API >= 11
1692 virtual void STDMETHODCALLTYPE ExecuteCommandList(
1693 __in ID3D11CommandList *pCommandList,
1694 BOOL RestoreContextState)
1695 {
1696 SYNCHRONIZED;
1697 }
1698
1699 virtual HRESULT STDMETHODCALLTYPE FinishCommandList(
1700 BOOL RestoreDeferredContextState,
1701 __out_opt ID3D11CommandList **ppCommandList)
1702 {
1703 SYNCHRONIZED;
1704 return E_NOTIMPL;
1705 }
1706 #endif
1707
1708 virtual void STDMETHODCALLTYPE ClearState(void)
1709 {
1710 SYNCHRONIZED;
1711
1712 // we qualify all calls so that we avoid virtual dispatch and might get them inlined
1713 // TODO: make sure all this gets inlined, which might require more compiler flags
1714 // TODO: optimize this
1715 #if API >= 11
1716 GalliumD3D11DeviceContext::PSSetShader(0, 0, 0);
1717 GalliumD3D11DeviceContext::GSSetShader(0, 0, 0);
1718 GalliumD3D11DeviceContext::VSSetShader(0, 0, 0);
1719 GalliumD3D11DeviceContext::HSSetShader(0, 0, 0);
1720 GalliumD3D11DeviceContext::DSSetShader(0, 0, 0);
1721 GalliumD3D11DeviceContext::CSSetShader(0, 0, 0);
1722 #else
1723 GalliumD3D11DeviceContext::PSSetShader(0);
1724 GalliumD3D11DeviceContext::GSSetShader(0);
1725 GalliumD3D11DeviceContext::VSSetShader(0);
1726 #endif
1727
1728 GalliumD3D11DeviceContext::IASetInputLayout(0);
1729 GalliumD3D11DeviceContext::IASetIndexBuffer(0, DXGI_FORMAT_UNKNOWN, 0);
1730 GalliumD3D11DeviceContext::RSSetState(0);
1731 GalliumD3D11DeviceContext::OMSetDepthStencilState(0, 0);
1732 GalliumD3D11DeviceContext::OMSetBlendState(0, (float*)zero_data, ~0);
1733 GalliumD3D11DeviceContext::SetPredication(0, 0);
1734 GalliumD3D11DeviceContext::IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED);
1735
1736 GalliumD3D11DeviceContext::PSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1737 GalliumD3D11DeviceContext::GSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1738 GalliumD3D11DeviceContext::VSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1739 #if API >= 11
1740 GalliumD3D11DeviceContext::HSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1741 GalliumD3D11DeviceContext::DSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1742 GalliumD3D11DeviceContext::CSSetConstantBuffers(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, (ID3D11Buffer**)zero_data);
1743 #endif
1744
1745 GalliumD3D11DeviceContext::IASetVertexBuffers(0, num_vertex_buffers, (ID3D11Buffer**)zero_data, (unsigned*)zero_data, (unsigned*)zero_data);
1746 #if API >= 11
1747 GalliumD3D11DeviceContext::OMSetRenderTargetsAndUnorderedAccessViews(0, 0, 0 , 0, 0, 0, 0);
1748 #else
1749 GalliumD3D11DeviceContext::OMSetRenderTargets(0, 0, 0 );
1750 #endif
1751 GalliumD3D11DeviceContext::SOSetTargets(0, 0, 0);
1752
1753 GalliumD3D11DeviceContext::PSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11ShaderResourceView**)zero_data);
1754 GalliumD3D11DeviceContext::GSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11ShaderResourceView**)zero_data);
1755 GalliumD3D11DeviceContext::VSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11ShaderResourceView**)zero_data);
1756 #if API >= 11
1757 GalliumD3D11DeviceContext::HSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11ShaderResourceView**)zero_data);
1758 GalliumD3D11DeviceContext::DSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11ShaderResourceView**)zero_data);
1759 GalliumD3D11DeviceContext::CSSetShaderResources(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11ShaderResourceView**)zero_data);
1760 #endif
1761
1762 GalliumD3D11DeviceContext::PSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_PS], (ID3D11SamplerState**)zero_data);
1763 GalliumD3D11DeviceContext::GSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_GS], (ID3D11SamplerState**)zero_data);
1764 GalliumD3D11DeviceContext::VSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_VS], (ID3D11SamplerState**)zero_data);
1765 #if API >= 11
1766 GalliumD3D11DeviceContext::HSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_HS], (ID3D11SamplerState**)zero_data);
1767 GalliumD3D11DeviceContext::DSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_DS], (ID3D11SamplerState**)zero_data);
1768 GalliumD3D11DeviceContext::CSSetSamplers(0, num_shader_resource_views[D3D11_STAGE_CS], (ID3D11SamplerState**)zero_data);
1769 #endif
1770
1771 GalliumD3D11DeviceContext::RSSetViewports(0, 0);
1772 GalliumD3D11DeviceContext::RSSetScissorRects(0, 0);
1773 }
1774
1775 virtual void STDMETHODCALLTYPE Flush(void)
1776 {
1777 SYNCHRONIZED;
1778 pipe->flush(pipe, PIPE_FLUSH_FRAME, 0);
1779 }
1780
1781 /* In Direct3D 10, if the reference count of an object drops to 0, it is automatically
1782 * cleanly unbound from the pipeline.
1783 * In Direct3D 11, the pipeline holds a reference.
1784 *
1785 * Note that instead of always scanning the pipeline on destruction, we could
1786 * maintain the internal reference count on DirectX 10 and use it to check if an
1787 * object is still bound.
1788 * Presumably, on average, scanning is faster if the application is well written.
1789 */
1790 #if API < 11
1791 #define IMPLEMENT_SIMPLE_UNBIND(name, member, gallium, def) \
1792 void Unbind##name(ID3D11##name* state) \
1793 { \
1794 SYNCHRONIZED; \
1795 if((void*)state == (void*)member.p) \
1796 { \
1797 member.p = 0; \
1798 pipe->bind_##gallium##_state(pipe, default_##def); \
1799 } \
1800 }
1801 IMPLEMENT_SIMPLE_UNBIND(BlendState, blend_state, blend, blend)
1802 IMPLEMENT_SIMPLE_UNBIND(RasterizerState, rasterizer_state, rasterizer, rasterizer)
1803 IMPLEMENT_SIMPLE_UNBIND(DepthStencilState, depth_stencil_state, depth_stencil_alpha, depth_stencil)
1804 IMPLEMENT_SIMPLE_UNBIND(InputLayout, input_layout, vertex_elements, input_layout)
1805 IMPLEMENT_SIMPLE_UNBIND(PixelShader, shaders[D3D11_STAGE_PS], fs, shaders[D3D11_STAGE_PS])
1806 IMPLEMENT_SIMPLE_UNBIND(VertexShader, shaders[D3D11_STAGE_VS], vs, shaders[D3D11_STAGE_VS])
1807 IMPLEMENT_SIMPLE_UNBIND(GeometryShader, shaders[D3D11_STAGE_GS], gs, shaders[D3D11_STAGE_GS])
1808
1809 void UnbindPredicate(ID3D11Predicate* predicate)
1810 {
1811 SYNCHRONIZED;
1812 if(predicate == render_predicate)
1813 {
1814 render_predicate.p = NULL;
1815 render_predicate_value = 0;
1816 pipe->render_condition(pipe, 0, 0);
1817 }
1818 }
1819
1820 void UnbindSamplerState(ID3D11SamplerState* state)
1821 {
1822 SYNCHRONIZED;
1823 for(unsigned s = 0; s < D3D11_STAGES; ++s)
1824 {
1825 for(unsigned i = 0; i < num_samplers[s]; ++i)
1826 {
1827 if(samplers[s][i] == state)
1828 {
1829 samplers[s][i].p = NULL;
1830 sampler_csos[s].v[i] = NULL;
1831 update_flags |= (1 << (UPDATE_SAMPLERS_SHIFT + s));
1832 }
1833 }
1834 }
1835 }
1836
1837 void UnbindBuffer(ID3D11Buffer* buffer)
1838 {
1839 SYNCHRONIZED;
1840 if(buffer == index_buffer)
1841 {
1842 index_buffer.p = 0;
1843 index_format = DXGI_FORMAT_UNKNOWN;
1844 index_offset = 0;
1845 struct pipe_index_buffer ib;
1846 memset(&ib, 0, sizeof(ib));
1847 pipe->set_index_buffer(pipe, &ib);
1848 }
1849
1850 for(unsigned i = 0; i < num_vertex_buffers; ++i)
1851 {
1852 if(buffer == input_buffers[i])
1853 {
1854 input_buffers[i].p = 0;
1855 memset(&vertex_buffers[num_vertex_buffers], 0, sizeof(vertex_buffers[num_vertex_buffers]));
1856 update_flags |= UPDATE_VERTEX_BUFFERS;
1857 }
1858 }
1859
1860 for(unsigned s = 0; s < D3D11_STAGES; ++s)
1861 {
1862 for(unsigned i = 0; i < sizeof(constant_buffers) / sizeof(constant_buffers[0]); ++i)
1863 {
1864 if(constant_buffers[s][i] == buffer)
1865 {
1866 constant_buffers[s][i] = (ID3D10Buffer*)NULL;
1867 pipe->set_constant_buffer(pipe, s, i, NULL);
1868 }
1869 }
1870 }
1871 }
1872
1873 void UnbindDepthStencilView(ID3D11DepthStencilView* view)
1874 {
1875 SYNCHRONIZED;
1876 if(view == depth_stencil_view)
1877 {
1878 depth_stencil_view.p = NULL;
1879 set_framebuffer();
1880 }
1881 }
1882
1883 void UnbindRenderTargetView(ID3D11RenderTargetView* view)
1884 {
1885 SYNCHRONIZED;
1886 bool any_bound = false;
1887 for(unsigned i = 0; i < num_render_target_views; ++i)
1888 {
1889 if(render_target_views[i] == view)
1890 {
1891 render_target_views[i].p = NULL;
1892 any_bound = true;
1893 }
1894 }
1895 if(any_bound)
1896 set_framebuffer();
1897 }
1898
1899 void UnbindShaderResourceView(ID3D11ShaderResourceView* view)
1900 {
1901 SYNCHRONIZED;
1902 for(unsigned s = 0; s < D3D11_STAGES; ++s)
1903 {
1904 for(unsigned i = 0; i < num_shader_resource_views[s]; ++i)
1905 {
1906 if(shader_resource_views[s][i] == view)
1907 {
1908 shader_resource_views[s][i].p = NULL;
1909 sampler_views[s][i] = NULL;
1910 update_flags |= (1 << (UPDATE_VIEWS_SHIFT + s));
1911 }
1912 }
1913 }
1914 }
1915 #endif
1916
1917 #undef SYNCHRONIZED
1918 };
1919
1920 #if API >= 11
1921 /* This approach serves two purposes.
1922 * First, we don't want to do an atomic operation to manipulate the reference
1923 * count every time something is bound/unbound to the pipeline, since they are
1924 * expensive.
1925 * Fortunately, the immediate context can only be used by a single thread, so
1926 * we don't have to use them, as long as a separate reference count is used
1927 * (see dual_refcnt_t).
1928 *
1929 * Second, we want to avoid the Device -> DeviceContext -> bound DeviceChild -> Device
1930 * garbage cycle.
1931 * To avoid it, DeviceChild doesn't hold a reference to Device as usual, but adds
1932 * one for each external reference count, while internal nonatomic_add_ref doesn't
1933 * add any.
1934 *
 * Note that ideally we would like to eliminate the non-atomic op too, but this is more
 * complicated, since we would either need to use garbage collection and give up
 * deterministic destruction (especially bad for large textures), or scan the whole
 * pipeline state every time the reference count of an object drops to 0, which risks
 * pathological slowdowns.
1940 *
1941 * Since this microoptimization should matter relatively little, let's avoid it for now.
1942 *
 * Note that deferred contexts don't use this, since as a whole, they must be thread-safe.
1944 * Eliminating the atomic ops for deferred contexts seems substantially harder.
1945 * This might be a problem if they are used in a one-shot multithreaded rendering
1946 * fashion, where SMP cacheline bouncing on the reference count may be visible.
1947 *
1948 * The idea would be to attach a structure of reference counts indexed by deferred
1949 * context id to each object. Ideally, this should be organized like ext2 block pointers.
1950 *
1951 * Every deferred context would get a reference count in its own cacheline.
1952 * The external count is protected by a lock bit, and there is also a "lock bit" in each
1953 * internal count.
1954 *
1955 * When the external count has to be dropped to 0, the lock bit is taken and all internal
1956 * reference counts are scanned, taking a count of them. A flag would also be set on them.
1957 * Deferred context manipulation would notice the flag, and update the count.
1958 * Once the count goes to zero, the object is freed.
1959 *
 * The problem with this is that if the external reference count ping-pongs between
1961 * zero and non-zero, the scans will take a lot of time.
1962 *
1963 * The idea to solve this is to compute the scans in a binary-tree like fashion, where
1964 * each binary tree node would have a "determined bit", which would be invalidated
1965 * by manipulations.
1966 *
1967 * However, all this complexity might actually be a loss in most cases, so let's just
1968 * stick to a single atomic refcnt for now.
1969 *
1970 * Also, we don't even support deferred contexts yet, so this can wait.
1971 */
1972 struct nonatomic_device_child_ptr_traits
1973 {
1974 static void add_ref(void* p)
1975 {
1976 if(p)
1977 ((GalliumD3D11DeviceChild<>*)p)->nonatomic_add_ref();
1978 }
1979
1980 static void release(void* p)
1981 {
1982 if(p)
1983 ((GalliumD3D11DeviceChild<>*)p)->nonatomic_release();
1984 }
1985 };
1986
1987 struct GalliumD3D11ImmediateDeviceContext
1988 : public GalliumD3D11DeviceContext<nonatomic_device_child_ptr_traits>
1989 {
1990 GalliumD3D11ImmediateDeviceContext(GalliumD3D11Screen* device, pipe_context* pipe, unsigned context_flags = 0)
1991 : GalliumD3D11DeviceContext(device, pipe, context_flags)
1992 {
1993 // not necessary, but tests that the API at least basically works
1994 ClearState();
1995 }
1996
1997 /* we do this since otherwise we would have a garbage cycle between this and the device */
1998 virtual ULONG STDMETHODCALLTYPE AddRef()
1999 {
2000 return this->device->AddRef();
2001 }
2002
2003 virtual ULONG STDMETHODCALLTYPE Release()
2004 {
2005 return this->device->Release();
2006 }
2007
2008 virtual D3D11_DEVICE_CONTEXT_TYPE STDMETHODCALLTYPE GetType()
2009 {
2010 return D3D11_DEVICE_CONTEXT_IMMEDIATE;
2011 }
2012 };
2013
2014 static ID3D11DeviceContext* GalliumD3D11ImmediateDeviceContext_Create(GalliumD3D11Screen* device, struct pipe_context* pipe, bool owns_pipe)
2015 {
2016 return new GalliumD3D11ImmediateDeviceContext(device, pipe, owns_pipe);
2017 }
2018
2019 static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumState(ID3D11DeviceContext* context)
2020 {
2021 ((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumState();
2022 }
2023
2024 static void GalliumD3D11ImmediateDeviceContext_RestoreGalliumStateBlitOnly(ID3D11DeviceContext* context)
2025 {
2026 ((GalliumD3D11ImmediateDeviceContext*)context)->RestoreGalliumStateBlitOnly();
2027 }
2028
2029 static void GalliumD3D11ImmediateDeviceContext_Destroy(ID3D11DeviceContext* context)
2030 {
2031 delete (GalliumD3D11ImmediateDeviceContext*)context;
2032 }
2033 #endif