st/nine: Refactor how user constbufs sizes are calculated
[mesa.git] / src / gallium / state_trackers / nine / stateblock9.c
1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "stateblock9.h"
24 #include "device9.h"
25 #include "basetexture9.h"
26 #include "nine_helpers.h"
27
28 #define DBG_CHANNEL DBG_STATEBLOCK
29
30 /* XXX TODO: handling of lights is broken */
31
32 HRESULT
33 NineStateBlock9_ctor( struct NineStateBlock9 *This,
34 struct NineUnknownParams *pParams,
35 enum nine_stateblock_type type )
36 {
37 HRESULT hr = NineUnknown_ctor(&This->base, pParams);
38
39 DBG("This=%p pParams=%p type=%d\n", This, pParams, type);
40
41 if (FAILED(hr))
42 return hr;
43
44 This->type = type;
45
46 This->state.vs_const_f = MALLOC(This->base.device->vs_const_size);
47 This->state.ps_const_f = MALLOC(This->base.device->ps_const_size);
48 if (!This->state.vs_const_f || !This->state.ps_const_f)
49 return E_OUTOFMEMORY;
50
51 return D3D_OK;
52 }
53
54 void
55 NineStateBlock9_dtor( struct NineStateBlock9 *This )
56 {
57 struct nine_state *state = &This->state;
58 struct nine_range *r;
59 struct nine_range_pool *pool = &This->base.device->range_pool;
60
61 nine_state_clear(state, FALSE);
62
63 FREE(state->vs_const_f);
64 FREE(state->ps_const_f);
65
66 FREE(state->ff.light);
67
68 FREE(state->ff.transform);
69
70 if (This->state.changed.ps_const_f) {
71 for (r = This->state.changed.ps_const_f; r->next; r = r->next);
72 nine_range_pool_put_chain(pool, This->state.changed.ps_const_f, r);
73 }
74 if (This->state.changed.vs_const_f) {
75 for (r = This->state.changed.vs_const_f; r->next; r = r->next);
76 nine_range_pool_put_chain(pool, This->state.changed.vs_const_f, r);
77 }
78
79 NineUnknown_dtor(&This->base);
80 }
81
82 /* Copy state marked changed in @mask from @src to @dst.
83 * If @apply is false, updating dst->changed can be omitted.
84 * TODO: compare ?
85 */
86 static void
87 nine_state_copy_common(struct nine_state *dst,
88 const struct nine_state *src,
89 struct nine_state *mask, /* aliases either src or dst */
90 const boolean apply,
91 struct nine_range_pool *pool)
92 {
93 unsigned i, s;
94
95 if (apply)
96 dst->changed.group |= mask->changed.group;
97
98 if (mask->changed.group & NINE_STATE_VIEWPORT)
99 dst->viewport = src->viewport;
100 if (mask->changed.group & NINE_STATE_SCISSOR)
101 dst->scissor = src->scissor;
102
103 if (mask->changed.group & NINE_STATE_VS)
104 nine_bind(&dst->vs, src->vs);
105 if (mask->changed.group & NINE_STATE_PS)
106 nine_bind(&dst->ps, src->ps);
107
108 /* Vertex constants.
109 *
110 * Various possibilities for optimization here, like creating a per-SB
111 * constant buffer, or memcmp'ing for changes.
112 * Will do that later depending on what works best for specific apps.
113 */
114 if (mask->changed.group & NINE_STATE_VS_CONST) {
115 struct nine_range *r;
116 for (r = mask->changed.vs_const_f; r; r = r->next) {
117 memcpy(&dst->vs_const_f[r->bgn * 4],
118 &src->vs_const_f[r->bgn * 4],
119 (r->end - r->bgn) * 4 * sizeof(float));
120 if (apply)
121 nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end,
122 pool);
123 }
124 if (mask->changed.vs_const_i) {
125 uint16_t m = mask->changed.vs_const_i;
126 for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
127 if (m & 1)
128 memcpy(dst->vs_const_i[i], src->vs_const_i[i], 4 * sizeof(int));
129 if (apply)
130 dst->changed.vs_const_i |= mask->changed.vs_const_i;
131 }
132 if (mask->changed.vs_const_b) {
133 uint16_t m = mask->changed.vs_const_b;
134 for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
135 if (m & 1)
136 dst->vs_const_b[i] = src->vs_const_b[i];
137 if (apply)
138 dst->changed.vs_const_b |= mask->changed.vs_const_b;
139 }
140 }
141
142 /* Pixel constants. */
143 if (mask->changed.group & NINE_STATE_PS_CONST) {
144 struct nine_range *r;
145 for (r = mask->changed.ps_const_f; r; r = r->next) {
146 memcpy(&dst->ps_const_f[r->bgn * 4],
147 &src->ps_const_f[r->bgn * 4],
148 (r->end - r->bgn) * 4 * sizeof(float));
149 if (apply)
150 nine_ranges_insert(&dst->changed.ps_const_f, r->bgn, r->end,
151 pool);
152 }
153 if (mask->changed.ps_const_i) {
154 uint16_t m = mask->changed.ps_const_i;
155 for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
156 if (m & 1)
157 memcpy(dst->ps_const_i[i], src->ps_const_i[i], 4 * sizeof(int));
158 if (apply)
159 dst->changed.ps_const_i |= mask->changed.ps_const_i;
160 }
161 if (mask->changed.ps_const_b) {
162 uint16_t m = mask->changed.ps_const_b;
163 for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
164 if (m & 1)
165 dst->ps_const_b[i] = src->ps_const_b[i];
166 if (apply)
167 dst->changed.ps_const_b |= mask->changed.ps_const_b;
168 }
169 }
170
171 /* Render states.
172 * TODO: Maybe build a list ?
173 */
174 for (i = 0; i < Elements(dst->changed.rs); ++i) {
175 uint32_t m = mask->changed.rs[i];
176 if (apply)
177 dst->changed.rs[i] |= m;
178 while (m) {
179 const int r = ffs(m) - 1;
180 m &= ~(1 << r);
181 dst->rs[i * 32 + r] = src->rs[i * 32 + r];
182 }
183 }
184
185
186 /* Clip planes. */
187 if (mask->changed.ucp) {
188 for (i = 0; i < PIPE_MAX_CLIP_PLANES; ++i)
189 if (mask->changed.ucp & (1 << i))
190 memcpy(dst->clip.ucp[i],
191 src->clip.ucp[i], sizeof(src->clip.ucp[0]));
192 if (apply)
193 dst->changed.ucp |= mask->changed.ucp;
194 }
195
196 /* Sampler state. */
197 if (mask->changed.group & NINE_STATE_SAMPLER) {
198 for (s = 0; s < NINE_MAX_SAMPLERS; ++s) {
199 if (mask->changed.sampler[s] == 0x3ffe) {
200 memcpy(&dst->samp[s], &src->samp[s], sizeof(dst->samp[s]));
201 } else {
202 uint32_t m = mask->changed.sampler[s];
203 while (m) {
204 const int i = ffs(m) - 1;
205 m &= ~(1 << i);
206 dst->samp[s][i] = src->samp[s][i];
207 }
208 }
209 if (apply)
210 dst->changed.sampler[s] |= mask->changed.sampler[s];
211 }
212 }
213
214 /* Index buffer. */
215 if (mask->changed.group & NINE_STATE_IDXBUF)
216 nine_bind(&dst->idxbuf, src->idxbuf);
217
218 /* Vertex streams. */
219 if (mask->changed.vtxbuf | mask->changed.stream_freq) {
220 uint32_t m = mask->changed.vtxbuf | mask->changed.stream_freq;
221 for (i = 0; m; ++i, m >>= 1) {
222 if (mask->changed.vtxbuf & (1 << i)) {
223 nine_bind(&dst->stream[i], src->stream[i]);
224 if (src->stream[i]) {
225 dst->vtxbuf[i].buffer_offset = src->vtxbuf[i].buffer_offset;
226 dst->vtxbuf[i].buffer = src->vtxbuf[i].buffer;
227 dst->vtxbuf[i].stride = src->vtxbuf[i].stride;
228 }
229 }
230 if (mask->changed.stream_freq & (1 << i))
231 dst->stream_freq[i] = src->stream_freq[i];
232 }
233 dst->stream_instancedata_mask &= ~mask->changed.stream_freq;
234 dst->stream_instancedata_mask |=
235 src->stream_instancedata_mask & mask->changed.stream_freq;
236 if (apply) {
237 dst->changed.vtxbuf |= mask->changed.vtxbuf;
238 dst->changed.stream_freq |= mask->changed.stream_freq;
239 }
240 }
241
242 if (!(mask->changed.group & NINE_STATE_FF))
243 return;
244 WARN_ONCE("Fixed function state not handled properly by StateBlocks.\n");
245
246 /* Fixed function state. */
247 if (apply)
248 dst->ff.changed.group |= src->ff.changed.group;
249
250 if (mask->changed.group & NINE_STATE_FF_MATERIAL)
251 dst->ff.material = src->ff.material;
252
253 if (mask->changed.group & NINE_STATE_FF_PSSTAGES) {
254 for (s = 0; s < NINE_MAX_SAMPLERS; ++s) {
255 for (i = 0; i < NINED3DTSS_COUNT; ++i)
256 if (mask->ff.changed.tex_stage[s][i / 32] & (1 << (i % 32)))
257 dst->ff.tex_stage[s][i] = src->ff.tex_stage[s][i];
258 if (apply) {
259 /* TODO: it's 32 exactly, just offset by 1 as 0 is unused */
260 dst->ff.changed.tex_stage[s][0] |=
261 mask->ff.changed.tex_stage[s][0];
262 dst->ff.changed.tex_stage[s][1] |=
263 mask->ff.changed.tex_stage[s][1];
264 }
265 }
266 }
267 if (mask->changed.group & NINE_STATE_FF_LIGHTING) {
268 if (dst->ff.num_lights < mask->ff.num_lights) {
269 dst->ff.light = REALLOC(dst->ff.light,
270 dst->ff.num_lights * sizeof(D3DLIGHT9),
271 mask->ff.num_lights * sizeof(D3DLIGHT9));
272 dst->ff.num_lights = mask->ff.num_lights;
273 }
274 for (i = 0; i < mask->ff.num_lights; ++i)
275 if (mask->ff.light[i].Type != NINED3DLIGHT_INVALID)
276 dst->ff.light[i] = src->ff.light[i];
277
278 DBG("TODO: active lights\n");
279 }
280 if (mask->changed.group & NINE_STATE_FF_VSTRANSF) {
281 for (i = 0; i < Elements(mask->ff.changed.transform); ++i) {
282 if (!mask->ff.changed.transform[i])
283 continue;
284 for (s = i * 32; s < (i * 32 + 32); ++s) {
285 if (!(mask->ff.changed.transform[i] & (1 << (s % 32))))
286 continue;
287 *nine_state_access_transform(dst, s, TRUE) =
288 *nine_state_access_transform( /* const because !alloc */
289 (struct nine_state *)src, s, FALSE);
290 }
291 if (apply)
292 dst->ff.changed.transform[i] |= mask->ff.changed.transform[i];
293 }
294 }
295 }
296
297 static void
298 nine_state_copy_common_all(struct nine_state *dst,
299 const struct nine_state *src,
300 struct nine_state *help,
301 const boolean apply,
302 struct nine_range_pool *pool,
303 const int MaxStreams)
304 {
305 unsigned i;
306
307 if (apply)
308 dst->changed.group |= src->changed.group;
309
310 dst->viewport = src->viewport;
311 dst->scissor = src->scissor;
312
313 nine_bind(&dst->vs, src->vs);
314 nine_bind(&dst->ps, src->ps);
315
316 /* Vertex constants.
317 *
318 * Various possibilities for optimization here, like creating a per-SB
319 * constant buffer, or memcmp'ing for changes.
320 * Will do that later depending on what works best for specific apps.
321 */
322 if (1) {
323 struct nine_range *r = help->changed.vs_const_f;
324 memcpy(&dst->vs_const_f[0],
325 &src->vs_const_f[0], (r->end - r->bgn) * 4 * sizeof(float));
326 if (apply)
327 nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end, pool);
328
329 memcpy(dst->vs_const_i, src->vs_const_i, sizeof(dst->vs_const_i));
330 memcpy(dst->vs_const_b, src->vs_const_b, sizeof(dst->vs_const_b));
331 if (apply) {
332 dst->changed.vs_const_i |= src->changed.vs_const_i;
333 dst->changed.vs_const_b |= src->changed.vs_const_b;
334 }
335 }
336
337 /* Pixel constants. */
338 if (1) {
339 struct nine_range *r = help->changed.ps_const_f;
340 memcpy(&dst->ps_const_f[0],
341 &src->ps_const_f[0], (r->end - r->bgn) * 4 * sizeof(float));
342 if (apply)
343 nine_ranges_insert(&dst->changed.ps_const_f, r->bgn, r->end, pool);
344
345 memcpy(dst->ps_const_i, src->ps_const_i, sizeof(dst->ps_const_i));
346 memcpy(dst->ps_const_b, src->ps_const_b, sizeof(dst->ps_const_b));
347 if (apply) {
348 dst->changed.ps_const_i |= src->changed.ps_const_i;
349 dst->changed.ps_const_b |= src->changed.ps_const_b;
350 }
351 }
352
353 /* Render states. */
354 memcpy(dst->rs, src->rs, sizeof(dst->rs));
355 if (apply)
356 memcpy(dst->changed.rs, src->changed.rs, sizeof(dst->changed.rs));
357
358
359 /* Clip planes. */
360 memcpy(&dst->clip, &src->clip, sizeof(dst->clip));
361 if (apply)
362 dst->changed.ucp = src->changed.ucp;
363
364 /* Sampler state. */
365 memcpy(dst->samp, src->samp, sizeof(dst->samp));
366 if (apply)
367 memcpy(dst->changed.sampler,
368 src->changed.sampler, sizeof(dst->changed.sampler));
369
370 /* Index buffer. */
371 nine_bind(&dst->idxbuf, src->idxbuf);
372
373 /* Vertex streams. */
374 if (1) {
375 for (i = 0; i < Elements(dst->stream); ++i) {
376 nine_bind(&dst->stream[i], src->stream[i]);
377 if (src->stream[i]) {
378 dst->vtxbuf[i].buffer_offset = src->vtxbuf[i].buffer_offset;
379 dst->vtxbuf[i].buffer = src->vtxbuf[i].buffer;
380 dst->vtxbuf[i].stride = src->vtxbuf[i].stride;
381 }
382 dst->stream_freq[i] = src->stream_freq[i];
383 }
384 dst->stream_instancedata_mask = src->stream_instancedata_mask;
385 if (apply) {
386 dst->changed.vtxbuf = (1ULL << MaxStreams) - 1;
387 dst->changed.stream_freq = (1ULL << MaxStreams) - 1;
388 }
389 }
390
391 /* keep this check in case we want to disable FF */
392 if (!(help->changed.group & NINE_STATE_FF))
393 return;
394 WARN_ONCE("Fixed function state not handled properly by StateBlocks.\n");
395
396 /* Fixed function state. */
397 if (apply)
398 dst->ff.changed.group = src->ff.changed.group;
399
400 dst->ff.material = src->ff.material;
401
402 memcpy(dst->ff.tex_stage, src->ff.tex_stage, sizeof(dst->ff.tex_stage));
403 if (apply) /* TODO: memset */
404 memcpy(dst->ff.changed.tex_stage,
405 src->ff.changed.tex_stage, sizeof(dst->ff.changed.tex_stage));
406
407 /* Lights. */
408 if (1) {
409 if (dst->ff.num_lights < src->ff.num_lights) {
410 dst->ff.light = REALLOC(dst->ff.light,
411 dst->ff.num_lights * sizeof(D3DLIGHT9),
412 src->ff.num_lights * sizeof(D3DLIGHT9));
413 dst->ff.num_lights = src->ff.num_lights;
414 }
415 memcpy(dst->ff.light,
416 src->ff.light, src->ff.num_lights * sizeof(dst->ff.light[0]));
417
418 DBG("TODO: active lights\n");
419 }
420
421 /* Transforms. */
422 if (1) {
423 if (dst->ff.num_transforms < src->ff.num_transforms) {
424 dst->ff.transform = REALLOC(dst->ff.transform,
425 dst->ff.num_transforms * sizeof(dst->ff.transform[0]),
426 src->ff.num_transforms * sizeof(src->ff.transform[0]));
427 dst->ff.num_transforms = src->ff.num_transforms;
428 }
429 memcpy(dst->ff.transform,
430 src->ff.transform, src->ff.num_transforms * sizeof(D3DMATRIX));
431 if (apply) /* TODO: memset */
432 memcpy(dst->ff.changed.transform,
433 src->ff.changed.transform, sizeof(dst->ff.changed.transform));
434 }
435 }
436
437 /* Capture those bits of current device state that have been changed between
438 * BeginStateBlock and EndStateBlock.
439 */
440 HRESULT WINAPI
441 NineStateBlock9_Capture( struct NineStateBlock9 *This )
442 {
443 struct nine_state *dst = &This->state;
444 struct nine_state *src = &This->base.device->state;
445 const int MaxStreams = This->base.device->caps.MaxStreams;
446 unsigned s;
447
448 DBG("This=%p\n", This);
449
450 if (This->type == NINESBT_ALL)
451 nine_state_copy_common_all(dst, src, dst, FALSE, NULL, MaxStreams);
452 else
453 nine_state_copy_common(dst, src, dst, FALSE, NULL);
454
455 if (dst->changed.group & NINE_STATE_VDECL)
456 nine_bind(&dst->vdecl, src->vdecl);
457
458 /* Textures */
459 if (dst->changed.texture) {
460 uint32_t m = dst->changed.texture;
461 for (s = 0; m; ++s, m >>= 1)
462 if (m & 1)
463 nine_bind(&dst->texture[s], src->texture[s]);
464 }
465
466 return D3D_OK;
467 }
468
469 /* Set state managed by this StateBlock as current device state. */
470 HRESULT WINAPI
471 NineStateBlock9_Apply( struct NineStateBlock9 *This )
472 {
473 struct nine_state *dst = &This->base.device->state;
474 struct nine_state *src = &This->state;
475 struct nine_range_pool *pool = &This->base.device->range_pool;
476 const int MaxStreams = This->base.device->caps.MaxStreams;
477 unsigned s;
478
479 DBG("This=%p\n", This);
480
481 if (This->type == NINESBT_ALL)
482 nine_state_copy_common_all(dst, src, src, TRUE, pool, MaxStreams);
483 else
484 nine_state_copy_common(dst, src, src, TRUE, pool);
485
486 if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl)
487 nine_bind(&dst->vdecl, src->vdecl);
488
489 /* Textures */
490 if (src->changed.texture) {
491 uint32_t m = src->changed.texture;
492 dst->changed.texture |= m;
493
494 dst->samplers_shadow &= ~m;
495
496 for (s = 0; m; ++s, m >>= 1) {
497 struct NineBaseTexture9 *tex = src->texture[s];
498 if (!(m & 1))
499 continue;
500 if (tex) {
501 tex->bind_count++;
502 if ((tex->dirty | tex->dirty_mip) && LIST_IS_EMPTY(&tex->list))
503 list_add(&tex->list, &This->base.device->update_textures);
504 dst->samplers_shadow |= tex->shadow << s;
505 }
506 if (src->texture[s])
507 src->texture[s]->bind_count--;
508 nine_bind(&dst->texture[s], src->texture[s]);
509 }
510 }
511
512 return D3D_OK;
513 }
514
515 IDirect3DStateBlock9Vtbl NineStateBlock9_vtable = {
516 (void *)NineUnknown_QueryInterface,
517 (void *)NineUnknown_AddRef,
518 (void *)NineUnknown_Release,
519 (void *)NineUnknown_GetDevice, /* actually part of StateBlock9 iface */
520 (void *)NineStateBlock9_Capture,
521 (void *)NineStateBlock9_Apply
522 };
523
524 static const GUID *NineStateBlock9_IIDs[] = {
525 &IID_IDirect3DStateBlock9,
526 &IID_IUnknown,
527 NULL
528 };
529
530 HRESULT
531 NineStateBlock9_new( struct NineDevice9 *pDevice,
532 struct NineStateBlock9 **ppOut,
533 enum nine_stateblock_type type)
534 {
535 NINE_DEVICE_CHILD_NEW(StateBlock9, ppOut, pDevice, type);
536 }