svga: fix uninitialized fields in DefineDepthStencilView/DefineStreamOutput
[mesa.git] / src / gallium / drivers / svga / svga_shader.c
1 /**********************************************************
2 * Copyright 2008-2012 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26 #include "util/u_bitmask.h"
27 #include "util/u_memory.h"
28 #include "util/u_format.h"
29 #include "svga_context.h"
30 #include "svga_cmd.h"
31 #include "svga_format.h"
32 #include "svga_shader.h"
33 #include "svga_resource_texture.h"
34
35
/**
 * Pseudo "generic" bit reserved for the fog output.
 * It does not correspond to a real GENERIC[i] register; it is only OR'ed
 * into the shader output bitmask so that the mask forms a unique
 * "signature".  Input/output signatures are what is compared when
 * resolving shader linking issues.
 */
#define FOG_GENERIC_BIT ((uint64_t) 1 << 63)
43
44
45 /**
46 * Use the shader info to generate a bitmask indicating which generic
47 * inputs are used by the shader. A set bit indicates that GENERIC[i]
48 * is used.
49 */
50 uint64_t
51 svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
52 {
53 unsigned i;
54 uint64_t mask = 0x0;
55
56 for (i = 0; i < info->num_inputs; i++) {
57 if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
58 unsigned j = info->input_semantic_index[i];
59 assert(j < sizeof(mask) * 8);
60 mask |= ((uint64_t) 1) << j;
61 }
62 }
63
64 return mask;
65 }
66
67
68 /**
69 * Scan shader info to return a bitmask of written outputs.
70 */
71 uint64_t
72 svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
73 {
74 unsigned i;
75 uint64_t mask = 0x0;
76
77 for (i = 0; i < info->num_outputs; i++) {
78 switch (info->output_semantic_name[i]) {
79 case TGSI_SEMANTIC_GENERIC:
80 {
81 unsigned j = info->output_semantic_index[i];
82 assert(j < sizeof(mask) * 8);
83 mask |= ((uint64_t) 1) << j;
84 }
85 break;
86 case TGSI_SEMANTIC_FOG:
87 mask |= FOG_GENERIC_BIT;
88 break;
89 }
90 }
91
92 return mask;
93 }
94
95
96
97 /**
98 * Given a mask of used generic variables (as returned by the above functions)
99 * fill in a table which maps those indexes to small integers.
100 * This table is used by the remap_generic_index() function in
101 * svga_tgsi_decl_sm30.c
102 * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
103 * GENERIC[3] are used. The remap_table will contain:
104 * table[1] = 0;
105 * table[3] = 1;
106 * The remaining table entries will be filled in with the next unused
107 * generic index (in this example, 2).
108 */
109 void
110 svga_remap_generics(uint64_t generics_mask,
111 int8_t remap_table[MAX_GENERIC_VARYING])
112 {
113 /* Note texcoord[0] is reserved so start at 1 */
114 unsigned count = 1, i;
115
116 for (i = 0; i < MAX_GENERIC_VARYING; i++) {
117 remap_table[i] = -1;
118 }
119
120 /* for each bit set in generic_mask */
121 while (generics_mask) {
122 unsigned index = ffsll(generics_mask) - 1;
123 remap_table[index] = count++;
124 generics_mask &= ~((uint64_t) 1 << index);
125 }
126 }
127
128
129 /**
130 * Use the generic remap table to map a TGSI generic varying variable
131 * index to a small integer. If the remapping table doesn't have a
132 * valid value for the given index (the table entry is -1) it means
133 * the fragment shader doesn't use that VS output. Just allocate
134 * the next free value in that case. Alternately, we could cull
135 * VS instructions that write to register, or replace the register
136 * with a dummy temp register.
137 * XXX TODO: we should do one of the later as it would save precious
138 * texcoord registers.
139 */
140 int
141 svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
142 int generic_index)
143 {
144 assert(generic_index < MAX_GENERIC_VARYING);
145
146 if (generic_index >= MAX_GENERIC_VARYING) {
147 /* just don't return a random/garbage value */
148 generic_index = MAX_GENERIC_VARYING - 1;
149 }
150
151 if (remap_table[generic_index] == -1) {
152 /* This is a VS output that has no matching PS input. Find a
153 * free index.
154 */
155 int i, max = 0;
156 for (i = 0; i < MAX_GENERIC_VARYING; i++) {
157 max = MAX2(max, remap_table[i]);
158 }
159 remap_table[generic_index] = max + 1;
160 }
161
162 return remap_table[generic_index];
163 }
164
165 static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
166 PIPE_SWIZZLE_X,
167 PIPE_SWIZZLE_Y,
168 PIPE_SWIZZLE_Z,
169 PIPE_SWIZZLE_W,
170 PIPE_SWIZZLE_0,
171 PIPE_SWIZZLE_1,
172 PIPE_SWIZZLE_NONE
173 };
174
175 static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
176 PIPE_SWIZZLE_X,
177 PIPE_SWIZZLE_Y,
178 PIPE_SWIZZLE_Z,
179 PIPE_SWIZZLE_1,
180 PIPE_SWIZZLE_0,
181 PIPE_SWIZZLE_1,
182 PIPE_SWIZZLE_NONE
183 };
184
185 static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
186 PIPE_SWIZZLE_0,
187 PIPE_SWIZZLE_0,
188 PIPE_SWIZZLE_0,
189 PIPE_SWIZZLE_X,
190 PIPE_SWIZZLE_0,
191 PIPE_SWIZZLE_1,
192 PIPE_SWIZZLE_NONE
193 };
194
195 static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
196 PIPE_SWIZZLE_X,
197 PIPE_SWIZZLE_X,
198 PIPE_SWIZZLE_X,
199 PIPE_SWIZZLE_X,
200 PIPE_SWIZZLE_0,
201 PIPE_SWIZZLE_1,
202 PIPE_SWIZZLE_NONE
203 };
204
205 static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
206 PIPE_SWIZZLE_X,
207 PIPE_SWIZZLE_X,
208 PIPE_SWIZZLE_X,
209 PIPE_SWIZZLE_1,
210 PIPE_SWIZZLE_0,
211 PIPE_SWIZZLE_1,
212 PIPE_SWIZZLE_NONE
213 };
214
215 static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
216 PIPE_SWIZZLE_X,
217 PIPE_SWIZZLE_X,
218 PIPE_SWIZZLE_X,
219 PIPE_SWIZZLE_Y,
220 PIPE_SWIZZLE_0,
221 PIPE_SWIZZLE_1,
222 PIPE_SWIZZLE_NONE
223 };
224
225
226 /**
227 * Initialize the shader-neutral fields of svga_compile_key from context
228 * state. This is basically the texture-related state.
229 */
230 void
231 svga_init_shader_key_common(const struct svga_context *svga,
232 enum pipe_shader_type shader,
233 struct svga_compile_key *key)
234 {
235 unsigned i, idx = 0;
236
237 assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));
238
239 /* In case the number of samplers and sampler_views doesn't match,
240 * loop over the lower of the two counts.
241 */
242 key->num_textures = MAX2(svga->curr.num_sampler_views[shader],
243 svga->curr.num_samplers[shader]);
244
245 for (i = 0; i < key->num_textures; i++) {
246 struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
247 const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
248 if (view) {
249 assert(view->texture);
250 assert(view->texture->target < (1 << 4)); /* texture_target:4 */
251
252 /* 1D/2D array textures with one slice and cube map array textures
253 * with one cube are treated as non-arrays by the SVGA3D device.
254 * Set the is_array flag only if we know that we have more than 1
255 * element. This will be used to select shader instruction/resource
256 * types during shader translation.
257 */
258 switch (view->texture->target) {
259 case PIPE_TEXTURE_1D_ARRAY:
260 case PIPE_TEXTURE_2D_ARRAY:
261 key->tex[i].is_array = view->texture->array_size > 1;
262 break;
263 case PIPE_TEXTURE_CUBE_ARRAY:
264 key->tex[i].is_array = view->texture->array_size > 6;
265 break;
266 default:
267 ; /* nothing / silence compiler warning */
268 }
269
270 assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
271 key->tex[i].num_samples = view->texture->nr_samples;
272
273 const enum pipe_swizzle *swizzle_tab;
274 if (view->texture->target == PIPE_BUFFER) {
275 SVGA3dSurfaceFormat svga_format;
276 unsigned tf_flags;
277
278 /* Apply any special swizzle mask for the view format if needed */
279
280 svga_translate_texture_buffer_view_format(view->format,
281 &svga_format, &tf_flags);
282 if (tf_flags & TF_000X)
283 swizzle_tab = set_000X;
284 else if (tf_flags & TF_XXXX)
285 swizzle_tab = set_XXXX;
286 else if (tf_flags & TF_XXX1)
287 swizzle_tab = set_XXX1;
288 else if (tf_flags & TF_XXXY)
289 swizzle_tab = set_XXXY;
290 else
291 swizzle_tab = copy_alpha;
292 }
293 else {
294 /* If we have a non-alpha view into an svga3d surface with an
295 * alpha channel, then explicitly set the alpha channel to 1
296 * when sampling. Note that we need to check the
297 * actual device format to cover also imported surface cases.
298 */
299 swizzle_tab =
300 (!util_format_has_alpha(view->format) &&
301 svga_texture_device_format_has_alpha(view->texture)) ?
302 set_alpha : copy_alpha;
303
304 if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
305 view->texture->format == PIPE_FORMAT_DXT1_SRGB)
306 swizzle_tab = set_alpha;
307 }
308
309 key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
310 key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
311 key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
312 key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
313 }
314
315 if (sampler) {
316 if (!sampler->normalized_coords) {
317 assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
318 key->tex[i].width_height_idx = idx++;
319 key->tex[i].unnormalized = TRUE;
320 ++key->num_unnormalized_coords;
321
322 if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
323 sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
324 key->tex[i].texel_bias = TRUE;
325 }
326 }
327 }
328 }
329 }
330
331
332 /** Search for a compiled shader variant with the same compile key */
333 struct svga_shader_variant *
334 svga_search_shader_key(const struct svga_shader *shader,
335 const struct svga_compile_key *key)
336 {
337 struct svga_shader_variant *variant = shader->variants;
338
339 assert(key);
340
341 for ( ; variant; variant = variant->next) {
342 if (svga_compile_keys_equal(key, &variant->key))
343 return variant;
344 }
345 return NULL;
346 }
347
348 /** Search for a shader with the same token key */
349 struct svga_shader *
350 svga_search_shader_token_key(struct svga_shader *pshader,
351 const struct svga_token_key *key)
352 {
353 struct svga_shader *shader = pshader;
354
355 assert(key);
356
357 for ( ; shader; shader = shader->next) {
358 if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
359 return shader;
360 }
361 return NULL;
362 }
363
364 /**
365 * Helper function to define a gb shader for non-vgpu10 device
366 */
367 static enum pipe_error
368 define_gb_shader_vgpu9(struct svga_context *svga,
369 SVGA3dShaderType type,
370 struct svga_shader_variant *variant,
371 unsigned codeLen)
372 {
373 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
374 enum pipe_error ret;
375
376 /**
377 * Create gb memory for the shader and upload the shader code.
378 * Kernel module will allocate an id for the shader and issue
379 * the DefineGBShader command.
380 */
381 variant->gb_shader = sws->shader_create(sws, type,
382 variant->tokens, codeLen);
383
384 if (!variant->gb_shader)
385 return PIPE_ERROR_OUT_OF_MEMORY;
386
387 ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
388
389 return ret;
390 }
391
392 /**
393 * Helper function to define a gb shader for vgpu10 device
394 */
395 static enum pipe_error
396 define_gb_shader_vgpu10(struct svga_context *svga,
397 SVGA3dShaderType type,
398 struct svga_shader_variant *variant,
399 unsigned codeLen)
400 {
401 struct svga_winsys_context *swc = svga->swc;
402 enum pipe_error ret;
403
404 /**
405 * Shaders in VGPU10 enabled device reside in the device COTable.
406 * SVGA driver will allocate an integer ID for the shader and
407 * issue DXDefineShader and DXBindShader commands.
408 */
409 variant->id = util_bitmask_add(svga->shader_id_bm);
410 if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
411 return PIPE_ERROR_OUT_OF_MEMORY;
412 }
413
414 /* Create gb memory for the shader and upload the shader code */
415 variant->gb_shader = swc->shader_create(swc,
416 variant->id, type,
417 variant->tokens, codeLen);
418
419 if (!variant->gb_shader) {
420 /* Free the shader ID */
421 assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
422 goto fail_no_allocation;
423 }
424
425 /**
426 * Since we don't want to do any flush within state emission to avoid
427 * partial state in a command buffer, it's important to make sure that
428 * there is enough room to send both the DXDefineShader & DXBindShader
429 * commands in the same command buffer. So let's send both
430 * commands in one command reservation. If it fails, we'll undo
431 * the shader creation and return an error.
432 */
433 ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
434 variant->id, type, codeLen);
435
436 if (ret != PIPE_OK)
437 goto fail;
438
439 return PIPE_OK;
440
441 fail:
442 swc->shader_destroy(swc, variant->gb_shader);
443 variant->gb_shader = NULL;
444
445 fail_no_allocation:
446 util_bitmask_clear(svga->shader_id_bm, variant->id);
447 variant->id = UTIL_BITMASK_INVALID_INDEX;
448
449 return PIPE_ERROR_OUT_OF_MEMORY;
450 }
451
452 /**
453 * Issue the SVGA3D commands to define a new shader.
454 * \param variant contains the shader tokens, etc. The result->id field will
455 * be set here.
456 */
457 enum pipe_error
458 svga_define_shader(struct svga_context *svga,
459 SVGA3dShaderType type,
460 struct svga_shader_variant *variant)
461 {
462 unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
463 enum pipe_error ret;
464
465 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);
466
467 variant->id = UTIL_BITMASK_INVALID_INDEX;
468
469 if (svga_have_gb_objects(svga)) {
470 if (svga_have_vgpu10(svga))
471 ret = define_gb_shader_vgpu10(svga, type, variant, codeLen);
472 else
473 ret = define_gb_shader_vgpu9(svga, type, variant, codeLen);
474 }
475 else {
476 /* Allocate an integer ID for the shader */
477 variant->id = util_bitmask_add(svga->shader_id_bm);
478 if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
479 ret = PIPE_ERROR_OUT_OF_MEMORY;
480 goto done;
481 }
482
483 /* Issue SVGA3D device command to define the shader */
484 ret = SVGA3D_DefineShader(svga->swc,
485 variant->id,
486 type,
487 variant->tokens,
488 codeLen);
489 if (ret != PIPE_OK) {
490 /* free the ID */
491 assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
492 util_bitmask_clear(svga->shader_id_bm, variant->id);
493 variant->id = UTIL_BITMASK_INVALID_INDEX;
494 }
495 }
496
497 done:
498 SVGA_STATS_TIME_POP(svga_sws(svga));
499 return ret;
500 }
501
502
503 /**
504 * Issue the SVGA3D commands to set/bind a shader.
505 * \param result the shader to bind.
506 */
507 enum pipe_error
508 svga_set_shader(struct svga_context *svga,
509 SVGA3dShaderType type,
510 struct svga_shader_variant *variant)
511 {
512 enum pipe_error ret;
513 unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;
514
515 assert(type == SVGA3D_SHADERTYPE_VS ||
516 type == SVGA3D_SHADERTYPE_GS ||
517 type == SVGA3D_SHADERTYPE_PS);
518
519 if (svga_have_gb_objects(svga)) {
520 struct svga_winsys_gb_shader *gbshader =
521 variant ? variant->gb_shader : NULL;
522
523 if (svga_have_vgpu10(svga))
524 ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
525 else
526 ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
527 }
528 else {
529 ret = SVGA3D_SetShader(svga->swc, type, id);
530 }
531
532 return ret;
533 }
534
535
536 struct svga_shader_variant *
537 svga_new_shader_variant(struct svga_context *svga)
538 {
539 svga->hud.num_shaders++;
540 return CALLOC_STRUCT(svga_shader_variant);
541 }
542
543
544 enum pipe_error
545 svga_destroy_shader_variant(struct svga_context *svga,
546 SVGA3dShaderType type,
547 struct svga_shader_variant *variant)
548 {
549 enum pipe_error ret = PIPE_OK;
550
551 if (svga_have_gb_objects(svga) && variant->gb_shader) {
552 if (svga_have_vgpu10(svga)) {
553 struct svga_winsys_context *swc = svga->swc;
554 swc->shader_destroy(swc, variant->gb_shader);
555 ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
556 if (ret != PIPE_OK) {
557 /* flush and try again */
558 svga_context_flush(svga, NULL);
559 ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
560 }
561 util_bitmask_clear(svga->shader_id_bm, variant->id);
562 }
563 else {
564 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
565 sws->shader_destroy(sws, variant->gb_shader);
566 }
567 variant->gb_shader = NULL;
568 }
569 else {
570 if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
571 ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
572 if (ret != PIPE_OK) {
573 /* flush and try again */
574 svga_context_flush(svga, NULL);
575 ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
576 assert(ret == PIPE_OK);
577 }
578 util_bitmask_clear(svga->shader_id_bm, variant->id);
579 }
580 }
581
582 FREE((unsigned *)variant->tokens);
583 FREE(variant);
584
585 svga->hud.num_shaders--;
586
587 return ret;
588 }
589
590 /*
591 * Rebind shaders.
592 * Called at the beginning of every new command buffer to ensure that
593 * shaders are properly paged-in. Instead of sending the SetShader
594 * command, this function sends a private allocation command to
595 * page in a shader. This avoids emitting redundant state to the device
596 * just to page in a resource.
597 */
598 enum pipe_error
599 svga_rebind_shaders(struct svga_context *svga)
600 {
601 struct svga_winsys_context *swc = svga->swc;
602 struct svga_hw_draw_state *hw = &svga->state.hw_draw;
603 enum pipe_error ret;
604
605 assert(svga_have_vgpu10(svga));
606
607 /**
608 * If the underlying winsys layer does not need resource rebinding,
609 * just clear the rebind flags and return.
610 */
611 if (swc->resource_rebind == NULL) {
612 svga->rebind.flags.vs = 0;
613 svga->rebind.flags.gs = 0;
614 svga->rebind.flags.fs = 0;
615
616 return PIPE_OK;
617 }
618
619 if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
620 ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
621 if (ret != PIPE_OK)
622 return ret;
623 }
624 svga->rebind.flags.vs = 0;
625
626 if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
627 ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
628 if (ret != PIPE_OK)
629 return ret;
630 }
631 svga->rebind.flags.gs = 0;
632
633 if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
634 ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
635 if (ret != PIPE_OK)
636 return ret;
637 }
638 svga->rebind.flags.fs = 0;
639
640 return PIPE_OK;
641 }