1 /**********************************************************
2 * Copyright 2008-2012 VMware, Inc. All rights reserved.
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 **********************************************************/
26 #include "util/u_bitmask.h"
27 #include "util/u_memory.h"
28 #include "util/format/u_format.h"
29 #include "svga_context.h"
31 #include "svga_format.h"
32 #include "svga_shader.h"
33 #include "svga_resource_texture.h"
/**
 * This bit isn't really used anywhere.  It only serves to help
 * generate a unique "signature" for the vertex shader output bitmask.
 * Shader input/output signatures are used to resolve shader linking
 * issues.
 * NOTE(review): the comment delimiters around this block appear to have
 * been lost when this copy of the file was extracted.
 */
#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)
/**
 * Use the shader info to generate a bitmask indicating which generic
 * inputs are used by the shader.  A set bit indicates that GENERIC[i]
 * is used.
 *
 * NOTE(review): the function's storage class / return type line, opening
 * brace, the declarations of 'i' and 'mask', and the final "return mask;"
 * are missing from this copy of the file — confirm against upstream.
 */
svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
   /* Walk every declared input register. */
   for (i = 0; i < info->num_inputs; i++) {
      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
         unsigned j = info->input_semantic_index[i];
         /* the semantic index must fit within the 64-bit mask */
         assert(j < sizeof(mask) * 8);
         mask |= ((uint64_t) 1) << j;
/**
 * Scan shader info to return a bitmask of written outputs.
 * A GENERIC output sets bit [semantic index]; a written FOG output is
 * folded into the mask via the reserved FOG_GENERIC_BIT.
 *
 * NOTE(review): the function header lines, local declarations, 'break'
 * statements and the final return are missing from this copy — confirm
 * against upstream.
 */
svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
   for (i = 0; i < info->num_outputs; i++) {
      switch (info->output_semantic_name[i]) {
      case TGSI_SEMANTIC_GENERIC:
         unsigned j = info->output_semantic_index[i];
         /* the semantic index must fit within the 64-bit mask */
         assert(j < sizeof(mask) * 8);
         mask |= ((uint64_t) 1) << j;
      case TGSI_SEMANTIC_FOG:
         /* fog contributes a reserved bit so it participates in the
          * output signature used for shader linking
          */
         mask |= FOG_GENERIC_BIT;
/**
 * Given a mask of used generic variables (as returned by the above functions)
 * fill in a table which maps those indexes to small integers.
 * This table is used by the remap_generic_index() function in
 * svga_tgsi_decl_sm30.c
 * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
 * GENERIC[3] are used.  The remap_table will contain
 * remap_table[1] = 1 and remap_table[3] = 2 (values follow from the
 * counting code below; the original example lines are missing here).
 * The remaining table entries will be filled in with the next unused
 * generic index (in this example, 2).
 *
 * NOTE(review): the function header lines, braces and the loop body that
 * presumably initializes remap_table[] are missing from this copy.
 */
svga_remap_generics(uint64_t generics_mask,
                    int8_t remap_table[MAX_GENERIC_VARYING])
   /* Note texcoord[0] is reserved so start at 1 */
   unsigned count = 1, i;

   for (i = 0; i < MAX_GENERIC_VARYING; i++) {

   /* for each bit set in generic_mask */
   while (generics_mask) {
      /* index of lowest set bit (ffsll returns 1-based position) */
      unsigned index = ffsll(generics_mask) - 1;
      remap_table[index] = count++;
      /* clear the handled bit so ffsll() finds the next one */
      generics_mask &= ~((uint64_t) 1 << index);
/**
 * Use the generic remap table to map a TGSI generic varying variable
 * index to a small integer.  If the remapping table doesn't have a
 * valid value for the given index (the table entry is -1) it means
 * the fragment shader doesn't use that VS output.  Just allocate
 * the next free value in that case.  Alternately, we could cull
 * VS instructions that write to register, or replace the register
 * with a dummy temp register.
 * XXX TODO: we should do one of the later as it would save precious
 * texcoord registers.
 *
 * NOTE(review): the second parameter (generic_index), local declarations
 * of 'i'/'max', and several closing braces are missing from this copy.
 */
svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
   assert(generic_index < MAX_GENERIC_VARYING);

   if (generic_index >= MAX_GENERIC_VARYING) {
      /* just don't return a random/garbage value */
      generic_index = MAX_GENERIC_VARYING - 1;

   if (remap_table[generic_index] == -1) {
      /* This is a VS output that has no matching PS input.  Find the
       * current maximum mapped value so we can allocate the next one.
       */
      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
         max = MAX2(max, remap_table[i]);
      /* allocate the next free slot for this unmatched output */
      remap_table[generic_index] = max + 1;

   return remap_table[generic_index];
/* Swizzle lookup tables, each indexed by an enum pipe_swizzle value.
 * The initializer contents are missing from this copy of the file;
 * the roles below are inferred from the table names and from their
 * selection in svga_init_shader_key_common() — TODO confirm upstream.
 */

/* presumably passes all channels through unchanged */
static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {

/* presumably forces the alpha channel to 1 */
static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {

/* selected for view formats flagged TF_000X */
static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {

/* selected for view formats flagged TF_XXXX */
static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {

/* selected for view formats flagged TF_XXX1 */
static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {

/* selected for view formats flagged TF_XXXY */
static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
/**
 * Initialize the shader-neutral fields of svga_compile_key from context
 * state.  This is basically the texture-related state.
 *
 * NOTE(review): braces, 'break'/'else' statements, the 'default:' label,
 * and local declarations (i, idx, tf_flags) are missing from this copy of
 * the file — confirm against upstream.
 */
svga_init_shader_key_common(const struct svga_context *svga,
                            enum pipe_shader_type shader_type,
                            const struct svga_shader *shader,
                            struct svga_compile_key *key)
   assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));

   /* In case the number of samplers and sampler_views doesn't match,
    * cover both by taking the larger of the two counts.
    * NOTE(review): the original comment said "loop over the lower of the
    * two counts" but the code uses MAX2 — the comment, not the code, was
    * presumably stale.
    */
   key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
                            svga->curr.num_samplers[shader_type]);

   for (i = 0; i < key->num_textures; i++) {
      struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
      const struct svga_sampler_state
         *sampler = svga->curr.sampler[shader_type][i];

      assert(view->texture);
      /* target must fit in the key's 4-bit bitfield */
      assert(view->texture->target < (1 << 4)); /* texture_target:4 */

      /* 1D/2D array textures with one slice and cube map array textures
       * with one cube are treated as non-arrays by the SVGA3D device.
       * Set the is_array flag only if we know that we have more than 1
       * element.  This will be used to select shader instruction/resource
       * types during shader translation.
       */
      switch (view->texture->target) {
      case PIPE_TEXTURE_1D_ARRAY:
      case PIPE_TEXTURE_2D_ARRAY:
         key->tex[i].is_array = view->texture->array_size > 1;
      case PIPE_TEXTURE_CUBE_ARRAY:
         /* a cube array counts as an array only beyond one cube (6 faces) */
         key->tex[i].is_array = view->texture->array_size > 6;
         ; /* nothing / silence compiler warning */

      /* sample count must fit in the key's 5-bit bitfield */
      assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
      key->tex[i].num_samples = view->texture->nr_samples;

      /* Choose the swizzle table used to translate the view swizzles. */
      const enum pipe_swizzle *swizzle_tab;
      if (view->texture->target == PIPE_BUFFER) {
         SVGA3dSurfaceFormat svga_format;

         /* Apply any special swizzle mask for the view format if needed */
         svga_translate_texture_buffer_view_format(view->format,
                                                   &svga_format, &tf_flags);
         if (tf_flags & TF_000X)
            swizzle_tab = set_000X;
         else if (tf_flags & TF_XXXX)
            swizzle_tab = set_XXXX;
         else if (tf_flags & TF_XXX1)
            swizzle_tab = set_XXX1;
         else if (tf_flags & TF_XXXY)
            swizzle_tab = set_XXXY;
         /* no special flag: pass channels through unchanged */
            swizzle_tab = copy_alpha;

         /* If we have a non-alpha view into an svga3d surface with an
          * alpha channel, then explicitly set the alpha channel to 1
          * when sampling.  Note that we need to check the
          * actual device format to cover also imported surface cases.
          */
         (!util_format_has_alpha(view->format) &&
          svga_texture_device_format_has_alpha(view->texture)) ?
            set_alpha : copy_alpha;

         /* DXT1 formats carry no real alpha; force alpha to 1 */
         if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
             view->texture->format == PIPE_FORMAT_DXT1_SRGB)
            swizzle_tab = set_alpha;

      /* Save the compare function as we need to handle
       * depth compare in the shader.
       */
      key->tex[i].compare_mode = sampler->compare_mode;
      key->tex[i].compare_func = sampler->compare_func;

      /* translate the view swizzles through the chosen table */
      key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
      key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
      key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
      key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];

      if (!sampler->normalized_coords) {
         /* record which texture supplies width/height for unnormalized
          * coordinate scaling; must fit in 5 bits
          */
         assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
         key->tex[i].width_height_idx = idx++;
         key->tex[i].unnormalized = TRUE;
         ++key->num_unnormalized_coords;

      /* nearest filtering needs a texel bias during translation */
      if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
          sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
         key->tex[i].texel_bias = TRUE;

   /* propagate the rasterizer's vertex-color clamping, if any */
   key->clamp_vertex_color = svga->curr.rast ?
      svga->curr.rast->templ.clamp_vertex_color : 0;
/** Search for a compiled shader variant with the same compile key.
 * NOTE(review): the opening brace, the "return" inside the loop and the
 * not-found return are missing from this copy — confirm upstream.
 */
struct svga_shader_variant *
svga_search_shader_key(const struct svga_shader *shader,
                       const struct svga_compile_key *key)
   struct svga_shader_variant *variant = shader->variants;

   /* linear scan of the shader's variant list */
   for ( ; variant; variant = variant->next) {
      if (svga_compile_keys_equal(key, &variant->key))
/** Search for a shader with the same token key.
 * NOTE(review): the return type line, braces and return statements are
 * missing from this copy — confirm upstream.
 */
svga_search_shader_token_key(struct svga_shader *pshader,
                             const struct svga_token_key *key)
   struct svga_shader *shader = pshader;

   /* linear scan of the shader list, comparing token keys bytewise */
   for ( ; shader; shader = shader->next) {
      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
/**
 * Helper function to define a gb shader for non-vgpu10 device.
 * NOTE(review): the codeLen parameter line, braces, the 'ret' declaration
 * and the final return are missing from this copy — confirm upstream.
 */
static enum pipe_error
define_gb_shader_vgpu9(struct svga_context *svga,
                       struct svga_shader_variant *variant,
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;

   /*
    * Create gb memory for the shader and upload the shader code.
    * Kernel module will allocate an id for the shader and issue
    * the DefineGBShader command.
    */
   variant->gb_shader = sws->shader_create(sws, variant->type,
                                           variant->tokens, codeLen);

   /* account the shader memory in the HUD statistics */
   svga->hud.shader_mem_used += codeLen;

   if (!variant->gb_shader)
      return PIPE_ERROR_OUT_OF_MEMORY;

   /* bind the newly created gb shader */
   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
/**
 * Helper function to define a gb shader for vgpu10 device.
 * NOTE(review): the codeLen parameter line, braces, 'ret' declaration,
 * intermediate arguments of the shader_create/DefineAndBindShader calls,
 * and the 'fail'/'fail_no_allocation' labels targeted by the cleanup code
 * are missing from this copy — confirm upstream.
 */
static enum pipe_error
define_gb_shader_vgpu10(struct svga_context *svga,
                        struct svga_shader_variant *variant,
   struct svga_winsys_context *swc = svga->swc;
   /* total gb memory: shader code plus the input/output signature blob */
   unsigned len = codeLen + variant->signatureLen;

   /*
    * Shaders in VGPU10 enabled device reside in the device COTable.
    * SVGA driver will allocate an integer ID for the shader and
    * issue DXDefineShader and DXBindShader commands.
    */
   variant->id = util_bitmask_add(svga->shader_id_bm);
   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
      return PIPE_ERROR_OUT_OF_MEMORY;

   /* Create gb memory for the shader and upload the shader code */
   variant->gb_shader = swc->shader_create(swc,
                                           variant->id, variant->type,
                                           variant->tokens, codeLen,
                                           variant->signatureLen);

   /* account the shader memory in the HUD statistics */
   svga->hud.shader_mem_used += len;

   if (!variant->gb_shader) {
      /* Free the shader ID */
      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
      goto fail_no_allocation;

   /*
    * Since we don't want to do any flush within state emission to avoid
    * partial state in a command buffer, it's important to make sure that
    * there is enough room to send both the DXDefineShader & DXBindShader
    * commands in the same command buffer.  So let's send both
    * commands in one command reservation.  If it fails, we'll undo
    * the shader creation and return an error.
    */
   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
                                           variant->id, variant->type,

   /* failure cleanup: destroy the gb shader object... */
   swc->shader_destroy(swc, variant->gb_shader);
   variant->gb_shader = NULL;

   /* ...and release the allocated shader ID */
   util_bitmask_clear(svga->shader_id_bm, variant->id);
   variant->id = UTIL_BITMASK_INVALID_INDEX;

   return PIPE_ERROR_OUT_OF_MEMORY;
/**
 * Issue the SVGA3D commands to define a new shader.
 * \param variant contains the shader tokens, etc.  The result->id field will
 *        be set (the remainder of this sentence is missing from this copy).
 *
 * NOTE(review): the return type line, braces, 'ret' declaration, the
 * 'else' branches, the SVGA3D_DefineShader trailing arguments and the
 * final return are missing from this copy — confirm upstream.
 */
svga_define_shader(struct svga_context *svga,
                   struct svga_shader_variant *variant)
   unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);

   /* start with an invalid ID so failure paths are consistent */
   variant->id = UTIL_BITMASK_INVALID_INDEX;

   if (svga_have_gb_objects(svga)) {
      if (svga_have_vgpu10(svga))
         ret = define_gb_shader_vgpu10(svga, variant, codeLen);
      /* non-vgpu10 guest-backed path */
         ret = define_gb_shader_vgpu9(svga, variant, codeLen);

      /* Allocate an integer ID for the shader */
      variant->id = util_bitmask_add(svga->shader_id_bm);
      if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;

      /* Issue SVGA3D device command to define the shader */
      ret = SVGA3D_DefineShader(svga->swc,

      if (ret != PIPE_OK) {
         /* undo the ID allocation on failure */
         assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
         util_bitmask_clear(svga->shader_id_bm, variant->id);
         variant->id = UTIL_BITMASK_INVALID_INDEX;

   SVGA_STATS_TIME_POP(svga_sws(svga));
/**
 * Issue the SVGA3D commands to set/bind a shader.
 * \param variant  the shader variant to bind, or NULL to unbind.
 * (NOTE(review): original said "\param result" which does not match the
 * parameter name — corrected here.)
 *
 * NOTE(review): the return type line, braces, 'ret' declaration, 'else'
 * branches and the final return are missing from this copy.
 */
svga_set_shader(struct svga_context *svga,
                SVGA3dShaderType type,
                struct svga_shader_variant *variant)
   /* a NULL variant unbinds via the device's invalid-ID sentinel */
   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;

   assert(type == SVGA3D_SHADERTYPE_VS ||
          type == SVGA3D_SHADERTYPE_GS ||
          type == SVGA3D_SHADERTYPE_PS ||
          type == SVGA3D_SHADERTYPE_HS ||
          type == SVGA3D_SHADERTYPE_DS ||
          type == SVGA3D_SHADERTYPE_CS);

   if (svga_have_gb_objects(svga)) {
      struct svga_winsys_gb_shader *gbshader =
         variant ? variant->gb_shader : NULL;

      if (svga_have_vgpu10(svga))
         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
      /* guest-backed, non-vgpu10 path */
         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);

   /* legacy (non-gb) path */
   ret = SVGA3D_SetShader(svga->swc, type, id);
/**
 * Allocate a new shader variant object of the size appropriate to the
 * given shader stage, record its SVGA3D shader type, and bump the HUD
 * shader counter.
 *
 * NOTE(review): the opening brace, the "switch (type)" header, 'break'
 * statements, any default case, a NULL-check of the allocation and the
 * final return are missing from this copy — confirm upstream.
 */
struct svga_shader_variant *
svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
   struct svga_shader_variant *variant;

   /* each stage uses a stage-specific derived variant struct */
   case PIPE_SHADER_FRAGMENT:
      variant = CALLOC(1, sizeof(struct svga_fs_variant));
   case PIPE_SHADER_GEOMETRY:
      variant = CALLOC(1, sizeof(struct svga_gs_variant));
   case PIPE_SHADER_VERTEX:
      variant = CALLOC(1, sizeof(struct svga_vs_variant));
   case PIPE_SHADER_TESS_EVAL:
      variant = CALLOC(1, sizeof(struct svga_tes_variant));
   case PIPE_SHADER_TESS_CTRL:
      variant = CALLOC(1, sizeof(struct svga_tcs_variant));

   /* translate pipe shader type to the SVGA3D shader type */
   variant->type = svga_shader_type(type);
   svga->hud.num_shaders++;
/**
 * Destroy a shader variant: release its device shader object (guest-backed
 * vgpu10, guest-backed vgpu9, or legacy), release its integer ID, and free
 * the variant's owned memory (signature, token buffer).
 *
 * NOTE(review): the return type line, braces, 'else' branches, the
 * trailing arguments of SVGA3D_DestroyShader and the final FREE of the
 * variant itself (if any) are missing from this copy — confirm upstream.
 */
svga_destroy_shader_variant(struct svga_context *svga,
                            struct svga_shader_variant *variant)
   if (svga_have_gb_objects(svga) && variant->gb_shader) {
      if (svga_have_vgpu10(svga)) {
         struct svga_winsys_context *swc = svga->swc;
         swc->shader_destroy(swc, variant->gb_shader);
         /* retry the destroy if the command buffer is full */
         SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id));
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      /* non-vgpu10 guest-backed path */
         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
         sws->shader_destroy(sws, variant->gb_shader);
      variant->gb_shader = NULL;

   /* legacy path: destroy by ID if one was ever allocated */
   if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
      SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id,
      util_bitmask_clear(svga->shader_id_bm, variant->id);

   FREE(variant->signature);
   /* tokens are stored const; cast away const to free the buffer we own */
   FREE((unsigned *)variant->tokens);

   svga->hud.num_shaders--;
/**
 * Called at the beginning of every new command buffer to ensure that
 * shaders are properly paged-in.  Instead of sending the SetShader
 * command, this function sends a private allocation command to
 * page in a shader.  This avoids emitting redundant state to the device
 * just to page in a resource.
 *
 * NOTE(review): the return type line, braces, 'ret' declaration,
 * per-stage error-check lines and the function's tail (which continues
 * past the end of this chunk) are missing from this copy — confirm
 * upstream.
 */
svga_rebind_shaders(struct svga_context *svga)
   struct svga_winsys_context *swc = svga->swc;
   struct svga_hw_draw_state *hw = &svga->state.hw_draw;

   /* rebinding is only meaningful on vgpu10 devices */
   assert(svga_have_vgpu10(svga));

   /*
    * If the underlying winsys layer does not need resource rebinding,
    * just clear the rebind flags and return.
    */
   if (swc->resource_rebind == NULL) {
      svga->rebind.flags.vs = 0;
      svga->rebind.flags.gs = 0;
      svga->rebind.flags.fs = 0;
      svga->rebind.flags.tcs = 0;
      svga->rebind.flags.tes = 0;

   /* For each stage flagged for rebind with a bound gb shader, reference
    * it for read so the device pages it in; then clear the stage's flag.
    */
   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
   svga->rebind.flags.vs = 0;

   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
   svga->rebind.flags.gs = 0;

   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
   svga->rebind.flags.fs = 0;

   if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader, SVGA_RELOC_READ);
   svga->rebind.flags.tcs = 0;

   if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader, SVGA_RELOC_READ);
   svga->rebind.flags.tes = 0;