/**********************************************************
 * Copyright 2008-2012 VMware, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/

#include "util/u_bitmask.h"
#include "util/u_memory.h"
#include "svga_context.h"
#include "svga_cmd.h"
#include "svga_format.h"
#include "svga_shader.h"


/**
 * This bit isn't really used anywhere. It only serves to help
 * generate a unique "signature" for the vertex shader output bitmask.
 * Shader input/output signatures are used to resolve shader linking
 * issues.
 */
#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)


/**
 * Use the shader info to generate a bitmask indicating which generic
 * inputs are used by the shader. A set bit indicates that GENERIC[i]
 * is used.
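 * For example, a shader that reads only GENERIC[0] and GENERIC[2]
 * yields the mask 0x5.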
 */
uint64_t
svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_inputs; i++) {
      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
         unsigned j = info->input_semantic_index[i];
         assert(j < sizeof(mask) * 8);
         mask |= ((uint64_t) 1) << j;
      }
   }

   return mask;
}


/**
 * Scan shader info to return a bitmask of written outputs.
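 * A TGSI_SEMANTIC_FOG output is folded into the mask as
 * FOG_GENERIC_BIT (bit 63).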
 */
uint64_t
svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_outputs; i++) {
      switch (info->output_semantic_name[i]) {
      case TGSI_SEMANTIC_GENERIC:
         {
            unsigned j = info->output_semantic_index[i];
            assert(j < sizeof(mask) * 8);
            mask |= ((uint64_t) 1) << j;
         }
         break;
      case TGSI_SEMANTIC_FOG:
         mask |= FOG_GENERIC_BIT;
         break;
      }
   }

   return mask;
}


/**
 * Given a mask of used generic variables (as returned by the above functions)
 * fill in a table which maps those indexes to small integers.
 * This table is used by the remap_generic_index() function in
 * svga_tgsi_decl_sm30.c
 * Example: if generics_mask = binary(1010), GENERIC[1] and GENERIC[3]
 * are used. Since texcoord[0] is reserved, numbering starts at 1 and
 * the remap_table will contain:
 *   table[1] = 1;
 *   table[3] = 2;
 * The remaining table entries are set to -1; svga_remap_generic_index()
 * lazily assigns the next unused value to them if they are ever queried.
 */
void
svga_remap_generics(uint64_t generics_mask,
                    int8_t remap_table[MAX_GENERIC_VARYING])
{
   /* Note: texcoord[0] is reserved, so start counting at 1 */
   unsigned count = 1, i;

   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
      remap_table[i] = -1;
   }

   /* for each bit set in generics_mask */
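   /* ffsll() returns the 1-based position of the least significant set
    * bit (0 if none), so subtracting 1 yields the generic index; the bit
    * is then cleared to advance the loop.
    */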
   while (generics_mask) {
      unsigned index = ffsll(generics_mask) - 1;
      remap_table[index] = count++;
      generics_mask &= ~((uint64_t) 1 << index);
   }
}


/**
 * Use the generic remap table to map a TGSI generic varying variable
 * index to a small integer. If the remapping table doesn't have a
 * valid value for the given index (the table entry is -1) it means
 * the fragment shader doesn't use that VS output. Just allocate
 * the next free value in that case. Alternately, we could cull
 * VS instructions that write to the register, or replace the register
 * with a dummy temp register.
 * XXX TODO: we should do one of the latter, as it would save precious
 * texcoord registers.
 */
int
svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
                         int generic_index)
{
   assert(generic_index < MAX_GENERIC_VARYING);

   if (generic_index >= MAX_GENERIC_VARYING) {
      /* just don't return a random/garbage value */
      generic_index = MAX_GENERIC_VARYING - 1;
   }

   if (remap_table[generic_index] == -1) {
      /* This is a VS output that has no matching PS input. Find a
       * free index.
       */
      int i, max = 0;
      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
         max = MAX2(max, remap_table[i]);
      }
      remap_table[generic_index] = max + 1;
   }

   return remap_table[generic_index];
}


/**
 * Initialize the shader-neutral fields of svga_compile_key from context
 * state. This is basically the texture-related state.
 */
void
svga_init_shader_key_common(const struct svga_context *svga,
                            enum pipe_shader_type shader,
                            struct svga_compile_key *key)
{
   unsigned i, idx = 0;

   assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));

   /* In case the number of samplers and sampler_views doesn't match,
    * loop over the lower of the two counts.
    */
   key->num_textures = MIN2(svga->curr.num_sampler_views[shader],
                            svga->curr.num_samplers[shader]);

   for (i = 0; i < key->num_textures; i++) {
      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
      const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
      if (view && sampler) {
         assert(view->texture);
         assert(view->texture->target < (1 << 4)); /* texture_target:4 */

         /* 1D/2D array textures with one slice are treated as non-arrays
          * by the SVGA3D device. Convert the texture type here so that
          * we emit the right TEX/SAMPLE instruction in the shader.
          */
         if (view->texture->target == PIPE_TEXTURE_1D_ARRAY ||
             view->texture->target == PIPE_TEXTURE_2D_ARRAY) {
            if (view->texture->array_size == 1) {
               key->tex[i].is_array = 0;
            }
            else {
               assert(view->texture->array_size > 1);
               key->tex[i].is_array = 1;
            }
         }

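         /* Unnormalized (rect) coordinates must be scaled by the texture
          * size in the shader itself; width_height_idx records which
          * entry in the texture-size constants holds that size.
          */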
         if (!sampler->normalized_coords) {
            assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
            key->tex[i].width_height_idx = idx++;
            key->tex[i].unnormalized = TRUE;
            ++key->num_unnormalized_coords;
         }

         key->tex[i].swizzle_r = view->swizzle_r;
         key->tex[i].swizzle_g = view->swizzle_g;
         key->tex[i].swizzle_b = view->swizzle_b;
         key->tex[i].swizzle_a = view->swizzle_a;
      }
   }
}


/** Search for a compiled shader variant with the same compile key */
struct svga_shader_variant *
svga_search_shader_key(const struct svga_shader *shader,
                       const struct svga_compile_key *key)
{
   struct svga_shader_variant *variant = shader->variants;

   assert(key);

   for ( ; variant; variant = variant->next) {
      if (svga_compile_keys_equal(key, &variant->key))
         return variant;
   }
   return NULL;
}

/** Search for a shader with the same token key */
struct svga_shader *
svga_search_shader_token_key(struct svga_shader *pshader,
                             const struct svga_token_key *key)
{
   struct svga_shader *shader = pshader;

   assert(key);

   for ( ; shader; shader = shader->next) {
      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
         return shader;
   }
   return NULL;
}

/**
 * Helper function to define a gb shader for a non-vgpu10 device.
 */
static enum pipe_error
define_gb_shader_vgpu9(struct svga_context *svga,
                       SVGA3dShaderType type,
                       struct svga_shader_variant *variant,
                       unsigned codeLen)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   enum pipe_error ret;

   /**
    * Create gb memory for the shader and upload the shader code.
    * The kernel module will allocate an ID for the shader and issue
    * the DefineGBShader command.
    */
   variant->gb_shader = sws->shader_create(sws, type,
                                           variant->tokens, codeLen);

   if (!variant->gb_shader)
      return PIPE_ERROR_OUT_OF_MEMORY;

   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);

   return ret;
}

/**
 * Helper function to define a gb shader for a vgpu10 device.
 */
static enum pipe_error
define_gb_shader_vgpu10(struct svga_context *svga,
                        SVGA3dShaderType type,
                        struct svga_shader_variant *variant,
                        unsigned codeLen)
{
   struct svga_winsys_context *swc = svga->swc;
   enum pipe_error ret;

   /**
    * Shaders in a VGPU10-enabled device reside in the device COTable.
    * The SVGA driver allocates an integer ID for the shader and issues
    * the DXDefineShader and DXBindShader commands.
    */
   variant->id = util_bitmask_add(svga->shader_id_bm);
   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   /* Create gb memory for the shader and upload the shader code */
   variant->gb_shader = swc->shader_create(swc,
                                           variant->id, type,
                                           variant->tokens, codeLen);

   if (!variant->gb_shader) {
      /* Free the shader ID */
      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
      goto fail_no_allocation;
   }

   /**
    * Since we don't want to flush within state emission (to avoid
    * leaving partial state in a command buffer), it's important to make
    * sure that there is enough room to send both the DXDefineShader and
    * DXBindShader commands in the same command buffer. So let's send
    * both commands in one command reservation. If it fails, we'll undo
    * the shader creation and return an error.
    */
   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
                                           variant->id, type, codeLen);

   if (ret != PIPE_OK)
      goto fail;

   return PIPE_OK;

fail:
   swc->shader_destroy(swc, variant->gb_shader);
   variant->gb_shader = NULL;

fail_no_allocation:
   util_bitmask_clear(svga->shader_id_bm, variant->id);
   variant->id = UTIL_BITMASK_INVALID_INDEX;

   return PIPE_ERROR_OUT_OF_MEMORY;
}

/**
 * Issue the SVGA3D commands to define a new shader.
 * \param variant  contains the shader tokens, etc.  The variant->id field
 *                 will be set here.
 */
enum pipe_error
svga_define_shader(struct svga_context *svga,
                   SVGA3dShaderType type,
                   struct svga_shader_variant *variant)
{
   unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
   enum pipe_error ret;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);

   variant->id = UTIL_BITMASK_INVALID_INDEX;

   if (svga_have_gb_objects(svga)) {
      if (svga_have_vgpu10(svga))
         ret = define_gb_shader_vgpu10(svga, type, variant, codeLen);
      else
         ret = define_gb_shader_vgpu9(svga, type, variant, codeLen);
   }
   else {
      /* Allocate an integer ID for the shader */
      variant->id = util_bitmask_add(svga->shader_id_bm);
      if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;
         goto done;
      }

      /* Issue SVGA3D device command to define the shader */
      ret = SVGA3D_DefineShader(svga->swc,
                                variant->id,
                                type,
                                variant->tokens,
                                codeLen);
      if (ret != PIPE_OK) {
         /* free the ID */
         assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
         util_bitmask_clear(svga->shader_id_bm, variant->id);
         variant->id = UTIL_BITMASK_INVALID_INDEX;
      }
   }

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return ret;
}


/**
 * Issue the SVGA3D commands to set/bind a shader.
 * \param variant  the shader variant to bind.
 */
enum pipe_error
svga_set_shader(struct svga_context *svga,
                SVGA3dShaderType type,
                struct svga_shader_variant *variant)
{
   enum pipe_error ret;
   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;
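   /* Note: a NULL variant unbinds the stage; the device is sent
    * SVGA3D_INVALID_ID (or a NULL gb shader) in that case.
    */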

   assert(type == SVGA3D_SHADERTYPE_VS ||
          type == SVGA3D_SHADERTYPE_GS ||
          type == SVGA3D_SHADERTYPE_PS);

   if (svga_have_gb_objects(svga)) {
      struct svga_winsys_gb_shader *gbshader =
         variant ? variant->gb_shader : NULL;

      if (svga_have_vgpu10(svga))
         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
      else
         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
   }
   else {
      ret = SVGA3D_SetShader(svga->swc, type, id);
   }

   return ret;
}


struct svga_shader_variant *
svga_new_shader_variant(struct svga_context *svga)
{
   svga->hud.num_shaders++;
   return CALLOC_STRUCT(svga_shader_variant);
}


enum pipe_error
svga_destroy_shader_variant(struct svga_context *svga,
                            SVGA3dShaderType type,
                            struct svga_shader_variant *variant)
{
   enum pipe_error ret = PIPE_OK;

   if (svga_have_gb_objects(svga) && variant->gb_shader) {
      if (svga_have_vgpu10(svga)) {
         struct svga_winsys_context *swc = svga->swc;
         swc->shader_destroy(swc, variant->gb_shader);
         ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
         if (ret != PIPE_OK) {
            /* flush and try again */
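            /* The command most likely failed because the current command
             * buffer is full; flushing submits it and makes room so the
             * command can be issued again.
             */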
            svga_context_flush(svga, NULL);
            ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
         }
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
      else {
         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
         sws->shader_destroy(sws, variant->gb_shader);
      }
      variant->gb_shader = NULL;
   }
   else {
      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
         ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
         if (ret != PIPE_OK) {
            /* flush and try again */
            svga_context_flush(svga, NULL);
            ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
            assert(ret == PIPE_OK);
         }
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
   }

   FREE((unsigned *)variant->tokens);
   FREE(variant);

   svga->hud.num_shaders--;

   return ret;
}

/*
 * Rebind shaders.
 * Called at the beginning of every new command buffer to ensure that
 * shaders are properly paged-in. Instead of sending the SetShader
 * command, this function sends a private allocation command to
 * page in a shader. This avoids emitting redundant state to the device
 * just to page in a resource.
 */
enum pipe_error
svga_rebind_shaders(struct svga_context *svga)
{
   struct svga_winsys_context *swc = svga->swc;
   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
   enum pipe_error ret;

   assert(svga_have_vgpu10(svga));

   /**
    * If the underlying winsys layer does not need resource rebinding,
    * just clear the rebind flags and return.
    */
   if (swc->resource_rebind == NULL) {
      svga->rebind.flags.vs = 0;
      svga->rebind.flags.gs = 0;
      svga->rebind.flags.fs = 0;

      return PIPE_OK;
   }

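   /* Each flag is cleared only after the corresponding shader has been
    * successfully rebound, so a failure here leaves the flag set and the
    * rebind is retried with the next command buffer.
    */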
   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.vs = 0;

   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.gs = 0;

   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.fs = 0;

   return PIPE_OK;
}