svga: Performance fixes
[mesa.git] / src / gallium / drivers / svga / svga_state_tgsi_transform.c
1 /**********************************************************
2 * Copyright 2014 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26 #include "util/u_inlines.h"
27 #include "util/u_memory.h"
28 #include "util/u_bitmask.h"
29 #include "util/u_simple_shaders.h"
30 #include "tgsi/tgsi_ureg.h"
31 #include "tgsi/tgsi_point_sprite.h"
32 #include "tgsi/tgsi_dynamic_indexing.h"
33 #include "tgsi/tgsi_vpos.h"
34 #include "tgsi/tgsi_dump.h"
35 #include "tgsi/tgsi_info.h"
36
37 #include "svga_context.h"
38 #include "svga_shader.h"
39 #include "svga_tgsi.h"
40
41
42 /**
43 * Bind a new GS. This updates the derived current gs state, not the
44 * user-specified GS state.
45 */
46 static void
47 bind_gs_state(struct svga_context *svga,
48 struct svga_geometry_shader *gs)
49 {
50 svga->curr.gs = gs;
51 svga->dirty |= SVGA_NEW_GS;
52 }
53
54
55 static void
56 insert_at_head(struct svga_shader *head, struct svga_shader *shader)
57 {
58 shader->parent = head;
59 shader->next = head->next;
60 head->next = shader;
61 }
62
63
64 /**
65 * Bind shader
66 */
67 static void
68 bind_shader(struct svga_context *svga,
69 const enum pipe_shader_type shader_type,
70 struct svga_shader *shader)
71 {
72 switch (shader_type) {
73 case PIPE_SHADER_VERTEX:
74 svga->pipe.bind_vs_state(&svga->pipe, shader);
75 break;
76 case PIPE_SHADER_FRAGMENT:
77 /**
78 * Avoid pipe->bind_fs_state call because it goes through aapoint
79 * layer. We loose linked list of all transformed shaders if aapoint
80 * is used.
81 */
82 svga_bind_fs_state(&svga->pipe, shader);
83 break;
84 case PIPE_SHADER_GEOMETRY:
85 svga->pipe.bind_gs_state(&svga->pipe, shader);
86 break;
87 case PIPE_SHADER_TESS_CTRL:
88 svga->pipe.bind_tcs_state(&svga->pipe, shader);
89 break;
90 case PIPE_SHADER_TESS_EVAL:
91 svga->pipe.bind_tes_state(&svga->pipe, shader);
92 break;
93 default:
94 return;
95 }
96 }
97
98
99
100 /**
101 * Create shader
102 */
103 static void *
104 create_shader(struct svga_context *svga,
105 const enum pipe_shader_type shader_type,
106 struct pipe_shader_state *state)
107 {
108 switch (shader_type) {
109 case PIPE_SHADER_VERTEX:
110 return svga->pipe.create_vs_state(&svga->pipe, state);
111 case PIPE_SHADER_FRAGMENT:
112 /**
113 * Avoid pipe->create_fs_state call because it goes through aapoint
114 * layer. We loose linked list of all transformed shaders if aapoint
115 * is used.
116 */
117 return svga_create_fs_state(&svga->pipe, state);
118 case PIPE_SHADER_GEOMETRY:
119 return svga->pipe.create_gs_state(&svga->pipe, state);
120 case PIPE_SHADER_TESS_CTRL:
121 return svga->pipe.create_tcs_state(&svga->pipe, state);
122 case PIPE_SHADER_TESS_EVAL:
123 return svga->pipe.create_tes_state(&svga->pipe, state);
124 default:
125 return NULL;
126 }
127 }
128
129
130 static void
131 write_vpos(struct svga_context *svga,
132 struct svga_shader *shader)
133 {
134 struct svga_token_key key;
135 boolean use_existing = FALSE;
136 struct svga_shader *transform_shader;
137 const struct tgsi_shader_info *info = &shader->info;
138
139 /* Create a token key */
140 memset(&key, 0, sizeof key);
141 key.vs.write_position = 1;
142
143 if (shader->next) {
144 transform_shader = svga_search_shader_token_key(shader->next, &key);
145 if (transform_shader) {
146 use_existing = TRUE;
147 }
148 }
149
150 if (!use_existing) {
151 struct pipe_shader_state state;
152 struct tgsi_token *new_tokens = NULL;
153
154 new_tokens = tgsi_write_vpos(shader->tokens,
155 info->immediate_count);
156 if (!new_tokens)
157 return;
158
159 pipe_shader_state_from_tgsi(&state, new_tokens);
160
161 transform_shader = create_shader(svga, info->processor, &state);
162 insert_at_head(shader, transform_shader);
163 FREE(new_tokens);
164 }
165 transform_shader->token_key = key;
166 bind_shader(svga, info->processor, transform_shader);
167 }
168
169
170 /**
171 * transform_dynamic_indexing searches shader variant list to see if
172 * we have transformed shader for dynamic indexing and reuse/bind it. If we
173 * don't have transformed shader, then it will create new shader from which
174 * dynamic indexing will be removed. It will also be added to the shader
175 * variant list and this new shader will be bind to current svga state.
176 */
177 static void
178 transform_dynamic_indexing(struct svga_context *svga,
179 struct svga_shader *shader)
180 {
181 struct svga_token_key key;
182 boolean use_existing = FALSE;
183 struct svga_shader *transform_shader;
184 const struct tgsi_shader_info *info = &shader->info;
185
186 /* Create a token key */
187 memset(&key, 0, sizeof key);
188 key.dynamic_indexing = 1;
189
190 if (shader->next) {
191 transform_shader = svga_search_shader_token_key(shader->next, &key);
192 if (transform_shader) {
193 use_existing = TRUE;
194 }
195 }
196
197 struct tgsi_token *new_tokens = NULL;
198
199 if (!use_existing) {
200 struct pipe_shader_state state;
201 new_tokens = tgsi_remove_dynamic_indexing(shader->tokens,
202 info->const_buffers_declared,
203 info->samplers_declared,
204 info->immediate_count);
205 if (!new_tokens)
206 return;
207
208 pipe_shader_state_from_tgsi(&state, new_tokens);
209
210 transform_shader = create_shader(svga, info->processor, &state);
211 insert_at_head(shader, transform_shader);
212 }
213 transform_shader->token_key = key;
214 bind_shader(svga, info->processor, transform_shader);
215 if (new_tokens)
216 FREE(new_tokens);
217 }
218
219
220 /**
221 * emulate_point_sprite searches the shader variants list to see it there is
222 * a shader variant with a token string that matches the emulation
223 * requirement. It there isn't, then it will use a tgsi utility
224 * tgsi_add_point_sprite to transform the original token string to support
225 * point sprite. A new geometry shader state will be created with the
226 * transformed token string and added to the shader variants list of the
227 * original geometry shader. The new geometry shader state will then be
228 * bound as the current geometry shader.
229 */
230 static struct svga_shader *
231 emulate_point_sprite(struct svga_context *svga,
232 struct svga_shader *shader,
233 const struct tgsi_token *tokens)
234 {
235 struct svga_token_key key;
236 struct tgsi_token *new_tokens;
237 const struct tgsi_token *orig_tokens;
238 struct svga_geometry_shader *orig_gs = (struct svga_geometry_shader *)shader;
239 struct svga_geometry_shader *gs = NULL;
240 struct pipe_shader_state templ = {0};
241 struct svga_stream_output *streamout = NULL;
242 int pos_out_index = -1;
243 int aa_point_coord_index = -1;
244
245 assert(tokens != NULL);
246
247 orig_tokens = tokens;
248
249 /* Create a token key */
250 memset(&key, 0, sizeof key);
251 key.gs.writes_psize = 1;
252 key.gs.sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable;
253
254 key.gs.sprite_origin_upper_left =
255 !(svga->curr.rast->templ.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT);
256
257 key.gs.aa_point = svga->curr.rast->templ.point_smooth;
258
259 if (orig_gs) {
260
261 /* Check if the original geometry shader has stream output and
262 * if position is one of the outputs.
263 */
264 streamout = orig_gs->base.stream_output;
265 if (streamout) {
266 pos_out_index = streamout->pos_out_index;
267 key.gs.point_pos_stream_out = pos_out_index != -1;
268 }
269
270 /* Search the shader lists to see if there is a variant that matches
271 * this token key.
272 */
273 gs = (struct svga_geometry_shader *)
274 svga_search_shader_token_key(&orig_gs->base, &key);
275 }
276
277 /* If there isn't, then call the tgsi utility tgsi_add_point_sprite
278 * to transform the original tokens to support point sprite.
279 * Flip the sprite origin as SVGA3D device only supports an
280 * upper-left origin.
281 */
282 if (!gs) {
283 new_tokens = tgsi_add_point_sprite(orig_tokens,
284 key.gs.sprite_coord_enable,
285 key.gs.sprite_origin_upper_left,
286 key.gs.point_pos_stream_out,
287 key.gs.aa_point ?
288 &aa_point_coord_index : NULL);
289
290 if (!new_tokens) {
291 /* if no new tokens are generated for whatever reason, just return */
292 return NULL;
293 }
294
295 if (0) {
296 debug_printf("Before tgsi_add_point_sprite ---------------\n");
297 tgsi_dump(orig_tokens, 0);
298 debug_printf("After tgsi_add_point_sprite --------------\n");
299 tgsi_dump(new_tokens, 0);
300 }
301
302 pipe_shader_state_from_tgsi(&templ, new_tokens);
303 templ.stream_output.num_outputs = 0;
304
305 if (streamout) {
306 templ.stream_output = streamout->info;
307 /* The tgsi_add_point_sprite utility adds an extra output
308 * for the original point position for stream output purpose.
309 * We need to replace the position output register index in the
310 * stream output declaration with the new register index.
311 */
312 if (pos_out_index != -1) {
313 assert(orig_gs != NULL);
314 templ.stream_output.output[pos_out_index].register_index =
315 orig_gs->base.info.num_outputs;
316 }
317 }
318
319 /* Create a new geometry shader state with the new tokens */
320 gs = svga->pipe.create_gs_state(&svga->pipe, &templ);
321
322 /* Don't need the token string anymore. There is a local copy
323 * in the shader state.
324 */
325 FREE(new_tokens);
326
327 if (!gs) {
328 return NULL;
329 }
330
331 gs->wide_point = TRUE;
332 gs->aa_point_coord_index = aa_point_coord_index;
333 gs->base.token_key = key;
334 gs->base.parent = &orig_gs->base;
335 gs->base.next = NULL;
336
337 /* Add the new geometry shader to the head of the shader list
338 * pointed to by the original geometry shader.
339 */
340 if (orig_gs) {
341 gs->base.next = orig_gs->base.next;
342 orig_gs->base.next = &gs->base;
343 }
344 }
345
346 /* Bind the new geometry shader state */
347 bind_gs_state(svga, gs);
348
349 return &gs->base;
350 }
351
352 /**
353 * Generate a geometry shader that emits a wide point by drawing a quad.
354 * This function first creates a passthrough geometry shader and then
355 * calls emulate_point_sprite() to transform the geometry shader to
356 * support point sprite.
357 */
358 static struct svga_shader *
359 add_point_sprite_shader(struct svga_context *svga)
360 {
361 struct svga_vertex_shader *vs = svga->curr.vs;
362 struct svga_geometry_shader *orig_gs = vs->gs;
363 struct svga_geometry_shader *new_gs;
364 const struct tgsi_token *tokens;
365
366 if (orig_gs == NULL) {
367
368 /* If this is the first time adding a geometry shader to this
369 * vertex shader to support point sprite, then create
370 * a passthrough geometry shader first.
371 */
372 orig_gs = (struct svga_geometry_shader *)
373 util_make_geometry_passthrough_shader(
374 &svga->pipe, vs->base.info.num_outputs,
375 vs->base.info.output_semantic_name,
376 vs->base.info.output_semantic_index);
377
378 if (!orig_gs)
379 return NULL;
380 }
381 else {
382 if (orig_gs->base.parent)
383 orig_gs = (struct svga_geometry_shader *)orig_gs->base.parent;
384 }
385 tokens = orig_gs->base.tokens;
386
387 /* Call emulate_point_sprite to find or create a transformed
388 * geometry shader for supporting point sprite.
389 */
390 new_gs = (struct svga_geometry_shader *)
391 emulate_point_sprite(svga, &orig_gs->base, tokens);
392
393 /* If this is the first time creating a geometry shader to
394 * support vertex point size, then add the new geometry shader
395 * to the vertex shader.
396 */
397 if (vs->gs == NULL) {
398 vs->gs = new_gs;
399 }
400
401 return &new_gs->base;
402 }
403
404
405 static boolean
406 has_dynamic_indexing(const struct tgsi_shader_info *info)
407 {
408 return (info->dim_indirect_files & (1u << TGSI_FILE_CONSTANT)) ||
409 (info->indirect_files & (1u << TGSI_FILE_SAMPLER));
410 }
411
412
413 /* update_tgsi_transform provides a hook to transform a shader if needed.
414 */
415 static enum pipe_error
416 update_tgsi_transform(struct svga_context *svga, uint64_t dirty)
417 {
418 struct svga_geometry_shader *gs = svga->curr.user_gs; /* current gs */
419 struct svga_vertex_shader *vs = svga->curr.vs; /* currently bound vs */
420 struct svga_fragment_shader *fs = svga->curr.fs; /* currently bound fs */
421 struct svga_tcs_shader *tcs = svga->curr.tcs; /* currently bound tcs */
422 struct svga_tes_shader *tes = svga->curr.tes; /* currently bound tes */
423 struct svga_shader *orig_gs; /* original gs */
424 struct svga_shader *new_gs; /* new gs */
425
426 assert(svga_have_vgpu10(svga));
427
428 if (vs->base.info.num_outputs == 0) {
429 write_vpos(svga, &vs->base);
430 }
431
432 if (vs && has_dynamic_indexing(&vs->base.info)) {
433 transform_dynamic_indexing(svga, &vs->base);
434 }
435 if (fs && has_dynamic_indexing(&fs->base.info)) {
436 transform_dynamic_indexing(svga, &fs->base);
437 }
438 if (gs && has_dynamic_indexing(&gs->base.info)) {
439 transform_dynamic_indexing(svga, &gs->base);
440 }
441 if (tcs && has_dynamic_indexing(&tcs->base.info)) {
442 transform_dynamic_indexing(svga, &tcs->base);
443 }
444 if (tes && has_dynamic_indexing(&tes->base.info)) {
445 transform_dynamic_indexing(svga, &tes->base);
446 }
447
448 if (svga->curr.reduced_prim == PIPE_PRIM_POINTS) {
449 /* If the current prim type is POINTS and the current geometry shader
450 * emits wide points, transform the shader to emulate wide points using
451 * quads. NOTE: we don't do emulation of wide points in GS when
452 * transform feedback is enabled.
453 */
454 if (gs != NULL && !gs->base.stream_output &&
455 (gs->base.info.writes_psize || gs->wide_point)) {
456 orig_gs = gs->base.parent ? gs->base.parent : &gs->base;
457 new_gs = emulate_point_sprite(svga, orig_gs, orig_gs->tokens);
458 }
459
460 /* If there is not an active geometry shader and the current vertex
461 * shader emits wide point then create a new geometry shader to emulate
462 * wide point.
463 */
464 else if (gs == NULL && !vs->base.stream_output &&
465 (svga->curr.rast->pointsize > 1.0 ||
466 vs->base.info.writes_psize)) {
467 new_gs = add_point_sprite_shader(svga);
468 }
469 else {
470 /* use the user's GS */
471 bind_gs_state(svga, svga->curr.user_gs);
472 }
473 }
474 else if (svga->curr.gs != svga->curr.user_gs) {
475 /* If current primitive type is not POINTS, then make sure
476 * we don't bind to any of the generated geometry shader
477 */
478 bind_gs_state(svga, svga->curr.user_gs);
479 }
480 (void) new_gs; /* silence the unused var warning */
481
482 return PIPE_OK;
483 }
484
485 struct svga_tracked_state svga_need_tgsi_transform =
486 {
487 "transform shader for optimization",
488 (SVGA_NEW_VS |
489 SVGA_NEW_FS |
490 SVGA_NEW_GS |
491 SVGA_NEW_REDUCED_PRIMITIVE |
492 SVGA_NEW_RAST),
493 update_tgsi_transform
494 };