1 /**********************************************************
2 * Copyright 2014 VMware, Inc. All rights reserved.
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 **********************************************************/
26 #include "util/u_memory.h"
27 #include "util/u_bitmask.h"
30 #include "svga_context.h"
31 #include "svga_resource_buffer.h"
32 #include "svga_shader.h"
33 #include "svga_debug.h"
34 #include "svga_streamout.h"
36 struct svga_stream_output_target
{
37 struct pipe_stream_output_target base
;
41 static inline struct svga_stream_output_target
*
42 svga_stream_output_target(struct pipe_stream_output_target
*s
)
44 return (struct svga_stream_output_target
*)s
;
49 * A helper function to send different version of the DefineStreamOutput command
50 * depending on if device is SM5 capable or not.
52 static enum pipe_error
53 svga_define_stream_output(struct svga_context
*svga
,
54 SVGA3dStreamOutputId soid
,
55 uint32 numOutputStreamEntries
,
56 uint32 numOutputStreamStrides
,
57 uint32 streamStrides
[SVGA3D_DX_MAX_SOTARGETS
],
58 const SVGA3dStreamOutputDeclarationEntry decls
[SVGA3D_MAX_STREAMOUT_DECLS
],
59 uint32 rasterizedStream
,
60 struct svga_stream_output
*streamout
)
64 SVGA_DBG(DEBUG_STREAMOUT
, "%s: id=%d\n", __FUNCTION__
, soid
);
65 SVGA_DBG(DEBUG_STREAMOUT
,
66 "numOutputStreamEntires=%d\n", numOutputStreamEntries
);
68 for (i
= 0; i
< numOutputStreamEntries
; i
++) {
69 SVGA_DBG(DEBUG_STREAMOUT
,
70 " %d: slot=%d regIdx=%d regMask=0x%x stream=%d\n",
71 i
, decls
[i
].outputSlot
, decls
[i
].registerIndex
,
72 decls
[i
].registerMask
, decls
[i
].stream
);
75 SVGA_DBG(DEBUG_STREAMOUT
,
76 "numOutputStreamStrides=%d\n", numOutputStreamStrides
);
77 for (i
= 0; i
< numOutputStreamStrides
; i
++) {
78 SVGA_DBG(DEBUG_STREAMOUT
, " %d ", streamStrides
[i
]);
80 SVGA_DBG(DEBUG_STREAMOUT
, "\n");
82 if (svga_have_sm5(svga
) &&
83 (numOutputStreamEntries
> SVGA3D_MAX_DX10_STREAMOUT_DECLS
||
84 numOutputStreamStrides
> 1)) {
85 unsigned bufSize
= sizeof(SVGA3dStreamOutputDeclarationEntry
)
86 * numOutputStreamEntries
;
87 struct svga_winsys_buffer
*declBuf
;
88 struct svga_winsys_screen
*sws
= svga_screen(svga
->pipe
.screen
)->sws
;
91 declBuf
= svga_winsys_buffer_create(svga
, 1, SVGA_BUFFER_USAGE_PINNED
,
95 map
= sws
->buffer_map(sws
, declBuf
, PIPE_TRANSFER_WRITE
);
97 sws
->buffer_destroy(sws
, declBuf
);
101 /* copy decls to buffer */
102 memcpy(map
, decls
, bufSize
);
105 sws
->buffer_unmap(sws
, declBuf
);
106 streamout
->declBuf
= declBuf
;
108 SVGA_RETRY(svga
, SVGA3D_sm5_DefineAndBindStreamOutput
110 numOutputStreamEntries
,
111 numOutputStreamStrides
,
117 SVGA_RETRY(svga
, SVGA3D_vgpu10_DefineStreamOutput(svga
->swc
, soid
,
118 numOutputStreamEntries
,
128 * Creates stream output from the stream output info.
130 struct svga_stream_output
*
131 svga_create_stream_output(struct svga_context
*svga
,
132 struct svga_shader
*shader
,
133 const struct pipe_stream_output_info
*info
)
135 struct svga_stream_output
*streamout
;
136 SVGA3dStreamOutputDeclarationEntry decls
[SVGA3D_MAX_STREAMOUT_DECLS
];
137 unsigned strides
[SVGA3D_DX_MAX_SOTARGETS
];
138 unsigned dstOffset
[SVGA3D_DX_MAX_SOTARGETS
];
139 unsigned numStreamStrides
= 0;
144 ASSERTED
unsigned maxDecls
;
146 assert(info
->num_outputs
<= PIPE_MAX_SO_OUTPUTS
);
148 /* Gallium utility creates shaders with stream output.
149 * For non-DX10, just return NULL.
151 if (!svga_have_vgpu10(svga
))
154 if (svga_have_sm5(svga
))
155 maxDecls
= SVGA3D_MAX_STREAMOUT_DECLS
;
156 else if (svga_have_vgpu10(svga
))
157 maxDecls
= SVGA3D_MAX_DX10_STREAMOUT_DECLS
;
159 assert(info
->num_outputs
<= maxDecls
);
161 /* Allocate an integer ID for the stream output */
162 id
= util_bitmask_add(svga
->stream_output_id_bm
);
163 if (id
== UTIL_BITMASK_INVALID_INDEX
) {
167 /* Allocate the streamout data structure */
168 streamout
= CALLOC_STRUCT(svga_stream_output
);
173 streamout
->info
= *info
;
175 streamout
->pos_out_index
= -1;
176 streamout
->streammask
= 0;
178 /* Init whole decls and stride arrays to zero to avoid garbage values */
179 memset(decls
, 0, sizeof(decls
));
180 memset(strides
, 0, sizeof(strides
));
181 memset(dstOffset
, 0, sizeof(dstOffset
));
183 SVGA_DBG(DEBUG_STREAMOUT
, "%s: num_outputs\n",
184 __FUNCTION__
, info
->num_outputs
);
186 for (i
= 0, numDecls
= 0; i
< info
->num_outputs
; i
++, numDecls
++) {
187 unsigned reg_idx
= info
->output
[i
].register_index
;
188 unsigned buf_idx
= info
->output
[i
].output_buffer
;
189 const enum tgsi_semantic sem_name
=
190 shader
->info
.output_semantic_name
[reg_idx
];
192 assert(buf_idx
<= PIPE_MAX_SO_BUFFERS
);
194 numStreamStrides
= MAX2(numStreamStrides
, buf_idx
);
196 SVGA_DBG(DEBUG_STREAMOUT
,
197 " %d: register_index=%d output_buffer=%d stream=%d\n",
198 i
, reg_idx
, buf_idx
, info
->output
[i
].stream
);
200 SVGA_DBG(DEBUG_STREAMOUT
,
201 " dst_offset=%d start_component=%d num_components=%d\n",
202 info
->output
[i
].dst_offset
,
203 info
->output
[i
].start_component
,
204 info
->output
[i
].num_components
);
206 streamout
->buffer_stream
|= info
->output
[i
].stream
<< (buf_idx
* 4);
209 * Check if the destination offset of the current output
210 * is at the expected offset. If it is greater, then that means
211 * there is a gap in the stream output. We need to insert
212 * extra declaration entries with an invalid register index
215 while (info
->output
[i
].dst_offset
> dstOffset
[buf_idx
]) {
217 unsigned numComponents
= info
->output
[i
].dst_offset
-
220 assert(svga_have_sm5(svga
));
222 /* We can only specify at most 4 components to skip in each
225 numComponents
= numComponents
> 4 ? 4 : numComponents
;
227 decls
[numDecls
].outputSlot
= buf_idx
,
228 decls
[numDecls
].stream
= info
->output
[i
].stream
;
229 decls
[numDecls
].registerIndex
= SVGA3D_INVALID_ID
;
230 decls
[numDecls
].registerMask
= (1 << numComponents
) - 1;
232 dstOffset
[buf_idx
] += numComponents
;
236 if (sem_name
== TGSI_SEMANTIC_POSITION
) {
238 * Check if streaming out POSITION. If so, replace the
239 * register index with the index for NON_ADJUSTED POSITION.
241 decls
[numDecls
].registerIndex
= shader
->info
.num_outputs
;
243 /* Save this output index, so we can tell later if this stream output
244 * includes an output of a vertex position
246 streamout
->pos_out_index
= numDecls
;
248 else if (sem_name
== TGSI_SEMANTIC_CLIPDIST
) {
250 * Use the shadow copy for clip distance because
251 * CLIPDIST instruction is only emitted for enabled clip planes.
252 * It's valid to write to ClipDistance variable for non-enabled
255 decls
[numDecls
].registerIndex
=
256 shader
->info
.num_outputs
+ 1 +
257 shader
->info
.output_semantic_index
[reg_idx
];
260 decls
[numDecls
].registerIndex
= reg_idx
;
263 decls
[numDecls
].outputSlot
= buf_idx
;
264 decls
[numDecls
].registerMask
=
265 ((1 << info
->output
[i
].num_components
) - 1)
266 << info
->output
[i
].start_component
;
268 decls
[numDecls
].stream
= info
->output
[i
].stream
;
269 assert(decls
[numDecls
].stream
== 0 || svga_have_sm5(svga
));
271 /* Set the bit in streammask for the enabled stream */
272 streamout
->streammask
|= 1 << info
->output
[i
].stream
;
274 /* Update the expected offset for the next output */
275 dstOffset
[buf_idx
] += info
->output
[i
].num_components
;
277 strides
[buf_idx
] = info
->stride
[buf_idx
] * sizeof(float);
280 assert(numDecls
<= maxDecls
);
282 /* Send the DefineStreamOutput command.
283 * Note, rasterizedStream is always 0.
285 ret
= svga_define_stream_output(svga
, id
,
286 numDecls
, numStreamStrides
+1,
287 strides
, decls
, 0, streamout
);
289 if (ret
!= PIPE_OK
) {
290 util_bitmask_clear(svga
->stream_output_id_bm
, id
);
299 svga_set_stream_output(struct svga_context
*svga
,
300 struct svga_stream_output
*streamout
)
302 unsigned id
= streamout
? streamout
->id
: SVGA3D_INVALID_ID
;
304 if (!svga_have_vgpu10(svga
)) {
308 SVGA_DBG(DEBUG_STREAMOUT
, "%s streamout=0x%x id=%d\n", __FUNCTION__
,
311 if (svga
->current_so
!= streamout
) {
313 /* Before unbinding the current stream output, stop the stream output
314 * statistics queries for the active streams.
316 if (svga_have_sm5(svga
) && svga
->current_so
) {
317 svga
->vcount_buffer_stream
= svga
->current_so
->buffer_stream
;
318 svga_end_stream_output_queries(svga
, svga
->current_so
->streammask
);
321 enum pipe_error ret
= SVGA3D_vgpu10_SetStreamOutput(svga
->swc
, id
);
322 if (ret
!= PIPE_OK
) {
326 svga
->current_so
= streamout
;
328 /* After binding the new stream output, start the stream output
329 * statistics queries for the active streams.
331 if (svga_have_sm5(svga
) && svga
->current_so
) {
332 svga_begin_stream_output_queries(svga
, svga
->current_so
->streammask
);
340 svga_delete_stream_output(struct svga_context
*svga
,
341 struct svga_stream_output
*streamout
)
343 struct svga_winsys_screen
*sws
= svga_screen(svga
->pipe
.screen
)->sws
;
345 SVGA_DBG(DEBUG_STREAMOUT
, "%s streamout=0x%x\n", __FUNCTION__
, streamout
);
347 assert(svga_have_vgpu10(svga
));
348 assert(streamout
!= NULL
);
350 SVGA_RETRY(svga
, SVGA3D_vgpu10_DestroyStreamOutput(svga
->swc
,
353 if (svga_have_sm5(svga
) && streamout
->declBuf
) {
354 sws
->buffer_destroy(sws
, streamout
->declBuf
);
357 /* Before deleting the current streamout, make sure to stop any pending
360 if (svga
->current_so
== streamout
) {
361 if (svga
->in_streamout
)
362 svga_end_stream_output_queries(svga
, svga
->current_so
->streammask
);
363 svga
->current_so
= NULL
;
367 util_bitmask_clear(svga
->stream_output_id_bm
, streamout
->id
);
369 /* Free streamout structure */
374 static struct pipe_stream_output_target
*
375 svga_create_stream_output_target(struct pipe_context
*pipe
,
376 struct pipe_resource
*buffer
,
377 unsigned buffer_offset
,
378 unsigned buffer_size
)
380 struct svga_context
*svga
= svga_context(pipe
);
381 struct svga_stream_output_target
*sot
;
383 SVGA_DBG(DEBUG_STREAMOUT
, "%s offset=%d size=%d\n", __FUNCTION__
,
384 buffer_offset
, buffer_size
);
386 assert(svga_have_vgpu10(svga
));
389 sot
= CALLOC_STRUCT(svga_stream_output_target
);
393 pipe_reference_init(&sot
->base
.reference
, 1);
394 pipe_resource_reference(&sot
->base
.buffer
, buffer
);
395 sot
->base
.context
= pipe
;
396 sot
->base
.buffer
= buffer
;
397 sot
->base
.buffer_offset
= buffer_offset
;
398 sot
->base
.buffer_size
= buffer_size
;
404 svga_destroy_stream_output_target(struct pipe_context
*pipe
,
405 struct pipe_stream_output_target
*target
)
407 struct svga_stream_output_target
*sot
= svga_stream_output_target(target
);
409 SVGA_DBG(DEBUG_STREAMOUT
, "%s\n", __FUNCTION__
);
411 pipe_resource_reference(&sot
->base
.buffer
, NULL
);
416 svga_set_stream_output_targets(struct pipe_context
*pipe
,
417 unsigned num_targets
,
418 struct pipe_stream_output_target
**targets
,
419 const unsigned *offsets
)
421 struct svga_context
*svga
= svga_context(pipe
);
422 struct SVGA3dSoTarget soBindings
[SVGA3D_DX_MAX_SOTARGETS
];
424 unsigned num_so_targets
;
425 boolean begin_so_queries
= num_targets
> 0;
427 SVGA_DBG(DEBUG_STREAMOUT
, "%s num_targets=%d\n", __FUNCTION__
,
430 assert(svga_have_vgpu10(svga
));
432 /* Mark the streamout buffers as dirty so that we'll issue readbacks
435 for (i
= 0; i
< svga
->num_so_targets
; i
++) {
436 struct svga_buffer
*sbuf
= svga_buffer(svga
->so_targets
[i
]->buffer
);
440 /* Before the currently bound streamout targets are unbound,
441 * save them in case they need to be referenced to retrieve the
442 * number of vertices being streamed out.
444 for (i
= 0; i
< ARRAY_SIZE(svga
->so_targets
); i
++) {
445 svga
->vcount_so_targets
[i
] = svga
->so_targets
[i
];
448 assert(num_targets
<= SVGA3D_DX_MAX_SOTARGETS
);
450 for (i
= 0; i
< num_targets
; i
++) {
451 struct svga_stream_output_target
*sot
452 = svga_stream_output_target(targets
[i
]);
455 svga
->so_surfaces
[i
] = svga_buffer_handle(svga
, sot
->base
.buffer
,
456 PIPE_BIND_STREAM_OUTPUT
);
458 assert(svga_buffer(sot
->base
.buffer
)->key
.flags
459 & SVGA3D_SURFACE_BIND_STREAM_OUTPUT
);
461 svga
->so_targets
[i
] = &sot
->base
;
462 if (offsets
[i
] == -1) {
463 soBindings
[i
].offset
= -1;
465 /* The streamout is being resumed. There is no need to restart streamout statistics
466 * queries for the draw-auto fallback since those queries are still active.
468 begin_so_queries
= FALSE
;
471 soBindings
[i
].offset
= sot
->base
.buffer_offset
+ offsets
[i
];
473 /* The size cannot extend beyond the end of the buffer. Clamp it. */
474 size
= MIN2(sot
->base
.buffer_size
,
475 sot
->base
.buffer
->width0
- sot
->base
.buffer_offset
);
477 soBindings
[i
].sizeInBytes
= size
;
480 /* unbind any previously bound stream output buffers */
481 for (; i
< svga
->num_so_targets
; i
++) {
482 svga
->so_surfaces
[i
] = NULL
;
483 svga
->so_targets
[i
] = NULL
;
486 num_so_targets
= MAX2(svga
->num_so_targets
, num_targets
);
487 SVGA_RETRY(svga
, SVGA3D_vgpu10_SetSOTargets(svga
->swc
, num_so_targets
,
488 soBindings
, svga
->so_surfaces
));
489 svga
->num_so_targets
= num_targets
;
491 if (svga_have_sm5(svga
) && svga
->current_so
&& begin_so_queries
) {
493 /* If there are aleady active queries and we need to start a new streamout,
494 * we need to stop the current active queries first.
496 if (svga
->in_streamout
) {
497 svga_end_stream_output_queries(svga
, svga
->current_so
->streammask
);
500 /* Start stream out statistics queries for the new streamout */
501 svga_begin_stream_output_queries(svga
, svga
->current_so
->streammask
);
506 * Rebind stream output target surfaces
509 svga_rebind_stream_output_targets(struct svga_context
*svga
)
511 struct svga_winsys_context
*swc
= svga
->swc
;
515 for (i
= 0; i
< svga
->num_so_targets
; i
++) {
516 ret
= swc
->resource_rebind(swc
, svga
->so_surfaces
[i
], NULL
, SVGA_RELOC_WRITE
);
526 svga_init_stream_output_functions(struct svga_context
*svga
)
528 svga
->pipe
.create_stream_output_target
= svga_create_stream_output_target
;
529 svga
->pipe
.stream_output_target_destroy
= svga_destroy_stream_output_target
;
530 svga
->pipe
.set_stream_output_targets
= svga_set_stream_output_targets
;
535 * A helper function to create stream output statistics queries for each stream.
536 * These queries are created as a workaround for DrawTransformFeedbackInstanced or
537 * DrawTransformFeedbackStreamInstanced when auto draw doesn't support
538 * instancing or non-0 stream. In this case, the vertex count will
539 * be retrieved from the stream output statistics query.
542 svga_create_stream_output_queries(struct svga_context
*svga
)
546 if (!svga_have_sm5(svga
))
549 for (i
= 0; i
< ARRAY_SIZE(svga
->so_queries
); i
++) {
550 svga
->so_queries
[i
] = svga
->pipe
.create_query(&svga
->pipe
,
551 PIPE_QUERY_SO_STATISTICS
, i
);
552 assert(svga
->so_queries
[i
] != NULL
);
558 * Destroy the stream output statistics queries for the draw-auto workaround.
561 svga_destroy_stream_output_queries(struct svga_context
*svga
)
565 if (!svga_have_sm5(svga
))
568 for (i
= 0; i
< ARRAY_SIZE(svga
->so_queries
); i
++) {
569 svga
->pipe
.destroy_query(&svga
->pipe
, svga
->so_queries
[i
]);
575 * Start stream output statistics queries for the active streams.
578 svga_begin_stream_output_queries(struct svga_context
*svga
,
581 assert(svga_have_sm5(svga
));
582 assert(!svga
->in_streamout
);
584 for (unsigned i
= 0; i
< ARRAY_SIZE(svga
->so_queries
); i
++) {
586 if (streammask
& (1 << i
)) {
587 ret
= svga
->pipe
.begin_query(&svga
->pipe
, svga
->so_queries
[i
]);
591 svga
->in_streamout
= TRUE
;
598 * Stop stream output statistics queries for the active streams.
601 svga_end_stream_output_queries(struct svga_context
*svga
,
604 assert(svga_have_sm5(svga
));
606 if (!svga
->in_streamout
)
609 for (unsigned i
= 0; i
< ARRAY_SIZE(svga
->so_queries
); i
++) {
611 if (streammask
& (1 << i
)) {
612 ret
= svga
->pipe
.end_query(&svga
->pipe
, svga
->so_queries
[i
]);
616 svga
->in_streamout
= FALSE
;
623 * Return the primitive count returned from the stream output statistics query
624 * for the specified stream.
627 svga_get_primcount_from_stream_output(struct svga_context
*svga
,
630 unsigned primcount
= 0;
631 union pipe_query_result result
;
634 if (svga
->current_so
) {
635 svga_end_stream_output_queries(svga
, svga
->current_so
->streammask
);
638 ret
= svga
->pipe
.get_query_result(&svga
->pipe
,
639 svga
->so_queries
[stream
],
642 primcount
= result
.so_statistics
.num_primitives_written
;