/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * \file
 *
 * Code to initialize the binding table entries used by transform feedback.
 */

#include <stdlib.h>
#include <string.h>

#include "main/bufferobj.h"
#include "main/macros.h"
#include "brw_context.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "main/transformfeedback.h"

38 gen6_update_sol_surfaces(struct brw_context
*brw
)
40 struct gl_context
*ctx
= &brw
->ctx
;
41 bool xfb_active
= _mesa_is_xfb_active_and_unpaused(ctx
);
42 struct gl_transform_feedback_object
*xfb_obj
;
43 const struct gl_transform_feedback_info
*linked_xfb_info
= NULL
;
46 /* BRW_NEW_TRANSFORM_FEEDBACK */
47 xfb_obj
= ctx
->TransformFeedback
.CurrentObject
;
48 linked_xfb_info
= xfb_obj
->program
->sh
.LinkedTransformFeedback
;
51 for (int i
= 0; i
< BRW_MAX_SOL_BINDINGS
; ++i
) {
52 const int surf_index
= BRW_GEN6_SOL_BINDING_START
+ i
;
53 if (xfb_active
&& i
< linked_xfb_info
->NumOutputs
) {
54 unsigned buffer
= linked_xfb_info
->Outputs
[i
].OutputBuffer
;
55 unsigned buffer_offset
=
56 xfb_obj
->Offset
[buffer
] / 4 +
57 linked_xfb_info
->Outputs
[i
].DstOffset
;
58 if (brw
->programs
[MESA_SHADER_GEOMETRY
]) {
59 brw_update_sol_surface(
60 brw
, xfb_obj
->Buffers
[buffer
],
61 &brw
->gs
.base
.surf_offset
[surf_index
],
62 linked_xfb_info
->Outputs
[i
].NumComponents
,
63 linked_xfb_info
->Buffers
[buffer
].Stride
, buffer_offset
);
65 brw_update_sol_surface(
66 brw
, xfb_obj
->Buffers
[buffer
],
67 &brw
->ff_gs
.surf_offset
[surf_index
],
68 linked_xfb_info
->Outputs
[i
].NumComponents
,
69 linked_xfb_info
->Buffers
[buffer
].Stride
, buffer_offset
);
72 if (!brw
->programs
[MESA_SHADER_GEOMETRY
])
73 brw
->ff_gs
.surf_offset
[surf_index
] = 0;
75 brw
->gs
.base
.surf_offset
[surf_index
] = 0;
79 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
82 const struct brw_tracked_state gen6_sol_surface
= {
85 .brw
= BRW_NEW_BATCH
|
87 BRW_NEW_TRANSFORM_FEEDBACK
,
89 .emit
= gen6_update_sol_surfaces
,
93 * Constructs the binding table for the WM surface state, which maps unit
94 * numbers to surface state objects.
97 brw_gs_upload_binding_table(struct brw_context
*brw
)
100 struct gl_context
*ctx
= &brw
->ctx
;
101 const struct gl_program
*prog
;
102 bool need_binding_table
= false;
104 /* We have two scenarios here:
105 * 1) We are using a geometry shader only to implement transform feedback
106 * for a vertex shader (brw->programs[MESA_SHADER_GEOMETRY] == NULL).
107 * In this case, we only need surfaces for transform feedback in the
109 * 2) We have a user-provided geometry shader. In this case we may need
110 * surfaces for transform feedback and/or other stuff, like textures,
114 if (!brw
->programs
[MESA_SHADER_GEOMETRY
]) {
115 /* BRW_NEW_VERTEX_PROGRAM */
116 prog
= ctx
->_Shader
->CurrentProgram
[MESA_SHADER_VERTEX
];
118 /* Skip making a binding table if we don't have anything to put in it */
119 const struct gl_transform_feedback_info
*linked_xfb_info
=
120 prog
->sh
.LinkedTransformFeedback
;
121 need_binding_table
= linked_xfb_info
->NumOutputs
> 0;
123 if (!need_binding_table
) {
124 if (brw
->ff_gs
.bind_bo_offset
!= 0) {
125 brw
->ctx
.NewDriverState
|= BRW_NEW_BINDING_TABLE_POINTERS
;
126 brw
->ff_gs
.bind_bo_offset
= 0;
131 /* Might want to calculate nr_surfaces first, to avoid taking up so much
132 * space for the binding table. Anyway, in this case we know that we only
133 * use BRW_MAX_SOL_BINDINGS surfaces at most.
135 bind
= brw_state_batch(brw
, sizeof(uint32_t) * BRW_MAX_SOL_BINDINGS
,
136 32, &brw
->ff_gs
.bind_bo_offset
);
138 /* BRW_NEW_SURFACES */
139 memcpy(bind
, brw
->ff_gs
.surf_offset
,
140 BRW_MAX_SOL_BINDINGS
* sizeof(uint32_t));
142 /* BRW_NEW_GEOMETRY_PROGRAM */
143 prog
= ctx
->_Shader
->CurrentProgram
[MESA_SHADER_GEOMETRY
];
145 /* Skip making a binding table if we don't have anything to put in it */
146 struct brw_stage_prog_data
*prog_data
= brw
->gs
.base
.prog_data
;
147 const struct gl_transform_feedback_info
*linked_xfb_info
=
148 prog
->sh
.LinkedTransformFeedback
;
149 need_binding_table
= linked_xfb_info
->NumOutputs
> 0 ||
150 prog_data
->binding_table
.size_bytes
> 0;
152 if (!need_binding_table
) {
153 if (brw
->gs
.base
.bind_bo_offset
!= 0) {
154 brw
->gs
.base
.bind_bo_offset
= 0;
155 brw
->ctx
.NewDriverState
|= BRW_NEW_BINDING_TABLE_POINTERS
;
160 /* Might want to calculate nr_surfaces first, to avoid taking up so much
161 * space for the binding table.
163 bind
= brw_state_batch(brw
, sizeof(uint32_t) * BRW_MAX_SURFACES
,
164 32, &brw
->gs
.base
.bind_bo_offset
);
166 /* BRW_NEW_SURFACES */
167 memcpy(bind
, brw
->gs
.base
.surf_offset
,
168 BRW_MAX_SURFACES
* sizeof(uint32_t));
171 brw
->ctx
.NewDriverState
|= BRW_NEW_BINDING_TABLE_POINTERS
;
174 const struct brw_tracked_state gen6_gs_binding_table
= {
177 .brw
= BRW_NEW_BATCH
|
179 BRW_NEW_GEOMETRY_PROGRAM
|
180 BRW_NEW_VERTEX_PROGRAM
|
183 .emit
= brw_gs_upload_binding_table
,
186 struct gl_transform_feedback_object
*
187 brw_new_transform_feedback(struct gl_context
*ctx
, GLuint name
)
189 struct brw_context
*brw
= brw_context(ctx
);
190 struct brw_transform_feedback_object
*brw_obj
=
191 CALLOC_STRUCT(brw_transform_feedback_object
);
195 _mesa_init_transform_feedback_object(&brw_obj
->base
, name
);
198 brw_bo_alloc(brw
->bufmgr
, "transform feedback offsets", 16, 64);
199 brw_obj
->prim_count_bo
=
200 brw_bo_alloc(brw
->bufmgr
, "xfb primitive counts", 4096, 64);
202 return &brw_obj
->base
;
206 brw_delete_transform_feedback(struct gl_context
*ctx
,
207 struct gl_transform_feedback_object
*obj
)
209 struct brw_transform_feedback_object
*brw_obj
=
210 (struct brw_transform_feedback_object
*) obj
;
212 for (unsigned i
= 0; i
< ARRAY_SIZE(obj
->Buffers
); i
++) {
213 _mesa_reference_buffer_object(ctx
, &obj
->Buffers
[i
], NULL
);
216 brw_bo_unreference(brw_obj
->offset_bo
);
217 brw_bo_unreference(brw_obj
->prim_count_bo
);
223 * Tally the number of primitives generated so far.
225 * The buffer contains a series of pairs:
226 * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
227 * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
229 * For each stream, we subtract the pair of values (end - start) to get the
230 * number of primitives generated during one section. We accumulate these
231 * values, adding them up to get the total number of primitives generated.
233 * Note that we expose one stream pre-Gen7, so the above is just (start, end).
236 tally_prims_generated(struct brw_context
*brw
,
237 struct brw_transform_feedback_object
*obj
)
239 const struct gl_context
*ctx
= &brw
->ctx
;
240 const int streams
= ctx
->Const
.MaxVertexStreams
;
242 /* If the current batch is still contributing to the number of primitives
243 * generated, flush it now so the results will be present when mapped.
245 if (brw_batch_references(&brw
->batch
, obj
->prim_count_bo
))
246 intel_batchbuffer_flush(brw
);
248 if (unlikely(brw
->perf_debug
&& brw_bo_busy(obj
->prim_count_bo
)))
249 perf_debug("Stalling for # of transform feedback primitives written.\n");
251 uint64_t *prim_counts
= brw_bo_map(brw
, obj
->prim_count_bo
, MAP_READ
);
253 assert(obj
->prim_count_buffer_index
% (2 * streams
) == 0);
254 int pairs
= obj
->prim_count_buffer_index
/ (2 * streams
);
256 for (int i
= 0; i
< pairs
; i
++) {
257 for (int s
= 0; s
< streams
; s
++) {
258 obj
->prims_generated
[s
] += prim_counts
[streams
+ s
] - prim_counts
[s
];
260 prim_counts
+= 2 * streams
; /* move to the next pair */
263 brw_bo_unmap(obj
->prim_count_bo
);
265 /* We've already gathered up the old data; we can safely overwrite it now. */
266 obj
->prim_count_buffer_index
= 0;
270 * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
273 * If prim_count_bo is out of space, gather up the results so far into
274 * prims_generated[] and allocate a new buffer with enough space.
276 * The number of primitives written is used to compute the number of vertices
277 * written to a transform feedback stream, which is required to implement
278 * DrawTransformFeedback().
281 brw_save_primitives_written_counters(struct brw_context
*brw
,
282 struct brw_transform_feedback_object
*obj
)
284 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
285 const struct gl_context
*ctx
= &brw
->ctx
;
286 const int streams
= ctx
->Const
.MaxVertexStreams
;
288 assert(obj
->prim_count_bo
!= NULL
);
290 /* Check if there's enough space for a new pair of four values. */
291 if (obj
->prim_count_buffer_index
+ 2 * streams
>= 4096 / sizeof(uint64_t)) {
292 /* Gather up the results so far and release the BO. */
293 tally_prims_generated(brw
, obj
);
296 /* Flush any drawing so that the counters have the right values. */
297 brw_emit_mi_flush(brw
);
299 /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
300 if (devinfo
->gen
>= 7) {
301 for (int i
= 0; i
< streams
; i
++) {
302 int offset
= (obj
->prim_count_buffer_index
+ i
) * sizeof(uint64_t);
303 brw_store_register_mem64(brw
, obj
->prim_count_bo
,
304 GEN7_SO_NUM_PRIMS_WRITTEN(i
),
308 brw_store_register_mem64(brw
, obj
->prim_count_bo
,
309 GEN6_SO_NUM_PRIMS_WRITTEN
,
310 obj
->prim_count_buffer_index
* sizeof(uint64_t));
313 /* Update where to write data to. */
314 obj
->prim_count_buffer_index
+= streams
;
318 compute_vertices_written_so_far(struct brw_context
*brw
,
319 struct brw_transform_feedback_object
*obj
,
320 uint64_t *vertices_written
)
322 const struct gl_context
*ctx
= &brw
->ctx
;
323 unsigned vertices_per_prim
= 0;
325 switch (obj
->primitive_mode
) {
327 vertices_per_prim
= 1;
330 vertices_per_prim
= 2;
333 vertices_per_prim
= 3;
336 unreachable("Invalid transform feedback primitive mode.");
339 /* Get the number of primitives generated. */
340 tally_prims_generated(brw
, obj
);
342 for (int i
= 0; i
< ctx
->Const
.MaxVertexStreams
; i
++) {
343 vertices_written
[i
] = vertices_per_prim
* obj
->prims_generated
[i
];
348 * Compute the number of vertices written by this transform feedback operation.
351 brw_compute_xfb_vertices_written(struct brw_context
*brw
,
352 struct brw_transform_feedback_object
*obj
)
354 if (obj
->vertices_written_valid
|| !obj
->base
.EndedAnytime
)
357 compute_vertices_written_so_far(brw
, obj
, obj
->vertices_written
);
359 obj
->vertices_written_valid
= true;
363 * GetTransformFeedbackVertexCount() driver hook.
365 * Returns the number of vertices written to a particular stream by the last
366 * Begin/EndTransformFeedback block. Used to implement DrawTransformFeedback().
369 brw_get_transform_feedback_vertex_count(struct gl_context
*ctx
,
370 struct gl_transform_feedback_object
*obj
,
373 struct brw_context
*brw
= brw_context(ctx
);
374 struct brw_transform_feedback_object
*brw_obj
=
375 (struct brw_transform_feedback_object
*) obj
;
377 assert(obj
->EndedAnytime
);
378 assert(stream
< ctx
->Const
.MaxVertexStreams
);
380 brw_compute_xfb_vertices_written(brw
, brw_obj
);
381 return brw_obj
->vertices_written
[stream
];
385 brw_begin_transform_feedback(struct gl_context
*ctx
, GLenum mode
,
386 struct gl_transform_feedback_object
*obj
)
388 struct brw_context
*brw
= brw_context(ctx
);
389 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
390 const struct gl_program
*prog
;
391 const struct gl_transform_feedback_info
*linked_xfb_info
;
392 struct gl_transform_feedback_object
*xfb_obj
=
393 ctx
->TransformFeedback
.CurrentObject
;
394 struct brw_transform_feedback_object
*brw_obj
=
395 (struct brw_transform_feedback_object
*) xfb_obj
;
397 assert(devinfo
->gen
== 6);
399 if (ctx
->_Shader
->CurrentProgram
[MESA_SHADER_GEOMETRY
]) {
400 /* BRW_NEW_GEOMETRY_PROGRAM */
401 prog
= ctx
->_Shader
->CurrentProgram
[MESA_SHADER_GEOMETRY
];
403 /* BRW_NEW_VERTEX_PROGRAM */
404 prog
= ctx
->_Shader
->CurrentProgram
[MESA_SHADER_VERTEX
];
406 linked_xfb_info
= prog
->sh
.LinkedTransformFeedback
;
408 /* Compute the maximum number of vertices that we can write without
409 * overflowing any of the buffers currently being used for feedback.
412 = _mesa_compute_max_transform_feedback_vertices(ctx
, xfb_obj
,
415 /* Initialize the SVBI 0 register to zero and set the maximum index. */
417 OUT_BATCH(_3DSTATE_GS_SVB_INDEX
<< 16 | (4 - 2));
418 OUT_BATCH(0); /* SVBI 0 */
419 OUT_BATCH(0); /* starting index */
420 OUT_BATCH(brw_obj
->max_index
);
423 /* Initialize the rest of the unused streams to sane values. Otherwise,
424 * they may indicate that there is no room to write data and prevent
425 * anything from happening at all.
427 for (int i
= 1; i
< 4; i
++) {
429 OUT_BATCH(_3DSTATE_GS_SVB_INDEX
<< 16 | (4 - 2));
430 OUT_BATCH(i
<< SVB_INDEX_SHIFT
);
431 OUT_BATCH(0); /* starting index */
432 OUT_BATCH(0xffffffff);
436 /* We're about to lose the information needed to compute the number of
437 * vertices written during the last Begin/EndTransformFeedback section,
438 * so we can't delay it any further.
440 brw_compute_xfb_vertices_written(brw
, brw_obj
);
442 /* No primitives have been generated yet. */
443 for (int i
= 0; i
< BRW_MAX_XFB_STREAMS
; i
++) {
444 brw_obj
->prims_generated
[i
] = 0;
447 /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */
448 brw_save_primitives_written_counters(brw
, brw_obj
);
450 brw_obj
->primitive_mode
= mode
;
454 brw_end_transform_feedback(struct gl_context
*ctx
,
455 struct gl_transform_feedback_object
*obj
)
457 struct brw_context
*brw
= brw_context(ctx
);
458 struct brw_transform_feedback_object
*brw_obj
=
459 (struct brw_transform_feedback_object
*) obj
;
461 /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */
463 brw_save_primitives_written_counters(brw
, brw_obj
);
465 /* EndTransformFeedback() means that we need to update the number of
466 * vertices written. Since it's only necessary if DrawTransformFeedback()
467 * is called and it means mapping a buffer object, we delay computing it
468 * until it's absolutely necessary to try and avoid stalls.
470 brw_obj
->vertices_written_valid
= false;
/**
 * PauseTransformFeedback() driver hook.
 *
 * \param ctx  GL context
 * \param obj  transform feedback object being paused
 */
void
brw_pause_transform_feedback(struct gl_context *ctx,
                             struct gl_transform_feedback_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) obj;

   /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters.
    * While this operation is paused, other transform feedback actions may
    * occur, which will contribute to the counters.  We need to exclude that
    * from our counts.
    */
   brw_save_primitives_written_counters(brw, brw_obj);
}

490 brw_resume_transform_feedback(struct gl_context
*ctx
,
491 struct gl_transform_feedback_object
*obj
)
493 struct brw_context
*brw
= brw_context(ctx
);
494 struct brw_transform_feedback_object
*brw_obj
=
495 (struct brw_transform_feedback_object
*) obj
;
497 /* Reload SVBI 0 with the count of vertices written so far. */
499 compute_vertices_written_so_far(brw
, brw_obj
, &svbi
);
502 OUT_BATCH(_3DSTATE_GS_SVB_INDEX
<< 16 | (4 - 2));
503 OUT_BATCH(0); /* SVBI 0 */
504 OUT_BATCH((uint32_t) svbi
); /* starting index */
505 OUT_BATCH(brw_obj
->max_index
);
508 /* Initialize the rest of the unused streams to sane values. Otherwise,
509 * they may indicate that there is no room to write data and prevent
510 * anything from happening at all.
512 for (int i
= 1; i
< 4; i
++) {
514 OUT_BATCH(_3DSTATE_GS_SVB_INDEX
<< 16 | (4 - 2));
515 OUT_BATCH(i
<< SVB_INDEX_SHIFT
);
516 OUT_BATCH(0); /* starting index */
517 OUT_BATCH(0xffffffff);
521 /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
522 brw_save_primitives_written_counters(brw
, brw_obj
);