/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/**
 * An implementation of the transform feedback driver hooks for Haswell
 * and later hardware.  This uses MI_MATH to compute the number of vertices
 * written (for use by DrawTransformFeedback()) without any CPU<->GPU
 * synchronization which could stall.
 */
31 #include "brw_context.h"
32 #include "brw_state.h"
33 #include "brw_defines.h"
34 #include "intel_batchbuffer.h"
35 #include "intel_buffer_objects.h"
36 #include "main/transformfeedback.h"
/**
 * We store several values in obj->prim_count_bo:
 *
 * [4x 32-bit values]: Final Number of Vertices Written
 * [4x 32-bit values]: Tally of Primitives Written So Far
 * [4x 64-bit values]: Starting SO_NUM_PRIMS_WRITTEN Counter Snapshots
 *
 * The first set of values is used by DrawTransformFeedback(), which
 * copies one of them into the 3DPRIM_VERTEX_COUNT register and performs
 * an indirect draw.  The other values are just temporary storage.
 */

/* Byte offset of the per-stream tallies: just past the final vertex counts. */
#define TALLY_OFFSET (BRW_MAX_XFB_STREAMS * sizeof(uint32_t))
/* Byte offset of the 64-bit start snapshots: just past the tallies. */
#define START_OFFSET (TALLY_OFFSET * 2)
54 * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
58 save_prim_start_values(struct brw_context
*brw
,
59 struct brw_transform_feedback_object
*obj
)
61 /* Flush any drawing so that the counters have the right values. */
62 brw_emit_mi_flush(brw
);
64 /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
65 for (int i
= 0; i
< BRW_MAX_XFB_STREAMS
; i
++) {
66 brw_store_register_mem64(brw
, obj
->prim_count_bo
,
67 GEN7_SO_NUM_PRIMS_WRITTEN(i
),
68 START_OFFSET
+ i
* sizeof(uint64_t));
73 * Compute the number of primitives written during our most recent
74 * transform feedback activity (the current SO_NUM_PRIMS_WRITTEN value
75 * minus the stashed "start" value), and add it to our running tally.
77 * If \p finalize is true, also compute the number of vertices written
78 * (by multiplying by the number of vertices per primitive), and store
79 * that to the "final" location.
81 * Otherwise, just overwrite the old tally with the new one.
84 tally_prims_written(struct brw_context
*brw
,
85 struct brw_transform_feedback_object
*obj
,
88 /* Flush any drawing so that the counters have the right values. */
89 brw_emit_mi_flush(brw
);
91 for (int i
= 0; i
< BRW_MAX_XFB_STREAMS
; i
++) {
93 brw_load_register_imm32(brw
, HSW_CS_GPR(0) + 4, 0);
94 brw_load_register_mem(brw
, HSW_CS_GPR(0), obj
->prim_count_bo
,
95 I915_GEM_DOMAIN_INSTRUCTION
,
96 I915_GEM_DOMAIN_INSTRUCTION
,
97 TALLY_OFFSET
+ i
* sizeof(uint32_t));
98 if (!obj
->base
.Paused
) {
99 /* GPR1 = Start Snapshot */
100 brw_load_register_mem64(brw
, HSW_CS_GPR(1), obj
->prim_count_bo
,
101 I915_GEM_DOMAIN_INSTRUCTION
,
102 I915_GEM_DOMAIN_INSTRUCTION
,
103 START_OFFSET
+ i
* sizeof(uint64_t));
104 /* GPR2 = Ending Snapshot */
105 brw_load_register_reg64(brw
, GEN7_SO_NUM_PRIMS_WRITTEN(i
), HSW_CS_GPR(2));
108 OUT_BATCH(HSW_MI_MATH
| (9 - 2));
109 /* GPR1 = GPR2 (End) - GPR1 (Start) */
110 OUT_BATCH(MI_MATH_ALU2(LOAD
, SRCA
, R2
));
111 OUT_BATCH(MI_MATH_ALU2(LOAD
, SRCB
, R1
));
112 OUT_BATCH(MI_MATH_ALU0(SUB
));
113 OUT_BATCH(MI_MATH_ALU2(STORE
, R1
, ACCU
));
114 /* GPR0 = GPR0 (Tally) + GPR1 (Diff) */
115 OUT_BATCH(MI_MATH_ALU2(LOAD
, SRCA
, R0
));
116 OUT_BATCH(MI_MATH_ALU2(LOAD
, SRCB
, R1
));
117 OUT_BATCH(MI_MATH_ALU0(ADD
));
118 OUT_BATCH(MI_MATH_ALU2(STORE
, R0
, ACCU
));
123 /* Write back the new tally */
124 brw_store_register_mem32(brw
, obj
->prim_count_bo
, HSW_CS_GPR(0),
125 TALLY_OFFSET
+ i
* sizeof(uint32_t));
127 /* Convert the number of primitives to the number of vertices. */
128 if (obj
->primitive_mode
== GL_LINES
) {
129 /* Double R0 (R0 = R0 + R0) */
131 OUT_BATCH(HSW_MI_MATH
| (5 - 2));
132 OUT_BATCH(MI_MATH_ALU2(LOAD
, SRCA
, R0
));
133 OUT_BATCH(MI_MATH_ALU2(LOAD
, SRCB
, R0
));
134 OUT_BATCH(MI_MATH_ALU0(ADD
));
135 OUT_BATCH(MI_MATH_ALU2(STORE
, R0
, ACCU
));
137 } else if (obj
->primitive_mode
== GL_TRIANGLES
) {
138 /* Triple R0 (R1 = R0 + R0, R0 = R0 + R1) */
140 OUT_BATCH(HSW_MI_MATH
| (9 - 2));
141 OUT_BATCH(MI_MATH_ALU2(LOAD
, SRCA
, R0
));
142 OUT_BATCH(MI_MATH_ALU2(LOAD
, SRCB
, R0
));
143 OUT_BATCH(MI_MATH_ALU0(ADD
));
144 OUT_BATCH(MI_MATH_ALU2(STORE
, R1
, ACCU
));
145 OUT_BATCH(MI_MATH_ALU2(LOAD
, SRCA
, R0
));
146 OUT_BATCH(MI_MATH_ALU2(LOAD
, SRCB
, R1
));
147 OUT_BATCH(MI_MATH_ALU0(ADD
));
148 OUT_BATCH(MI_MATH_ALU2(STORE
, R0
, ACCU
));
151 /* Store it to the final result */
152 brw_store_register_mem32(brw
, obj
->prim_count_bo
, HSW_CS_GPR(0),
153 i
* sizeof(uint32_t));
159 * BeginTransformFeedback() driver hook.
162 hsw_begin_transform_feedback(struct gl_context
*ctx
, GLenum mode
,
163 struct gl_transform_feedback_object
*obj
)
165 struct brw_context
*brw
= brw_context(ctx
);
166 struct brw_transform_feedback_object
*brw_obj
=
167 (struct brw_transform_feedback_object
*) obj
;
169 brw_obj
->primitive_mode
= mode
;
171 /* Reset the SO buffer offsets to 0. */
173 brw_obj
->zero_offsets
= true;
175 BEGIN_BATCH(1 + 2 * BRW_MAX_XFB_STREAMS
);
176 OUT_BATCH(MI_LOAD_REGISTER_IMM
| (1 + 2 * BRW_MAX_XFB_STREAMS
- 2));
177 for (int i
= 0; i
< BRW_MAX_XFB_STREAMS
; i
++) {
178 OUT_BATCH(GEN7_SO_WRITE_OFFSET(i
));
184 /* Zero out the initial tallies */
185 brw_store_data_imm64(brw
, brw_obj
->prim_count_bo
, TALLY_OFFSET
, 0ull);
186 brw_store_data_imm64(brw
, brw_obj
->prim_count_bo
, TALLY_OFFSET
+ 8, 0ull);
188 /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
189 save_prim_start_values(brw
, brw_obj
);
193 * PauseTransformFeedback() driver hook.
196 hsw_pause_transform_feedback(struct gl_context
*ctx
,
197 struct gl_transform_feedback_object
*obj
)
199 struct brw_context
*brw
= brw_context(ctx
);
200 struct brw_transform_feedback_object
*brw_obj
=
201 (struct brw_transform_feedback_object
*) obj
;
203 if (brw
->is_haswell
) {
204 /* Flush any drawing so that the counters have the right values. */
205 brw_emit_mi_flush(brw
);
207 /* Save the SOL buffer offset register values. */
208 for (int i
= 0; i
< BRW_MAX_XFB_STREAMS
; i
++) {
210 OUT_BATCH(MI_STORE_REGISTER_MEM
| (3 - 2));
211 OUT_BATCH(GEN7_SO_WRITE_OFFSET(i
));
212 OUT_RELOC(brw_obj
->offset_bo
,
213 I915_GEM_DOMAIN_INSTRUCTION
, I915_GEM_DOMAIN_INSTRUCTION
,
214 i
* sizeof(uint32_t));
219 /* Add any primitives written to our tally */
220 tally_prims_written(brw
, brw_obj
, false);
224 * ResumeTransformFeedback() driver hook.
227 hsw_resume_transform_feedback(struct gl_context
*ctx
,
228 struct gl_transform_feedback_object
*obj
)
230 struct brw_context
*brw
= brw_context(ctx
);
231 struct brw_transform_feedback_object
*brw_obj
=
232 (struct brw_transform_feedback_object
*) obj
;
234 if (brw
->is_haswell
) {
235 /* Reload the SOL buffer offset registers. */
236 for (int i
= 0; i
< BRW_MAX_XFB_STREAMS
; i
++) {
238 OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM
| (3 - 2));
239 OUT_BATCH(GEN7_SO_WRITE_OFFSET(i
));
240 OUT_RELOC(brw_obj
->offset_bo
,
241 I915_GEM_DOMAIN_INSTRUCTION
, I915_GEM_DOMAIN_INSTRUCTION
,
242 i
* sizeof(uint32_t));
247 /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
248 save_prim_start_values(brw
, brw_obj
);
/**
 * EndTransformFeedback() driver hook.
 *
 * \param ctx  GL context
 * \param obj  transform feedback object being ended
 */
void
hsw_end_transform_feedback(struct gl_context *ctx,
                           struct gl_transform_feedback_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) obj;

   /* Add any primitives written to our tally, convert it from the number
    * of primitives written to the number of vertices written, and store
    * it in the "final" location in the buffer which DrawTransformFeedback()
    * will use as the vertex count.
    */
   tally_prims_written(brw, brw_obj, true);
}