/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Marek Olšák <maraeo@gmail.com>
 */
29 #include "radeon/r600_cs.h"
31 #include "util/u_memory.h"
/* Forward declaration; updates the derived streamout-enable state (defined below). */
static void si_set_streamout_enable(struct si_context *sctx, bool enable);
35 static struct pipe_stream_output_target
*
36 si_create_so_target(struct pipe_context
*ctx
,
37 struct pipe_resource
*buffer
,
38 unsigned buffer_offset
,
41 struct si_context
*sctx
= (struct si_context
*)ctx
;
42 struct si_streamout_target
*t
;
43 struct r600_resource
*rbuffer
= (struct r600_resource
*)buffer
;
45 t
= CALLOC_STRUCT(si_streamout_target
);
50 u_suballocator_alloc(sctx
->b
.allocator_zeroed_memory
, 4, 4,
51 &t
->buf_filled_size_offset
,
52 (struct pipe_resource
**)&t
->buf_filled_size
);
53 if (!t
->buf_filled_size
) {
58 t
->b
.reference
.count
= 1;
60 pipe_resource_reference(&t
->b
.buffer
, buffer
);
61 t
->b
.buffer_offset
= buffer_offset
;
62 t
->b
.buffer_size
= buffer_size
;
64 util_range_add(&rbuffer
->valid_buffer_range
, buffer_offset
,
65 buffer_offset
+ buffer_size
);
69 static void si_so_target_destroy(struct pipe_context
*ctx
,
70 struct pipe_stream_output_target
*target
)
72 struct si_streamout_target
*t
= (struct si_streamout_target
*)target
;
73 pipe_resource_reference(&t
->b
.buffer
, NULL
);
74 r600_resource_reference(&t
->buf_filled_size
, NULL
);
78 void si_streamout_buffers_dirty(struct si_context
*sctx
)
80 if (!sctx
->streamout
.enabled_mask
)
83 si_mark_atom_dirty(sctx
, &sctx
->streamout
.begin_atom
);
84 si_set_streamout_enable(sctx
, true);
87 void si_common_set_streamout_targets(struct pipe_context
*ctx
,
89 struct pipe_stream_output_target
**targets
,
90 const unsigned *offsets
)
92 struct si_context
*sctx
= (struct si_context
*)ctx
;
94 unsigned enabled_mask
= 0, append_bitmask
= 0;
97 if (sctx
->streamout
.num_targets
&& sctx
->streamout
.begin_emitted
) {
98 si_emit_streamout_end(sctx
);
101 /* Set the new targets. */
102 for (i
= 0; i
< num_targets
; i
++) {
103 pipe_so_target_reference((struct pipe_stream_output_target
**)&sctx
->streamout
.targets
[i
], targets
[i
]);
107 r600_context_add_resource_size(ctx
, targets
[i
]->buffer
);
108 enabled_mask
|= 1 << i
;
109 if (offsets
[i
] == ((unsigned)-1))
110 append_bitmask
|= 1 << i
;
112 for (; i
< sctx
->streamout
.num_targets
; i
++) {
113 pipe_so_target_reference((struct pipe_stream_output_target
**)&sctx
->streamout
.targets
[i
], NULL
);
116 sctx
->streamout
.enabled_mask
= enabled_mask
;
118 sctx
->streamout
.num_targets
= num_targets
;
119 sctx
->streamout
.append_bitmask
= append_bitmask
;
122 si_streamout_buffers_dirty(sctx
);
124 si_set_atom_dirty(sctx
, &sctx
->streamout
.begin_atom
, false);
125 si_set_streamout_enable(sctx
, false);
129 static void si_flush_vgt_streamout(struct si_context
*sctx
)
131 struct radeon_winsys_cs
*cs
= sctx
->b
.gfx
.cs
;
132 unsigned reg_strmout_cntl
;
134 /* The register is at different places on different ASICs. */
135 if (sctx
->b
.chip_class
>= CIK
) {
136 reg_strmout_cntl
= R_0300FC_CP_STRMOUT_CNTL
;
137 radeon_set_uconfig_reg(cs
, reg_strmout_cntl
, 0);
139 reg_strmout_cntl
= R_0084FC_CP_STRMOUT_CNTL
;
140 radeon_set_config_reg(cs
, reg_strmout_cntl
, 0);
143 radeon_emit(cs
, PKT3(PKT3_EVENT_WRITE
, 0, 0));
144 radeon_emit(cs
, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH
) | EVENT_INDEX(0));
146 radeon_emit(cs
, PKT3(PKT3_WAIT_REG_MEM
, 5, 0));
147 radeon_emit(cs
, WAIT_REG_MEM_EQUAL
); /* wait until the register is equal to the reference value */
148 radeon_emit(cs
, reg_strmout_cntl
>> 2); /* register */
150 radeon_emit(cs
, S_008490_OFFSET_UPDATE_DONE(1)); /* reference value */
151 radeon_emit(cs
, S_008490_OFFSET_UPDATE_DONE(1)); /* mask */
152 radeon_emit(cs
, 4); /* poll interval */
155 static void si_emit_streamout_begin(struct r600_common_context
*rctx
, struct r600_atom
*atom
)
157 struct si_context
*sctx
= (struct si_context
*)rctx
;
158 struct radeon_winsys_cs
*cs
= sctx
->b
.gfx
.cs
;
159 struct si_streamout_target
**t
= sctx
->streamout
.targets
;
160 uint16_t *stride_in_dw
= sctx
->streamout
.stride_in_dw
;
163 si_flush_vgt_streamout(sctx
);
165 for (i
= 0; i
< sctx
->streamout
.num_targets
; i
++) {
169 t
[i
]->stride_in_dw
= stride_in_dw
[i
];
171 /* SI binds streamout buffers as shader resources.
172 * VGT only counts primitives and tells the shader
173 * through SGPRs what to do. */
174 radeon_set_context_reg_seq(cs
, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0
+ 16*i
, 2);
175 radeon_emit(cs
, (t
[i
]->b
.buffer_offset
+
176 t
[i
]->b
.buffer_size
) >> 2); /* BUFFER_SIZE (in DW) */
177 radeon_emit(cs
, stride_in_dw
[i
]); /* VTX_STRIDE (in DW) */
179 if (sctx
->streamout
.append_bitmask
& (1 << i
) && t
[i
]->buf_filled_size_valid
) {
180 uint64_t va
= t
[i
]->buf_filled_size
->gpu_address
+
181 t
[i
]->buf_filled_size_offset
;
184 radeon_emit(cs
, PKT3(PKT3_STRMOUT_BUFFER_UPDATE
, 4, 0));
185 radeon_emit(cs
, STRMOUT_SELECT_BUFFER(i
) |
186 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM
)); /* control */
187 radeon_emit(cs
, 0); /* unused */
188 radeon_emit(cs
, 0); /* unused */
189 radeon_emit(cs
, va
); /* src address lo */
190 radeon_emit(cs
, va
>> 32); /* src address hi */
192 r600_emit_reloc(&sctx
->b
, &sctx
->b
.gfx
, t
[i
]->buf_filled_size
,
193 RADEON_USAGE_READ
, RADEON_PRIO_SO_FILLED_SIZE
);
195 /* Start from the beginning. */
196 radeon_emit(cs
, PKT3(PKT3_STRMOUT_BUFFER_UPDATE
, 4, 0));
197 radeon_emit(cs
, STRMOUT_SELECT_BUFFER(i
) |
198 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET
)); /* control */
199 radeon_emit(cs
, 0); /* unused */
200 radeon_emit(cs
, 0); /* unused */
201 radeon_emit(cs
, t
[i
]->b
.buffer_offset
>> 2); /* buffer offset in DW */
202 radeon_emit(cs
, 0); /* unused */
206 sctx
->streamout
.begin_emitted
= true;
209 void si_emit_streamout_end(struct si_context
*sctx
)
211 struct radeon_winsys_cs
*cs
= sctx
->b
.gfx
.cs
;
212 struct si_streamout_target
**t
= sctx
->streamout
.targets
;
216 si_flush_vgt_streamout(sctx
);
218 for (i
= 0; i
< sctx
->streamout
.num_targets
; i
++) {
222 va
= t
[i
]->buf_filled_size
->gpu_address
+ t
[i
]->buf_filled_size_offset
;
223 radeon_emit(cs
, PKT3(PKT3_STRMOUT_BUFFER_UPDATE
, 4, 0));
224 radeon_emit(cs
, STRMOUT_SELECT_BUFFER(i
) |
225 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE
) |
226 STRMOUT_STORE_BUFFER_FILLED_SIZE
); /* control */
227 radeon_emit(cs
, va
); /* dst address lo */
228 radeon_emit(cs
, va
>> 32); /* dst address hi */
229 radeon_emit(cs
, 0); /* unused */
230 radeon_emit(cs
, 0); /* unused */
232 r600_emit_reloc(&sctx
->b
, &sctx
->b
.gfx
, t
[i
]->buf_filled_size
,
233 RADEON_USAGE_WRITE
, RADEON_PRIO_SO_FILLED_SIZE
);
235 /* Zero the buffer size. The counters (primitives generated,
236 * primitives emitted) may be enabled even if there is not
237 * buffer bound. This ensures that the primitives-emitted query
238 * won't increment. */
239 radeon_set_context_reg(cs
, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0
+ 16*i
, 0);
241 t
[i
]->buf_filled_size_valid
= true;
244 sctx
->streamout
.begin_emitted
= false;
245 sctx
->b
.flags
|= R600_CONTEXT_STREAMOUT_FLUSH
;
/* STREAMOUT CONFIG DERIVED STATE
 *
 * Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
 * The buffer mask is an independent state, so no writes occur if there
 * are no buffers bound.
 */
255 static void si_emit_streamout_enable(struct r600_common_context
*rctx
,
256 struct r600_atom
*atom
)
258 struct si_context
*sctx
= (struct si_context
*)rctx
;
260 radeon_set_context_reg_seq(sctx
->b
.gfx
.cs
, R_028B94_VGT_STRMOUT_CONFIG
, 2);
261 radeon_emit(sctx
->b
.gfx
.cs
,
262 S_028B94_STREAMOUT_0_EN(si_get_strmout_en(sctx
)) |
263 S_028B94_RAST_STREAM(0) |
264 S_028B94_STREAMOUT_1_EN(si_get_strmout_en(sctx
)) |
265 S_028B94_STREAMOUT_2_EN(si_get_strmout_en(sctx
)) |
266 S_028B94_STREAMOUT_3_EN(si_get_strmout_en(sctx
)));
267 radeon_emit(sctx
->b
.gfx
.cs
,
268 sctx
->streamout
.hw_enabled_mask
&
269 sctx
->streamout
.enabled_stream_buffers_mask
);
272 static void si_set_streamout_enable(struct si_context
*sctx
, bool enable
)
274 bool old_strmout_en
= si_get_strmout_en(sctx
);
275 unsigned old_hw_enabled_mask
= sctx
->streamout
.hw_enabled_mask
;
277 sctx
->streamout
.streamout_enabled
= enable
;
279 sctx
->streamout
.hw_enabled_mask
= sctx
->streamout
.enabled_mask
|
280 (sctx
->streamout
.enabled_mask
<< 4) |
281 (sctx
->streamout
.enabled_mask
<< 8) |
282 (sctx
->streamout
.enabled_mask
<< 12);
284 if ((old_strmout_en
!= si_get_strmout_en(sctx
)) ||
285 (old_hw_enabled_mask
!= sctx
->streamout
.hw_enabled_mask
))
286 si_mark_atom_dirty(sctx
, &sctx
->streamout
.enable_atom
);
289 void si_update_prims_generated_query_state(struct si_context
*sctx
,
290 unsigned type
, int diff
)
292 if (type
== PIPE_QUERY_PRIMITIVES_GENERATED
) {
293 bool old_strmout_en
= si_get_strmout_en(sctx
);
295 sctx
->streamout
.num_prims_gen_queries
+= diff
;
296 assert(sctx
->streamout
.num_prims_gen_queries
>= 0);
298 sctx
->streamout
.prims_gen_query_enabled
=
299 sctx
->streamout
.num_prims_gen_queries
!= 0;
301 if (old_strmout_en
!= si_get_strmout_en(sctx
))
302 si_mark_atom_dirty(sctx
, &sctx
->streamout
.enable_atom
);
306 void si_init_streamout_functions(struct si_context
*sctx
)
308 sctx
->b
.b
.create_stream_output_target
= si_create_so_target
;
309 sctx
->b
.b
.stream_output_target_destroy
= si_so_target_destroy
;
310 sctx
->streamout
.begin_atom
.emit
= si_emit_streamout_begin
;
311 sctx
->streamout
.enable_atom
.emit
= si_emit_streamout_enable
;