2 * Copyright 2013 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * Authors: Marek Olšák <maraeo@gmail.com>
27 #include "r600_pipe_common.h"
30 #include "util/u_memory.h"
31 #include "evergreend.h"
33 #define R_008490_CP_STRMOUT_CNTL 0x008490
34 #define R_028AB0_VGT_STRMOUT_EN 0x028AB0
35 #define R_028B20_VGT_STRMOUT_BUFFER_EN 0x028B20
/* Forward declaration: used by the target-binding code before its definition. */
static void r600_set_streamout_enable(struct r600_common_context *rctx,
				      bool enable);
39 static struct pipe_stream_output_target
*
40 r600_create_so_target(struct pipe_context
*ctx
,
41 struct pipe_resource
*buffer
,
42 unsigned buffer_offset
,
45 struct r600_common_context
*rctx
= (struct r600_common_context
*)ctx
;
46 struct r600_so_target
*t
;
47 struct r600_resource
*rbuffer
= (struct r600_resource
*)buffer
;
49 t
= CALLOC_STRUCT(r600_so_target
);
54 u_suballocator_alloc(rctx
->allocator_zeroed_memory
, 4, 4,
55 &t
->buf_filled_size_offset
,
56 (struct pipe_resource
**)&t
->buf_filled_size
);
57 if (!t
->buf_filled_size
) {
62 t
->b
.reference
.count
= 1;
64 pipe_resource_reference(&t
->b
.buffer
, buffer
);
65 t
->b
.buffer_offset
= buffer_offset
;
66 t
->b
.buffer_size
= buffer_size
;
68 util_range_add(buffer
, &rbuffer
->valid_buffer_range
, buffer_offset
,
69 buffer_offset
+ buffer_size
);
73 static void r600_so_target_destroy(struct pipe_context
*ctx
,
74 struct pipe_stream_output_target
*target
)
76 struct r600_so_target
*t
= (struct r600_so_target
*)target
;
77 pipe_resource_reference(&t
->b
.buffer
, NULL
);
78 r600_resource_reference(&t
->buf_filled_size
, NULL
);
82 void r600_streamout_buffers_dirty(struct r600_common_context
*rctx
)
84 struct r600_atom
*begin
= &rctx
->streamout
.begin_atom
;
85 unsigned num_bufs
= util_bitcount(rctx
->streamout
.enabled_mask
);
86 unsigned num_bufs_appended
= util_bitcount(rctx
->streamout
.enabled_mask
&
87 rctx
->streamout
.append_bitmask
);
92 rctx
->streamout
.num_dw_for_end
=
93 12 + /* flush_vgt_streamout */
94 num_bufs
* 11; /* STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */
96 begin
->num_dw
= 12; /* flush_vgt_streamout */
98 begin
->num_dw
+= num_bufs
* 7; /* SET_CONTEXT_REG */
100 if (rctx
->family
>= CHIP_RS780
&& rctx
->family
<= CHIP_RV740
)
101 begin
->num_dw
+= num_bufs
* 5; /* STRMOUT_BASE_UPDATE */
104 num_bufs_appended
* 8 + /* STRMOUT_BUFFER_UPDATE */
105 (num_bufs
- num_bufs_appended
) * 6 + /* STRMOUT_BUFFER_UPDATE */
106 (rctx
->family
> CHIP_R600
&& rctx
->family
< CHIP_RS780
? 2 : 0); /* SURFACE_BASE_UPDATE */
108 rctx
->set_atom_dirty(rctx
, begin
, true);
110 r600_set_streamout_enable(rctx
, true);
113 void r600_set_streamout_targets(struct pipe_context
*ctx
,
114 unsigned num_targets
,
115 struct pipe_stream_output_target
**targets
,
116 const unsigned *offsets
)
118 struct r600_common_context
*rctx
= (struct r600_common_context
*)ctx
;
120 unsigned enabled_mask
= 0, append_bitmask
= 0;
122 /* Stop streamout. */
123 if (rctx
->streamout
.num_targets
&& rctx
->streamout
.begin_emitted
) {
124 r600_emit_streamout_end(rctx
);
127 /* Set the new targets. */
128 for (i
= 0; i
< num_targets
; i
++) {
129 pipe_so_target_reference((struct pipe_stream_output_target
**)&rctx
->streamout
.targets
[i
], targets
[i
]);
133 r600_context_add_resource_size(ctx
, targets
[i
]->buffer
);
134 enabled_mask
|= 1 << i
;
135 if (offsets
[i
] == ((unsigned)-1))
136 append_bitmask
|= 1 << i
;
138 for (; i
< rctx
->streamout
.num_targets
; i
++) {
139 pipe_so_target_reference((struct pipe_stream_output_target
**)&rctx
->streamout
.targets
[i
], NULL
);
142 rctx
->streamout
.enabled_mask
= enabled_mask
;
144 rctx
->streamout
.num_targets
= num_targets
;
145 rctx
->streamout
.append_bitmask
= append_bitmask
;
148 r600_streamout_buffers_dirty(rctx
);
150 rctx
->set_atom_dirty(rctx
, &rctx
->streamout
.begin_atom
, false);
151 r600_set_streamout_enable(rctx
, false);
155 static void r600_flush_vgt_streamout(struct r600_common_context
*rctx
)
157 struct radeon_cmdbuf
*cs
= rctx
->gfx
.cs
;
158 unsigned reg_strmout_cntl
;
160 /* The register is at different places on different ASICs. */
161 if (rctx
->chip_class
>= EVERGREEN
) {
162 reg_strmout_cntl
= R_0084FC_CP_STRMOUT_CNTL
;
164 reg_strmout_cntl
= R_008490_CP_STRMOUT_CNTL
;
167 radeon_set_config_reg(cs
, reg_strmout_cntl
, 0);
169 radeon_emit(cs
, PKT3(PKT3_EVENT_WRITE
, 0, 0));
170 radeon_emit(cs
, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH
) | EVENT_INDEX(0));
172 radeon_emit(cs
, PKT3(PKT3_WAIT_REG_MEM
, 5, 0));
173 radeon_emit(cs
, WAIT_REG_MEM_EQUAL
); /* wait until the register is equal to the reference value */
174 radeon_emit(cs
, reg_strmout_cntl
>> 2); /* register */
176 radeon_emit(cs
, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */
177 radeon_emit(cs
, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */
178 radeon_emit(cs
, 4); /* poll interval */
/*
 * Emit callback for the streamout "begin" atom.
 *
 * Flushes VGT streamout, then for every slot in rctx->streamout.targets[]
 * programs the buffer size, vertex stride and base address, and issues a
 * STRMOUT_BUFFER_UPDATE that either reloads the write offset from the
 * target's filled-size buffer (append) or restarts at b.buffer_offset.
 * Finally sets rctx->streamout.begin_emitted.
 *
 * NOTE(review): this listing has lost its braces and apparently some
 * statements; the notes below mark the places to confirm against a clean
 * copy before relying on the exact packet contents.
 */
181 static void r600_emit_streamout_begin(struct r600_common_context
*rctx
, struct r600_atom
*atom
)
183 struct radeon_cmdbuf
*cs
= rctx
->gfx
.cs
;
184 struct r600_so_target
**t
= rctx
->streamout
.targets
;
185 uint16_t *stride_in_dw
= rctx
->streamout
.stride_in_dw
;
186 unsigned i
, update_flags
= 0;
188 r600_flush_vgt_streamout(rctx
);
190 for (i
= 0; i
< rctx
->streamout
.num_targets
; i
++) {
/* NOTE(review): t[i] is dereferenced below with no visible NULL check;
 * confirm whether empty slots are skipped here. */
/* Remember the stride this target was started with. */
194 t
[i
]->stride_in_dw
= stride_in_dw
[i
];
196 uint64_t va
= r600_resource(t
[i
]->b
.buffer
)->gpu_address
;
/* Collect one flag per programmed buffer for the SURFACE_BASE_UPDATE packet emitted after the loop. */
198 update_flags
|= SURFACE_BASE_UPDATE_STRMOUT(i
);
/* Three consecutive context registers per buffer: size, stride, base. */
200 radeon_set_context_reg_seq(cs
, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0
+ 16*i
, 3);
201 radeon_emit(cs
, (t
[i
]->b
.buffer_offset
+
202 t
[i
]->b
.buffer_size
) >> 2); /* BUFFER_SIZE (in DW) */
203 radeon_emit(cs
, stride_in_dw
[i
]); /* VTX_STRIDE (in DW) */
204 radeon_emit(cs
, va
>> 8); /* BUFFER_BASE */
206 r600_emit_reloc(rctx
, &rctx
->gfx
, r600_resource(t
[i
]->b
.buffer
),
207 RADEON_USAGE_WRITE
, RADEON_PRIO_SHADER_RW_BUFFER
);
209 /* R7xx requires this packet after updating BUFFER_BASE.
210 * Without this, R7xx locks up. */
211 if (rctx
->family
>= CHIP_RS780
&& rctx
->family
<= CHIP_RV740
) {
/* NOTE(review): a PKT3 count of 1 suggests two payload dwords, but only
 * one emit is visible before the relocation - confirm the missing dword. */
212 radeon_emit(cs
, PKT3(PKT3_STRMOUT_BASE_UPDATE
, 1, 0));
214 radeon_emit(cs
, va
>> 8);
216 r600_emit_reloc(rctx
, &rctx
->gfx
, r600_resource(t
[i
]->b
.buffer
),
217 RADEON_USAGE_WRITE
, RADEON_PRIO_SHADER_RW_BUFFER
);
/* Append path: taken only when the append bit is set and a filled size has been stored for this target. */
220 if (rctx
->streamout
.append_bitmask
& (1 << i
) && t
[i
]->buf_filled_size_valid
) {
/* Address of the stored filled size: buffer address plus the suballocation offset. */
221 uint64_t va
= t
[i
]->buf_filled_size
->gpu_address
+
222 t
[i
]->buf_filled_size_offset
;
225 radeon_emit(cs
, PKT3(PKT3_STRMOUT_BUFFER_UPDATE
, 4, 0));
226 radeon_emit(cs
, STRMOUT_SELECT_BUFFER(i
) |
227 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM
)); /* control */
228 radeon_emit(cs
, 0); /* unused */
229 radeon_emit(cs
, 0); /* unused */
230 radeon_emit(cs
, va
); /* src address lo */
231 radeon_emit(cs
, va
>> 32); /* src address hi */
233 r600_emit_reloc(rctx
, &rctx
->gfx
, t
[i
]->buf_filled_size
,
234 RADEON_USAGE_READ
, RADEON_PRIO_SO_FILLED_SIZE
);
/* NOTE(review): presumably the else branch of the append test above; the
 * branch structure is not visible in this text - confirm. */
236 /* Start from the beginning. */
237 radeon_emit(cs
, PKT3(PKT3_STRMOUT_BUFFER_UPDATE
, 4, 0));
238 radeon_emit(cs
, STRMOUT_SELECT_BUFFER(i
) |
239 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET
)); /* control */
240 radeon_emit(cs
, 0); /* unused */
241 radeon_emit(cs
, 0); /* unused */
242 radeon_emit(cs
, t
[i
]->b
.buffer_offset
>> 2); /* buffer offset in DW */
243 radeon_emit(cs
, 0); /* unused */
/* NOTE(review): presumably after the per-target loop - one SURFACE_BASE_UPDATE carrying the accumulated flags; confirm placement. */
247 if (rctx
->family
> CHIP_R600
&& rctx
->family
< CHIP_RV770
) {
248 radeon_emit(cs
, PKT3(PKT3_SURFACE_BASE_UPDATE
, 0, 0));
249 radeon_emit(cs
, update_flags
);
/* Lets r600_set_streamout_targets() know an end sequence is required before rebinding. */
251 rctx
->streamout
.begin_emitted
= true;
254 void r600_emit_streamout_end(struct r600_common_context
*rctx
)
256 struct radeon_cmdbuf
*cs
= rctx
->gfx
.cs
;
257 struct r600_so_target
**t
= rctx
->streamout
.targets
;
261 r600_flush_vgt_streamout(rctx
);
263 for (i
= 0; i
< rctx
->streamout
.num_targets
; i
++) {
267 va
= t
[i
]->buf_filled_size
->gpu_address
+ t
[i
]->buf_filled_size_offset
;
268 radeon_emit(cs
, PKT3(PKT3_STRMOUT_BUFFER_UPDATE
, 4, 0));
269 radeon_emit(cs
, STRMOUT_SELECT_BUFFER(i
) |
270 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE
) |
271 STRMOUT_STORE_BUFFER_FILLED_SIZE
); /* control */
272 radeon_emit(cs
, va
); /* dst address lo */
273 radeon_emit(cs
, va
>> 32); /* dst address hi */
274 radeon_emit(cs
, 0); /* unused */
275 radeon_emit(cs
, 0); /* unused */
277 r600_emit_reloc(rctx
, &rctx
->gfx
, t
[i
]->buf_filled_size
,
278 RADEON_USAGE_WRITE
, RADEON_PRIO_SO_FILLED_SIZE
);
280 /* Zero the buffer size. The counters (primitives generated,
281 * primitives emitted) may be enabled even if there is not
282 * buffer bound. This ensures that the primitives-emitted query
283 * won't increment. */
284 radeon_set_context_reg(cs
, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0
+ 16*i
, 0);
286 t
[i
]->buf_filled_size_valid
= true;
289 rctx
->streamout
.begin_emitted
= false;
290 rctx
->flags
|= R600_CONTEXT_STREAMOUT_FLUSH
;
/* STREAMOUT CONFIG DERIVED STATE
 *
 * Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
 * The buffer mask is an independent state, so no writes occur if there
 * are no buffers bound.
 */
300 static void r600_emit_streamout_enable(struct r600_common_context
*rctx
,
301 struct r600_atom
*atom
)
303 unsigned strmout_config_reg
= R_028AB0_VGT_STRMOUT_EN
;
304 unsigned strmout_config_val
= S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx
));
305 unsigned strmout_buffer_reg
= R_028B20_VGT_STRMOUT_BUFFER_EN
;
306 unsigned strmout_buffer_val
= rctx
->streamout
.hw_enabled_mask
&
307 rctx
->streamout
.enabled_stream_buffers_mask
;
309 if (rctx
->chip_class
>= EVERGREEN
) {
310 strmout_buffer_reg
= R_028B98_VGT_STRMOUT_BUFFER_CONFIG
;
312 strmout_config_reg
= R_028B94_VGT_STRMOUT_CONFIG
;
313 strmout_config_val
|=
314 S_028B94_STREAMOUT_1_EN(r600_get_strmout_en(rctx
)) |
315 S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx
)) |
316 S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx
));
318 radeon_set_context_reg(rctx
->gfx
.cs
, strmout_buffer_reg
, strmout_buffer_val
);
319 radeon_set_context_reg(rctx
->gfx
.cs
, strmout_config_reg
, strmout_config_val
);
322 static void r600_set_streamout_enable(struct r600_common_context
*rctx
, bool enable
)
324 bool old_strmout_en
= r600_get_strmout_en(rctx
);
325 unsigned old_hw_enabled_mask
= rctx
->streamout
.hw_enabled_mask
;
327 rctx
->streamout
.streamout_enabled
= enable
;
329 rctx
->streamout
.hw_enabled_mask
= rctx
->streamout
.enabled_mask
|
330 (rctx
->streamout
.enabled_mask
<< 4) |
331 (rctx
->streamout
.enabled_mask
<< 8) |
332 (rctx
->streamout
.enabled_mask
<< 12);
334 if ((old_strmout_en
!= r600_get_strmout_en(rctx
)) ||
335 (old_hw_enabled_mask
!= rctx
->streamout
.hw_enabled_mask
)) {
336 rctx
->set_atom_dirty(rctx
, &rctx
->streamout
.enable_atom
, true);
340 void r600_update_prims_generated_query_state(struct r600_common_context
*rctx
,
341 unsigned type
, int diff
)
343 if (type
== PIPE_QUERY_PRIMITIVES_GENERATED
) {
344 bool old_strmout_en
= r600_get_strmout_en(rctx
);
346 rctx
->streamout
.num_prims_gen_queries
+= diff
;
347 assert(rctx
->streamout
.num_prims_gen_queries
>= 0);
349 rctx
->streamout
.prims_gen_query_enabled
=
350 rctx
->streamout
.num_prims_gen_queries
!= 0;
352 if (old_strmout_en
!= r600_get_strmout_en(rctx
)) {
353 rctx
->set_atom_dirty(rctx
, &rctx
->streamout
.enable_atom
, true);
358 void r600_streamout_init(struct r600_common_context
*rctx
)
360 rctx
->b
.create_stream_output_target
= r600_create_so_target
;
361 rctx
->b
.stream_output_target_destroy
= r600_so_target_destroy
;
362 rctx
->streamout
.begin_atom
.emit
= r600_emit_streamout_begin
;
363 rctx
->streamout
.enable_atom
.emit
= r600_emit_streamout_enable
;
364 rctx
->streamout
.enable_atom
.num_dw
= 6;