/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Marek Olšák <maraeo@gmail.com>
 */

#include "r600_pipe_common.h"
#include "r600_cs.h"

#include "util/u_memory.h"
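
/*
 * Create a stream-output target wrapping a range of a buffer resource.
 * The filled-size counter lives in a small suballocated GPU buffer, so
 * STRMOUT_BUFFER_UPDATE packets can read and write it on the GPU without
 * a CPU round trip.
 */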
static struct pipe_stream_output_target *
r600_create_so_target(struct pipe_context *ctx,
		      struct pipe_resource *buffer,
		      unsigned buffer_offset,
		      unsigned buffer_size)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_so_target *t;
	struct r600_resource *rbuffer = (struct r600_resource *)buffer;

	t = CALLOC_STRUCT(r600_so_target);
	if (!t) {
		return NULL;
	}

	u_suballocator_alloc(rctx->allocator_so_filled_size, 4,
			     &t->buf_filled_size_offset,
			     (struct pipe_resource **)&t->buf_filled_size);
	if (!t->buf_filled_size) {
		FREE(t);
		return NULL;
	}

	t->b.reference.count = 1;
	t->b.context = ctx;
	pipe_resource_reference(&t->b.buffer, buffer);
	t->b.buffer_offset = buffer_offset;
	t->b.buffer_size = buffer_size;

	util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
		       buffer_offset + buffer_size);
	return &t->b;
}
static void r600_so_target_destroy(struct pipe_context *ctx,
				   struct pipe_stream_output_target *target)
{
	struct r600_so_target *t = (struct r600_so_target *)target;
	pipe_resource_reference(&t->b.buffer, NULL);
	pipe_resource_reference((struct pipe_resource **)&t->buf_filled_size, NULL);
	FREE(t);
}
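
/*
 * Recompute how many command-stream dwords the streamout begin/end
 * sequences need for the currently enabled buffer mask, and mark the
 * begin atom dirty so it is re-emitted.
 */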
void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
{
	struct r600_atom *begin = &rctx->streamout.begin_atom;
	unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask);
	unsigned num_bufs_appended = util_bitcount(rctx->streamout.enabled_mask &
						   rctx->streamout.append_bitmask);

	rctx->streamout.num_dw_for_end =
		12 + /* flush_vgt_streamout */
		num_bufs * 8 + /* STRMOUT_BUFFER_UPDATE */
		3 /* set_streamout_enable(0) */;

	begin->num_dw = 12 + /* flush_vgt_streamout */
			6; /* set_streamout_enable */

	if (rctx->chip_class >= SI) {
		begin->num_dw += num_bufs * 4; /* SET_CONTEXT_REG */
	} else {
		begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */

		if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740)
			begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */
	}

	begin->num_dw +=
		num_bufs_appended * 8 + /* STRMOUT_BUFFER_UPDATE */
		(num_bufs - num_bufs_appended) * 6 + /* STRMOUT_BUFFER_UPDATE */
		(rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0); /* SURFACE_BASE_UPDATE */

	begin->dirty = true;
}
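
/*
 * Bind a new set of streamout targets. An offset of (unsigned)-1 means
 * "append": writing resumes at the filled size that the previous
 * r600_emit_streamout_end() stored for that buffer.
 */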
void r600_set_streamout_targets(struct pipe_context *ctx,
				unsigned num_targets,
				struct pipe_stream_output_target **targets,
				const unsigned *offsets)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	unsigned i;
	unsigned append_bitmask = 0;

	/* Stop streamout. */
	if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) {
		r600_emit_streamout_end(rctx);
	}

	/* Set the new targets. */
	for (i = 0; i < num_targets; i++) {
		pipe_so_target_reference((struct pipe_stream_output_target **)&rctx->streamout.targets[i], targets[i]);
		r600_context_add_resource_size(ctx, targets[i]->buffer);
		if (offsets[i] == ((unsigned)-1))
			append_bitmask |= 1 << i;
	}
	for (; i < rctx->streamout.num_targets; i++) {
		pipe_so_target_reference((struct pipe_stream_output_target **)&rctx->streamout.targets[i], NULL);
	}

	rctx->streamout.enabled_mask = (num_targets >= 1 && targets[0] ? 1 : 0) |
				       (num_targets >= 2 && targets[1] ? 2 : 0) |
				       (num_targets >= 3 && targets[2] ? 4 : 0) |
				       (num_targets >= 4 && targets[3] ? 8 : 0);

	rctx->streamout.num_targets = num_targets;
	rctx->streamout.append_bitmask = append_bitmask;

	if (num_targets) {
		r600_streamout_buffers_dirty(rctx);
	} else {
		rctx->streamout.begin_atom.dirty = false;
	}
}
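
/*
 * Flush VGT streamout state and wait for the flush to finish: clear
 * CP_STRMOUT_CNTL, send a VGT_STREAMOUT_FLUSH event, then poll the
 * register with WAIT_REG_MEM until OFFSET_UPDATE_DONE is set.
 */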
static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
{
	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
	unsigned reg_strmout_cntl;

	/* The register is at different places on different ASICs. */
	if (rctx->chip_class >= CIK) {
		reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
	} else if (rctx->chip_class >= EVERGREEN) {
		reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
	} else {
		reg_strmout_cntl = R_008490_CP_STRMOUT_CNTL;
	}

	if (rctx->chip_class >= CIK) {
		cik_write_uconfig_reg(cs, reg_strmout_cntl, 0);
	} else {
		r600_write_config_reg(cs, reg_strmout_cntl, 0);
	}

	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));

	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
	radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
	radeon_emit(cs, reg_strmout_cntl >> 2); /* register */
	radeon_emit(cs, 0); /* unused */
	radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* reference value */
	radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* mask */
	radeon_emit(cs, 4); /* poll interval */
}
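
/* Pre-Evergreen (R600/R700) streamout enable via VGT_STRMOUT_EN. */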
static void r600_set_streamout_enable(struct r600_common_context *rctx, unsigned buffer_enable_bit)
{
	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;

	if (buffer_enable_bit) {
		r600_write_context_reg(cs, R_028AB0_VGT_STRMOUT_EN, S_028AB0_STREAMOUT(1));
		r600_write_context_reg(cs, R_028B20_VGT_STRMOUT_BUFFER_EN, buffer_enable_bit);
	} else {
		r600_write_context_reg(cs, R_028AB0_VGT_STRMOUT_EN, S_028AB0_STREAMOUT(0));
	}
}
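
/* Evergreen+ uses VGT_STRMOUT_CONFIG/VGT_STRMOUT_BUFFER_CONFIG instead. */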
static void evergreen_set_streamout_enable(struct r600_common_context *rctx, unsigned buffer_enable_bit)
{
	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;

	if (buffer_enable_bit) {
		r600_write_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
		radeon_emit(cs, S_028B94_STREAMOUT_0_EN(1)); /* R_028B94_VGT_STRMOUT_CONFIG */
		radeon_emit(cs, S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit)); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
	} else {
		r600_write_context_reg(cs, R_028B94_VGT_STRMOUT_CONFIG, S_028B94_STREAMOUT_0_EN(0));
	}
}
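
/*
 * Emit the packets that start streamout: flush VGT, enable the buffers,
 * program size/stride/base for each target, and initialize each buffer
 * offset either from the saved filled size (append) or from the target's
 * buffer_offset (start from the beginning).
 */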
static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
	struct r600_so_target **t = rctx->streamout.targets;
	unsigned *stride_in_dw = rctx->streamout.stride_in_dw;
	unsigned i, update_flags = 0;

	r600_flush_vgt_streamout(rctx);

	if (rctx->chip_class >= EVERGREEN) {
		evergreen_set_streamout_enable(rctx, rctx->streamout.enabled_mask);
	} else {
		r600_set_streamout_enable(rctx, rctx->streamout.enabled_mask);
	}

	for (i = 0; i < rctx->streamout.num_targets; i++) {
		if (!t[i])
			continue;

		t[i]->stride_in_dw = stride_in_dw[i];

		if (rctx->chip_class >= SI) {
			/* SI binds streamout buffers as shader resources.
			 * VGT only counts primitives and tells the shader
			 * through SGPRs what to do. */
			r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
			radeon_emit(cs, (t[i]->b.buffer_offset +
					 t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
			radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
		} else {
			uint64_t va = r600_resource_va(rctx->b.screen,
						       (void*)t[i]->b.buffer);

			update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);

			r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
			radeon_emit(cs, (t[i]->b.buffer_offset +
					 t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
			radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
			radeon_emit(cs, va >> 8); /* BUFFER_BASE */

			r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
					RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);

			/* R7xx requires this packet after updating BUFFER_BASE.
			 * Without this, R7xx locks up. */
			if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) {
				radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0));
				radeon_emit(cs, i);
				radeon_emit(cs, va >> 8);

				r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
						RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
			}
		}

		if (rctx->streamout.append_bitmask & (1 << i)) {
			uint64_t va = r600_resource_va(rctx->b.screen,
						       (void*)t[i]->buf_filled_size) +
				      t[i]->buf_filled_size_offset;

			/* Append. */
			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
				    STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
			radeon_emit(cs, 0); /* unused */
			radeon_emit(cs, 0); /* unused */
			radeon_emit(cs, va); /* src address lo */
			radeon_emit(cs, va >> 32); /* src address hi */

			r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
					RADEON_USAGE_READ, RADEON_PRIO_MIN);
		} else {
			/* Start from the beginning. */
			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
				    STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
			radeon_emit(cs, 0); /* unused */
			radeon_emit(cs, 0); /* unused */
			radeon_emit(cs, t[i]->b.buffer_offset >> 2); /* buffer offset in DW */
			radeon_emit(cs, 0); /* unused */
		}
	}

	if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770) {
		radeon_emit(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0));
		radeon_emit(cs, update_flags);
	}
	rctx->streamout.begin_emitted = true;
}
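
/*
 * Emit the packets that stop streamout: flush VGT, store each buffer's
 * filled size back into its counter buffer (so a later "append" bind can
 * resume there), and disable streamout.
 */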
void r600_emit_streamout_end(struct r600_common_context *rctx)
{
	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
	struct r600_so_target **t = rctx->streamout.targets;
	unsigned i;
	uint64_t va;

	r600_flush_vgt_streamout(rctx);

	for (i = 0; i < rctx->streamout.num_targets; i++) {
		if (!t[i])
			continue;

		va = r600_resource_va(rctx->b.screen,
				      (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
		radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
		radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
			    STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
			    STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
		radeon_emit(cs, va); /* dst address lo */
		radeon_emit(cs, va >> 32); /* dst address hi */
		radeon_emit(cs, 0); /* unused */
		radeon_emit(cs, 0); /* unused */

		r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
				RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
	}

	if (rctx->chip_class >= EVERGREEN) {
		evergreen_set_streamout_enable(rctx, 0);
	} else {
		r600_set_streamout_enable(rctx, 0);
	}

	rctx->streamout.begin_emitted = false;

	if (rctx->chip_class >= R700) {
		rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
	} else {
		rctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
	}
}
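
/* Hook the common streamout entrypoints into the context. */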
void r600_streamout_init(struct r600_common_context *rctx)
{
	rctx->b.create_stream_output_target = r600_create_so_target;
	rctx->b.stream_output_target_destroy = r600_so_target_destroy;
	rctx->streamout.begin_atom.emit = r600_emit_streamout_begin;
}