2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Christian König <christian.koenig@amd.com>
27 #include "radeonsi_pipe.h"
/*
 * Emits the command-stream packets that begin stream output for the targets
 * bound in ctx->so_targets.
 *
 * Visible steps: compute buffer_en (one bit per non-NULL target among the
 * first four slots), store a size derived from buffer_en in
 * ctx->num_cs_dw_streamout_end, call the evergreen_* flush/enable helpers
 * after the chip_class check, then for each target program its buffer
 * size/stride registers and emit a STRMOUT_BUFFER_UPDATE packet.
 *
 * Every "cs->buf[cs->cdw++] = x" appends one entry to cs->buf and advances
 * cs->cdw.
 *
 * NOTE(review): the embedded line numbering of this listing has gaps (for
 * example 36, 41, 50-52, 54, 61-63, 65-66, 81-82, 96-97), so braces and
 * possibly whole statements are not shown; confirm the control flow against
 * the complete file.
 * NOTE(review): ctx is declared as struct r600_context * here, while the
 * evergreen_* helpers it is passed to are declared with struct si_context *
 * -- confirm both names refer to the same type.
 */
35 void si_context_streamout_begin(struct r600_context
*ctx
)
37 struct radeon_winsys_cs
*cs
= ctx
->cs
;
38 struct si_so_target
**t
= ctx
->so_targets
;
39 unsigned *strides
= ctx
->vs_shader_so_strides
;
40 unsigned buffer_en
, i
;
/* Bit n of buffer_en is set when slot n lies within num_so_targets and
 * t[n] is non-NULL. */
42 buffer_en
= (ctx
->num_so_targets
>= 1 && t
[0] ? 1 : 0) |
43 (ctx
->num_so_targets
>= 2 && t
[1] ? 2 : 0) |
44 (ctx
->num_so_targets
>= 3 && t
[2] ? 4 : 0) |
45 (ctx
->num_so_targets
>= 4 && t
[3] ? 8 : 0);
/* Presumably the number of entries si_context_streamout_end will append
 * (a 12-entry flush plus 8 entries per enabled buffer) -- TODO confirm.
 * NOTE(review): the trailing ", TRUE);" below has no matching call in the
 * visible lines; the terms from the second "flush_vgt_streamout" onward
 * presumably belong to a separate call that this listing omits. */
47 ctx
->num_cs_dw_streamout_end
=
48 12 + /* flush_vgt_streamout */
49 util_bitcount(buffer_en
) * 8 +
53 12 + /* flush_vgt_streamout */
55 util_bitcount(buffer_en
& ctx
->streamout_append_bitmask
) * 8 +
56 util_bitcount(buffer_en
& ~ctx
->streamout_append_bitmask
) * 6 +
57 ctx
->num_cs_dw_streamout_end
, TRUE
);
59 if (ctx
->chip_class
>= CAYMAN
) {
60 evergreen_flush_vgt_streamout(ctx
);
61 evergreen_set_streamout_enable(ctx
, buffer_en
);
/* Per-target setup. */
64 for (i
= 0; i
< ctx
->num_so_targets
; i
++) {
67 t
[i
]->stride
= strides
[i
];
/* SET_CONTEXT_REG with three values starting at
 * VGT_STRMOUT_BUFFER_SIZE_0 + 16*i: size, stride and base. */
70 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONTEXT_REG
, 3, 0);
71 cs
->buf
[cs
->cdw
++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0
+
72 16*i
- SI_CONTEXT_REG_OFFSET
) >> 2;
73 cs
->buf
[cs
->cdw
++] = (t
[i
]->b
.buffer_offset
+
74 t
[i
]->b
.buffer_size
) >> 2; /* BUFFER_SIZE (in DW) */
75 cs
->buf
[cs
->cdw
++] = strides
[i
] >> 2; /* VTX_STRIDE (in DW) */
76 cs
->buf
[cs
->cdw
++] = 0; /* BUFFER_BASE */
/* NOP packet followed by a relocation call naming the target buffer; the
 * remaining arguments of that call are not shown in this listing. */
78 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_NOP
, 0, 0);
80 si_context_bo_reloc(ctx
, si_resource(t
[i
]->b
.buffer
),
/* Append bit set for this target: the update packet selects
 * STRMOUT_OFFSET_FROM_MEM, and the NOP + relocation call after it names
 * t[i]->filled_size. */
83 if (ctx
->streamout_append_bitmask
& (1 << i
)) {
85 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE
, 4, 0);
86 cs
->buf
[cs
->cdw
++] = STRMOUT_SELECT_BUFFER(i
) |
87 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM
); /* control */
88 cs
->buf
[cs
->cdw
++] = 0; /* unused */
89 cs
->buf
[cs
->cdw
++] = 0; /* unused */
90 cs
->buf
[cs
->cdw
++] = 0; /* src address lo */
91 cs
->buf
[cs
->cdw
++] = 0; /* src address hi */
93 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_NOP
, 0, 0);
95 si_context_bo_reloc(ctx
, t
[i
]->filled_size
,
/* NOTE(review): the packet below presumably sits in an else branch that this
 * listing does not show; it selects STRMOUT_OFFSET_FROM_PACKET and passes
 * buffer_offset >> 2 as the offset. */
98 /* Start from the beginning. */
99 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE
, 4, 0);
100 cs
->buf
[cs
->cdw
++] = STRMOUT_SELECT_BUFFER(i
) |
101 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET
); /* control */
102 cs
->buf
[cs
->cdw
++] = 0; /* unused */
103 cs
->buf
[cs
->cdw
++] = 0; /* unused */
104 cs
->buf
[cs
->cdw
++] = t
[i
]->b
.buffer_offset
>> 2; /* buffer offset in DW */
105 cs
->buf
[cs
->cdw
++] = 0; /* unused */
/*
 * Emits the command-stream packets that end stream output.
 *
 * Visible steps: flush through evergreen_flush_vgt_streamout, emit one
 * STRMOUT_BUFFER_UPDATE packet per target with
 * STRMOUT_STORE_BUFFER_FILLED_SIZE set (followed by a NOP and a relocation
 * call naming t[i]->filled_size), call evergreen_set_streamout_enable(ctx, 0),
 * OR the collected S_0085F0_SO0_DEST_BASE_ENA(1) << i bits into
 * ctx->atom_surface_sync.flush_flags, mark that atom dirty, and reset
 * ctx->num_cs_dw_streamout_end to 0. A trailing loop maps each filled_size
 * buffer and prints its first 32-bit word.
 *
 * NOTE(review): the embedded line numbering of this listing has gaps (for
 * example 121-122, 135-136, 138-141, 151-153), so braces, conditions and the
 * tail of the si_context_bo_reloc call are not shown; confirm against the
 * complete file.
 * NOTE(review): ctx is declared as struct r600_context * here, while the
 * evergreen_* helpers it is passed to are declared with struct si_context *
 * -- confirm both names refer to the same type.
 */
112 void si_context_streamout_end(struct r600_context
*ctx
)
114 struct radeon_winsys_cs
*cs
= ctx
->cs
;
115 struct si_so_target
**t
= ctx
->so_targets
;
116 unsigned i
, flush_flags
= 0;
118 evergreen_flush_vgt_streamout(ctx
);
/* One update packet per target: no offset source, store the filled size. */
120 for (i
= 0; i
< ctx
->num_so_targets
; i
++) {
123 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE
, 4, 0);
124 cs
->buf
[cs
->cdw
++] = STRMOUT_SELECT_BUFFER(i
) |
125 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE
) |
126 STRMOUT_STORE_BUFFER_FILLED_SIZE
; /* control */
127 cs
->buf
[cs
->cdw
++] = 0; /* dst address lo */
128 cs
->buf
[cs
->cdw
++] = 0; /* dst address hi */
129 cs
->buf
[cs
->cdw
++] = 0; /* unused */
130 cs
->buf
[cs
->cdw
++] = 0; /* unused */
132 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_NOP
, 0, 0);
134 si_context_bo_reloc(ctx
, t
[i
]->filled_size
,
/* Collect one DEST_BASE_ENA bit per target for the surface-sync atom. */
137 flush_flags
|= S_0085F0_SO0_DEST_BASE_ENA(1) << i
;
142 evergreen_set_streamout_enable(ctx
, 0);
144 ctx
->atom_surface_sync
.flush_flags
|= flush_flags
;
145 si_atom_dirty(ctx
, &ctx
->atom_surface_sync
.atom
);
147 ctx
->num_cs_dw_streamout_end
= 0;
/* NOTE(review): whether this debug loop is compiled in cannot be determined
 * from this listing. i is unsigned but is printed with %i -- confirm the
 * intended format specifier. */
149 /* XXX print some debug info */
150 for (i
= 0; i
< ctx
->num_so_targets
; i
++) {
154 uint32_t *ptr
= ctx
->ws
->buffer_map(t
[i
]->filled_size
->cs_buf
, ctx
->cs
, RADEON_USAGE_READ
);
155 printf("FILLED_SIZE%i: %u\n", i
, *ptr
);
156 ctx
->ws
->buffer_unmap(t
[i
]->filled_size
->cs_buf
);
/*
 * Appends three packets (12 entries in total) to ctx->cs:
 *   1. SET_CONFIG_REG writing 0 to CP_STRMOUT_CNTL,
 *   2. EVENT_WRITE with event type SO_VGTSTREAMOUT_FLUSH,
 *   3. WAIT_REG_MEM on CP_STRMOUT_CNTL, using OFFSET_UPDATE_DONE(1) as both
 *      reference value and mask.
 *
 * Every "cs->buf[cs->cdw++] = x" appends one entry to cs->buf and advances
 * cs->cdw. No space check on the command stream is visible in this function.
 */
160 void evergreen_flush_vgt_streamout(struct si_context
*ctx
)
162 struct radeon_winsys_cs
*cs
= ctx
->cs
;
/* Clear CP_STRMOUT_CNTL before requesting the flush. */
164 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONFIG_REG
, 1, 0);
165 cs
->buf
[cs
->cdw
++] = (R_0084FC_CP_STRMOUT_CNTL
- SI_CONFIG_REG_OFFSET
) >> 2;
166 cs
->buf
[cs
->cdw
++] = 0;
/* Request the streamout flush event. */
168 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE
, 0, 0);
169 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH
) | EVENT_INDEX(0);
/* Wait on CP_STRMOUT_CNTL with OFFSET_UPDATE_DONE as reference and mask. */
171 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_WAIT_REG_MEM
, 5, 0);
172 cs
->buf
[cs
->cdw
++] = WAIT_REG_MEM_EQUAL
; /* wait until the register is equal to the reference value */
173 cs
->buf
[cs
->cdw
++] = R_0084FC_CP_STRMOUT_CNTL
>> 2; /* register */
174 cs
->buf
[cs
->cdw
++] = 0;
175 cs
->buf
[cs
->cdw
++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */
176 cs
->buf
[cs
->cdw
++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */
177 cs
->buf
[cs
->cdw
++] = 4; /* poll interval */
/*
 * Appends SET_CONTEXT_REG packets to ctx->cs that turn stream output on or
 * off.
 *
 * After the buffer_enable_bit check, two register writes are visible:
 * VGT_STRMOUT_CONFIG = STREAMOUT_0_EN(1) and
 * VGT_STRMOUT_BUFFER_CONFIG = STREAM_0_BUFFER_EN(buffer_enable_bit).
 * A third write sets VGT_STRMOUT_CONFIG = STREAMOUT_0_EN(0).
 *
 * NOTE(review): embedded line 192 is missing from this listing, between the
 * second and third write; the third write presumably belongs to an else
 * branch (the disable path) -- confirm against the complete file.
 */
180 void evergreen_set_streamout_enable(struct si_context
*ctx
, unsigned buffer_enable_bit
)
182 struct radeon_winsys_cs
*cs
= ctx
->cs
;
184 if (buffer_enable_bit
) {
/* Enable stream 0. */
185 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONTEXT_REG
, 1, 0);
186 cs
->buf
[cs
->cdw
++] = (R_028B94_VGT_STRMOUT_CONFIG
- SI_CONTEXT_REG_OFFSET
) >> 2;
187 cs
->buf
[cs
->cdw
++] = S_028B94_STREAMOUT_0_EN(1);
/* Select which buffers stream 0 writes to. */
189 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONTEXT_REG
, 1, 0);
190 cs
->buf
[cs
->cdw
++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG
- SI_CONTEXT_REG_OFFSET
) >> 2;
191 cs
->buf
[cs
->cdw
++] = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit
);
/* Disable stream 0. */
193 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONTEXT_REG
, 1, 0);
194 cs
->buf
[cs
->cdw
++] = (R_028B94_VGT_STRMOUT_CONFIG
- SI_CONTEXT_REG_OFFSET
) >> 2;
195 cs
->buf
[cs
->cdw
++] = S_028B94_STREAMOUT_0_EN(0);
/*
 * Creates a stream-output target for the given buffer range.
 *
 * Visible steps: allocate a si_so_target with CALLOC_STRUCT, set its
 * reference count to 1, take a reference on buffer, record buffer_offset and
 * buffer_size, create the filled_size resource with
 * si_resource_create_custom(ctx->screen, PIPE_USAGE_STATIC, 4), then map it,
 * zero t->filled_size->buf->size bytes and unmap it.
 *
 * NOTE(review): the embedded line numbering of this listing has gaps (for
 * example 206-207, 210-211, 213-216, 218, 227-232). In the visible lines
 * there is no declaration of ptr, no NULL check on the allocation and no
 * return statement; confirm against the complete file.
 * NOTE(review): rctx is declared as struct si_context * but initialised from
 * a cast to struct r600_context * -- confirm both names refer to the same
 * type.
 */
201 struct pipe_stream_output_target
*
202 si_create_so_target(struct pipe_context
*ctx
,
203 struct pipe_resource
*buffer
,
204 unsigned buffer_offset
,
205 unsigned buffer_size
)
208 struct si_context
*rctx
= (struct r600_context
*)ctx
;
209 struct si_so_target
*t
;
212 t
= CALLOC_STRUCT(si_so_target
);
217 t
->b
.reference
.count
= 1;
219 pipe_resource_reference(&t
->b
.buffer
, buffer
);
220 t
->b
.buffer_offset
= buffer_offset
;
221 t
->b
.buffer_size
= buffer_size
;
/* Create the filled_size resource and clear it through a CPU mapping.
 * NOTE(review): the third buffer_map argument here is PIPE_TRANSFER_WRITE,
 * while si_context_streamout_end passes RADEON_USAGE_READ in the same
 * position -- confirm which flag family buffer_map expects. */
223 t
->filled_size
= si_resource_create_custom(ctx
->screen
, PIPE_USAGE_STATIC
, 4);
224 ptr
= rctx
->ws
->buffer_map(t
->filled_size
->cs_buf
, rctx
->cs
, PIPE_TRANSFER_WRITE
);
225 memset(ptr
, 0, t
->filled_size
->buf
->size
);
226 rctx
->ws
->buffer_unmap(t
->filled_size
->cs_buf
);
/*
 * Releases the references held by a stream-output target: passes NULL to
 * pipe_resource_reference for t->b.buffer and to si_resource_reference for
 * t->filled_size.
 *
 * NOTE(review): no release of t itself is visible; the embedded numbering
 * stops at 239 and the next function starts at 244, so further statements
 * may be missing from this listing -- confirm against the complete file.
 * NOTE(review): t is declared as struct si_so_target * but initialised from
 * a cast to struct r600_so_target * -- confirm both names refer to the same
 * type.
 */
233 void si_so_target_destroy(struct pipe_context
*ctx
,
234 struct pipe_stream_output_target
*target
)
237 struct si_so_target
*t
= (struct r600_so_target
*)target
;
238 pipe_resource_reference(&t
->b
.buffer
, NULL
);
239 si_resource_reference(&t
->filled_size
, NULL
);
/*
 * Replaces the set of bound stream-output targets.
 *
 * Visible steps: assert that num_targets is 0; if targets are currently
 * bound (rctx->num_so_targets != 0) call si_context_streamout_end; reference
 * targets[i] into rctx->so_targets[i] for i < num_targets; continue with the
 * same index up to the previous rctx->num_so_targets, referencing NULL into
 * those slots; finally store num_targets, streamout_start
 * (num_targets != 0) and streamout_append_bitmask in rctx.
 *
 * NOTE(review): because of the assert, the first loop is not expected to run
 * when assertions are enabled. The embedded line numbering has gaps (for
 * example 248, 250, 252-253, 257-258), and no declaration of i is visible --
 * confirm against the complete file.
 * NOTE(review): rctx is declared as struct si_context * but initialised from
 * a cast to struct r600_context * -- confirm both names refer to the same
 * type.
 */
244 void si_set_so_targets(struct pipe_context
*ctx
,
245 unsigned num_targets
,
246 struct pipe_stream_output_target
**targets
,
247 unsigned append_bitmask
)
249 assert(num_targets
== 0);
251 struct si_context
*rctx
= (struct r600_context
*)ctx
;
254 /* Stop streamout. */
255 if (rctx
->num_so_targets
) {
256 si_context_streamout_end(rctx
);
259 /* Set the new targets. */
260 for (i
= 0; i
< num_targets
; i
++) {
261 pipe_so_target_reference((struct pipe_stream_output_target
**)&rctx
->so_targets
[i
], targets
[i
]);
/* Continue from the same index: clear slots that were bound before but are
 * beyond the new target count. */
263 for (; i
< rctx
->num_so_targets
; i
++) {
264 pipe_so_target_reference((struct pipe_stream_output_target
**)&rctx
->so_targets
[i
], NULL
);
267 rctx
->num_so_targets
= num_targets
;
268 rctx
->streamout_start
= num_targets
!= 0;
269 rctx
->streamout_append_bitmask
= append_bitmask
;