2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Christian König <christian.koenig@amd.com>
27 #include "radeonsi_pipe.h"
/*
 * si_context_streamout_begin - emit the command-stream packets that start
 * stream output for the currently bound targets.
 *
 * What the visible code does:
 *  - Builds buffer_en, a 4-bit mask in which bit n is set when at least
 *    n + 1 targets are bound and ctx->so_targets[n] is non-NULL.
 *  - Assigns ctx->num_cs_dw_streamout_end from a dword-count expression
 *    (12 dwords for the VGT flush plus 8 per enabled buffer, plus terms
 *    that are not fully visible here).
 *  - Calls evergreen_flush_vgt_streamout() and then
 *    evergreen_set_streamout_enable(ctx, buffer_en).
 *  - For every target index i below ctx->num_so_targets:
 *      * stores strides[i] into t[i]->stride;
 *      * emits PKT3_SET_CONTEXT_REG for three registers starting at
 *        VGT_STRMOUT_BUFFER_SIZE_0 + 16*i: the buffer end
 *        ((buffer_offset + buffer_size) >> 2, in dwords), the vertex
 *        stride in dwords, and a zero BUFFER_BASE;
 *      * emits a PKT3_NOP followed by si_context_bo_reloc() for the
 *        target buffer;
 *      * if bit i of ctx->streamout_append_bitmask is set, emits
 *        PKT3_STRMOUT_BUFFER_UPDATE with STRMOUT_OFFSET_FROM_MEM and a
 *        NOP + relocation for t[i]->filled_size; the other variant
 *        ("Start from the beginning") uses STRMOUT_OFFSET_FROM_PACKET
 *        with buffer_offset >> 2 as the starting offset in dwords.
 *
 * NOTE(review): this listing is missing lines from the original file
 * (braces, the tail of the num_cs_dw_streamout_end expression, the call
 * that the ", TRUE)" arguments belong to, the trailing arguments of both
 * si_context_bo_reloc() calls, and the else between the two
 * STRMOUT_BUFFER_UPDATE variants). Confirm against the upstream source
 * before changing any logic.
 * NOTE(review): buffer_en tests t[n] for NULL, but no NULL test on t[i] is
 * visible inside the loop - presumably it sits on a missing line; verify.
 * NOTE(review): the parameter is declared as struct r600_context * while
 * the helpers called here are declared with struct si_context * - confirm
 * the intended type.
 */
35 void si_context_streamout_begin(struct r600_context
*ctx
)
37 struct radeon_winsys_cs
*cs
= ctx
->cs
;
38 struct si_so_target
**t
= ctx
->so_targets
;
39 unsigned *strides
= ctx
->vs_shader_so_strides
;
40 unsigned buffer_en
, i
;
42 buffer_en
= (ctx
->num_so_targets
>= 1 && t
[0] ? 1 : 0) |
43 (ctx
->num_so_targets
>= 2 && t
[1] ? 2 : 0) |
44 (ctx
->num_so_targets
>= 3 && t
[2] ? 4 : 0) |
45 (ctx
->num_so_targets
>= 4 && t
[3] ? 8 : 0);
47 ctx
->num_cs_dw_streamout_end
=
48 12 + /* flush_vgt_streamout */
49 util_bitcount(buffer_en
) * 8 +
53 12 + /* flush_vgt_streamout */
55 util_bitcount(buffer_en
& ctx
->streamout_append_bitmask
) * 8 +
56 util_bitcount(buffer_en
& ~ctx
->streamout_append_bitmask
) * 6 +
57 ctx
->num_cs_dw_streamout_end
, TRUE
);
59 evergreen_flush_vgt_streamout(ctx
);
60 evergreen_set_streamout_enable(ctx
, buffer_en
);
62 for (i
= 0; i
< ctx
->num_so_targets
; i
++) {
65 t
[i
]->stride
= strides
[i
];
68 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONTEXT_REG
, 3, 0);
69 cs
->buf
[cs
->cdw
++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0
+
70 16*i
- SI_CONTEXT_REG_OFFSET
) >> 2;
71 cs
->buf
[cs
->cdw
++] = (t
[i
]->b
.buffer_offset
+
72 t
[i
]->b
.buffer_size
) >> 2; /* BUFFER_SIZE (in DW) */
73 cs
->buf
[cs
->cdw
++] = strides
[i
] >> 2; /* VTX_STRIDE (in DW) */
74 cs
->buf
[cs
->cdw
++] = 0; /* BUFFER_BASE */
76 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_NOP
, 0, 0);
78 si_context_bo_reloc(ctx
, si_resource(t
[i
]->b
.buffer
),
81 if (ctx
->streamout_append_bitmask
& (1 << i
)) {
83 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE
, 4, 0);
84 cs
->buf
[cs
->cdw
++] = STRMOUT_SELECT_BUFFER(i
) |
85 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM
); /* control */
86 cs
->buf
[cs
->cdw
++] = 0; /* unused */
87 cs
->buf
[cs
->cdw
++] = 0; /* unused */
88 cs
->buf
[cs
->cdw
++] = 0; /* src address lo */
89 cs
->buf
[cs
->cdw
++] = 0; /* src address hi */
91 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_NOP
, 0, 0);
93 si_context_bo_reloc(ctx
, t
[i
]->filled_size
,
96 /* Start from the beginning. */
97 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE
, 4, 0);
98 cs
->buf
[cs
->cdw
++] = STRMOUT_SELECT_BUFFER(i
) |
99 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET
); /* control */
100 cs
->buf
[cs
->cdw
++] = 0; /* unused */
101 cs
->buf
[cs
->cdw
++] = 0; /* unused */
102 cs
->buf
[cs
->cdw
++] = t
[i
]->b
.buffer_offset
>> 2; /* buffer offset in DW */
103 cs
->buf
[cs
->cdw
++] = 0; /* unused */
/*
 * si_context_streamout_end - emit the command-stream packets that stop
 * stream output and save how much each target was filled.
 *
 * What the visible code does:
 *  - Calls evergreen_flush_vgt_streamout().
 *  - For every target index i below ctx->num_so_targets: emits
 *    PKT3_STRMOUT_BUFFER_UPDATE selecting buffer i with
 *    STRMOUT_OFFSET_NONE | STRMOUT_STORE_BUFFER_FILLED_SIZE, then a
 *    PKT3_NOP plus si_context_bo_reloc() for t[i]->filled_size, and ORs
 *    S_0085F0_SO0_DEST_BASE_ENA(1) << i into flush_flags.
 *  - Calls evergreen_set_streamout_enable(ctx, 0).
 *  - ORs flush_flags into ctx->atom_surface_sync.flush_flags and marks
 *    that atom dirty with si_atom_dirty().
 *  - Resets ctx->num_cs_dw_streamout_end to 0.
 *  - Runs a debug loop ("XXX print some debug info") that maps each
 *    target's filled_size buffer, prints its first dword with printf,
 *    and unmaps it.
 *
 * NOTE(review): lines of the original are missing from this listing
 * (braces, the trailing arguments of si_context_bo_reloc(), and whatever
 * surrounds the debug loop); confirm against the upstream source.
 * NOTE(review): no NULL check on t[i] or on the buffer_map() result is
 * visible before the dereferences.
 * NOTE(review): the printf format uses %i for `i`, which is declared
 * unsigned; %u would match the type.
 */
110 void si_context_streamout_end(struct r600_context
*ctx
)
112 struct radeon_winsys_cs
*cs
= ctx
->cs
;
113 struct si_so_target
**t
= ctx
->so_targets
;
114 unsigned i
, flush_flags
= 0;
116 evergreen_flush_vgt_streamout(ctx
);
118 for (i
= 0; i
< ctx
->num_so_targets
; i
++) {
121 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE
, 4, 0);
122 cs
->buf
[cs
->cdw
++] = STRMOUT_SELECT_BUFFER(i
) |
123 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE
) |
124 STRMOUT_STORE_BUFFER_FILLED_SIZE
; /* control */
125 cs
->buf
[cs
->cdw
++] = 0; /* dst address lo */
126 cs
->buf
[cs
->cdw
++] = 0; /* dst address hi */
127 cs
->buf
[cs
->cdw
++] = 0; /* unused */
128 cs
->buf
[cs
->cdw
++] = 0; /* unused */
130 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_NOP
, 0, 0);
132 si_context_bo_reloc(ctx
, t
[i
]->filled_size
,
135 flush_flags
|= S_0085F0_SO0_DEST_BASE_ENA(1) << i
;
140 evergreen_set_streamout_enable(ctx
, 0);
142 ctx
->atom_surface_sync
.flush_flags
|= flush_flags
;
143 si_atom_dirty(ctx
, &ctx
->atom_surface_sync
.atom
);
145 ctx
->num_cs_dw_streamout_end
= 0;
147 /* XXX print some debug info */
148 for (i
= 0; i
< ctx
->num_so_targets
; i
++) {
152 uint32_t *ptr
= ctx
->ws
->buffer_map(t
[i
]->filled_size
->cs_buf
, ctx
->cs
, RADEON_USAGE_READ
);
153 printf("FILLED_SIZE%i: %u\n", i
, *ptr
);
154 ctx
->ws
->buffer_unmap(t
[i
]->filled_size
->cs_buf
);
/*
 * evergreen_flush_vgt_streamout - append a VGT stream-out flush sequence
 * to ctx->cs.
 *
 * The visible code writes twelve dwords, in this order:
 *  1. PKT3_SET_CONFIG_REG writing 0 to CP_STRMOUT_CNTL (3 dwords);
 *  2. PKT3_EVENT_WRITE with EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH (2 dwords);
 *  3. PKT3_WAIT_REG_MEM on CP_STRMOUT_CNTL using WAIT_REG_MEM_EQUAL, with
 *     S_0084FC_OFFSET_UPDATE_DONE(1) as both reference value and mask and
 *     a poll interval of 4 (7 dwords).
 *
 * Dwords are stored through cs->buf[cs->cdw++]; no space check is visible
 * in this function, so the caller presumably reserves room beforehand
 * (TODO confirm).
 */
158 void evergreen_flush_vgt_streamout(struct si_context
*ctx
)
160 struct radeon_winsys_cs
*cs
= ctx
->cs
;
162 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONFIG_REG
, 1, 0);
163 cs
->buf
[cs
->cdw
++] = (R_0084FC_CP_STRMOUT_CNTL
- SI_CONFIG_REG_OFFSET
) >> 2;
164 cs
->buf
[cs
->cdw
++] = 0;
166 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE
, 0, 0);
167 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH
) | EVENT_INDEX(0);
169 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_WAIT_REG_MEM
, 5, 0);
170 cs
->buf
[cs
->cdw
++] = WAIT_REG_MEM_EQUAL
; /* wait until the register is equal to the reference value */
171 cs
->buf
[cs
->cdw
++] = R_0084FC_CP_STRMOUT_CNTL
>> 2; /* register */
172 cs
->buf
[cs
->cdw
++] = 0;
173 cs
->buf
[cs
->cdw
++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */
174 cs
->buf
[cs
->cdw
++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */
175 cs
->buf
[cs
->cdw
++] = 4; /* poll interval */
/*
 * evergreen_set_streamout_enable - emit the register writes that turn
 * stream output on or off.
 *
 * @ctx:               context whose command stream (ctx->cs) is written.
 * @buffer_enable_bit: mask passed to S_028B98_STREAM_0_BUFFER_EN(); a
 *                     non-zero value selects the "enable" path.
 *
 * When buffer_enable_bit is non-zero the visible code writes
 * VGT_STRMOUT_CONFIG = S_028B94_STREAMOUT_0_EN(1) and
 * VGT_STRMOUT_BUFFER_CONFIG = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit),
 * each through a one-register PKT3_SET_CONTEXT_REG packet.
 * The final packet writes VGT_STRMOUT_CONFIG = S_028B94_STREAMOUT_0_EN(0).
 *
 * NOTE(review): the line between the second and third packet is missing
 * from this listing; presumably it is an `else`, making the final write
 * the disable path only. Confirm against the upstream source.
 */
178 void evergreen_set_streamout_enable(struct si_context
*ctx
, unsigned buffer_enable_bit
)
180 struct radeon_winsys_cs
*cs
= ctx
->cs
;
182 if (buffer_enable_bit
) {
183 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONTEXT_REG
, 1, 0);
184 cs
->buf
[cs
->cdw
++] = (R_028B94_VGT_STRMOUT_CONFIG
- SI_CONTEXT_REG_OFFSET
) >> 2;
185 cs
->buf
[cs
->cdw
++] = S_028B94_STREAMOUT_0_EN(1);
187 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONTEXT_REG
, 1, 0);
188 cs
->buf
[cs
->cdw
++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG
- SI_CONTEXT_REG_OFFSET
) >> 2;
189 cs
->buf
[cs
->cdw
++] = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit
);
191 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONTEXT_REG
, 1, 0);
192 cs
->buf
[cs
->cdw
++] = (R_028B94_VGT_STRMOUT_CONFIG
- SI_CONTEXT_REG_OFFSET
) >> 2;
193 cs
->buf
[cs
->cdw
++] = S_028B94_STREAMOUT_0_EN(0);
/*
 * si_create_so_target - allocate a stream-output target that wraps a
 * range of `buffer`.
 *
 * @ctx:           pipe context; cast to the driver context to reach the
 *                 winsys (rctx->ws) and command stream (rctx->cs).
 * @buffer:        resource to stream into; a reference is taken with
 *                 pipe_resource_reference().
 * @buffer_offset: stored unchanged in t->b.buffer_offset.
 * @buffer_size:   stored unchanged in t->b.buffer_size.
 *
 * The visible code allocates the target with CALLOC_STRUCT, sets its
 * reference count to 1, creates a 4-byte resource for t->filled_size via
 * si_resource_create_custom(), then maps that resource, zeroes
 * t->filled_size->buf->size bytes of it with memset, and unmaps it.
 *
 * NOTE(review): the declaration of `ptr`, any NULL checks on `t`,
 * `t->filled_size` or the mapped pointer, and the return statement are not
 * visible in this listing; confirm against the upstream source.
 * NOTE(review): buffer_map() is passed PIPE_TRANSFER_WRITE here, whereas
 * si_context_streamout_end() passes RADEON_USAGE_READ to the same hook -
 * confirm which enum that argument expects.
 */
199 struct pipe_stream_output_target
*
200 si_create_so_target(struct pipe_context
*ctx
,
201 struct pipe_resource
*buffer
,
202 unsigned buffer_offset
,
203 unsigned buffer_size
)
206 struct si_context
*rctx
= (struct r600_context
*)ctx
;
207 struct si_so_target
*t
;
210 t
= CALLOC_STRUCT(si_so_target
);
215 t
->b
.reference
.count
= 1;
217 pipe_resource_reference(&t
->b
.buffer
, buffer
);
218 t
->b
.buffer_offset
= buffer_offset
;
219 t
->b
.buffer_size
= buffer_size
;
221 t
->filled_size
= si_resource_create_custom(ctx
->screen
, PIPE_USAGE_STATIC
, 4);
222 ptr
= rctx
->ws
->buffer_map(t
->filled_size
->cs_buf
, rctx
->cs
, PIPE_TRANSFER_WRITE
);
223 memset(ptr
, 0, t
->filled_size
->buf
->size
);
224 rctx
->ws
->buffer_unmap(t
->filled_size
->cs_buf
);
/*
 * si_so_target_destroy - release the resources held by a stream-output
 * target.
 *
 * @ctx:    pipe context; not used by the visible lines.
 * @target: target to tear down; cast to the driver's target struct.
 *
 * The visible code drops the reference on the wrapped buffer
 * (pipe_resource_reference(..., NULL)) and on the filled_size resource
 * (si_resource_reference(..., NULL)).
 *
 * NOTE(review): no release of the target structure itself is visible in
 * this listing; presumably it is on a missing line - verify against the
 * upstream source.
 * NOTE(review): the cast names struct r600_so_target while the variable is
 * declared struct si_so_target - confirm the intended type.
 */
231 void si_so_target_destroy(struct pipe_context
*ctx
,
232 struct pipe_stream_output_target
*target
)
235 struct si_so_target
*t
= (struct r600_so_target
*)target
;
236 pipe_resource_reference(&t
->b
.buffer
, NULL
);
237 si_resource_reference(&t
->filled_size
, NULL
);
/*
 * si_set_so_targets - bind a new set of stream-output targets to the
 * context.
 *
 * @ctx:            pipe context, cast to the driver context.
 * @num_targets:    number of entries in `targets`; the visible code
 *                  asserts that it is 0.
 * @targets:        targets to bind into rctx->so_targets[0..num_targets).
 * @append_bitmask: stored in rctx->streamout_append_bitmask.
 *
 * What the visible code does:
 *  - If targets are currently bound (rctx->num_so_targets != 0), calls
 *    si_context_streamout_end() first.
 *  - References each new target into rctx->so_targets[i], then
 *    unreferences the remaining old slots up to the previous
 *    rctx->num_so_targets.
 *  - Records num_targets, sets rctx->streamout_start to
 *    (num_targets != 0), and records the append bitmask.
 *
 * NOTE(review): the assert makes every non-empty bind fail in debug
 * builds; this looks like a deliberate "not supported yet" guard - confirm.
 * NOTE(review): the declaration of `i`, any bound check of num_targets
 * against the capacity of so_targets, and the closing lines are not
 * visible in this listing; the assert also appears before a declaration.
 */
242 void si_set_so_targets(struct pipe_context
*ctx
,
243 unsigned num_targets
,
244 struct pipe_stream_output_target
**targets
,
245 unsigned append_bitmask
)
247 assert(num_targets
== 0);
249 struct si_context
*rctx
= (struct r600_context
*)ctx
;
252 /* Stop streamout. */
253 if (rctx
->num_so_targets
) {
254 si_context_streamout_end(rctx
);
257 /* Set the new targets. */
258 for (i
= 0; i
< num_targets
; i
++) {
259 pipe_so_target_reference((struct pipe_stream_output_target
**)&rctx
->so_targets
[i
], targets
[i
]);
261 for (; i
< rctx
->num_so_targets
; i
++) {
262 pipe_so_target_reference((struct pipe_stream_output_target
**)&rctx
->so_targets
[i
], NULL
);
265 rctx
->num_so_targets
= num_targets
;
266 rctx
->streamout_start
= num_targets
!= 0;
267 rctx
->streamout_append_bitmask
= append_bitmask
;