freedreno/a6xx: split out const emit
[mesa.git] / src / gallium / drivers / freedreno / a6xx / fd6_const.c
1 /*
2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "fd6_const.h"
26
27 /* regid: base const register
28 * prsc or dwords: buffer containing constant values
29 * sizedwords: size of const value buffer
30 */
31 void
32 fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type,
33 uint32_t regid, uint32_t offset, uint32_t sizedwords,
34 const uint32_t *dwords, struct pipe_resource *prsc)
35 {
36 uint32_t i, sz, align_sz;
37 enum a6xx_state_src src;
38
39 debug_assert((regid % 4) == 0);
40
41 if (prsc) {
42 sz = 0;
43 src = SS6_INDIRECT;
44 } else {
45 sz = sizedwords;
46 src = SS6_DIRECT;
47 }
48
49 align_sz = align(sz, 4);
50
51 OUT_PKT7(ring, fd6_stage2opcode(type), 3 + align_sz);
52 OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) |
53 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
54 CP_LOAD_STATE6_0_STATE_SRC(src) |
55 CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) |
56 CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(sizedwords, 4)));
57 if (prsc) {
58 struct fd_bo *bo = fd_resource(prsc)->bo;
59 OUT_RELOC(ring, bo, offset, 0, 0);
60 } else {
61 OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
62 OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
63 dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
64 }
65
66 for (i = 0; i < sz; i++) {
67 OUT_RING(ring, dwords[i]);
68 }
69
70 /* Zero-pad to multiple of 4 dwords */
71 for (i = sz; i < align_sz; i++) {
72 OUT_RING(ring, 0);
73 }
74 }
75
76 void
77 fd6_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write,
78 uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
79 {
80 uint32_t anum = align(num, 2);
81 uint32_t i;
82
83 debug_assert((regid % 4) == 0);
84
85 OUT_PKT7(ring, fd6_stage2opcode(type), 3 + (2 * anum));
86 OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) |
87 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)|
88 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
89 CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) |
90 CP_LOAD_STATE6_0_NUM_UNIT(anum/2));
91 OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
92 OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
93
94 for (i = 0; i < num; i++) {
95 if (prscs[i]) {
96 if (write) {
97 OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
98 } else {
99 OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
100 }
101 } else {
102 OUT_RING(ring, 0xbad00000 | (i << 16));
103 OUT_RING(ring, 0xbad00000 | (i << 16));
104 }
105 }
106
107 for (; i < anum; i++) {
108 OUT_RING(ring, 0xffffffff);
109 OUT_RING(ring, 0xffffffff);
110 }
111 }
112
113 static void
114 emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3_shader_variant *s)
115 {
116 struct fd_context *ctx = emit->ctx;
117 const unsigned regid = s->shader->const_state.offsets.primitive_param * 4 + 4;
118 uint32_t dwords = 16;
119
120 OUT_PKT7(ring, fd6_stage2opcode(s->type), 3);
121 OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) |
122 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)|
123 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
124 CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s->type)) |
125 CP_LOAD_STATE6_0_NUM_UNIT(dwords / 4));
126 OUT_RB(ring, ctx->batch->tess_addrs_constobj);
127 }
128
129 static void
130 emit_stage_tess_consts(struct fd_ringbuffer *ring, struct ir3_shader_variant *v,
131 uint32_t *params, int num_params)
132 {
133 const unsigned regid = v->shader->const_state.offsets.primitive_param;
134 int size = MIN2(1 + regid, v->constlen) - regid;
135 if (size > 0)
136 fd6_emit_const(ring, v->type, regid * 4, 0, num_params, params, NULL);
137 }
138
/* Build a streaming stateobj with the tess/geometry primitive-param consts
 * for every active stage (VS, and HS/DS and/or GS when present), plus the
 * tess BO address consts for HS/DS.  Only called when the program has GS
 * or tessellation (see fd6_emit_consts), so at least one of emit->hs /
 * emit->gs is non-NULL.
 */
static void
emit_tess_consts(struct fd6_emit *emit)
{
	struct fd_context *ctx = emit->ctx;

	struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
		ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

	/* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
	 * size is dwords, since that's what LDG/STG use.
	 */
	unsigned num_vertices =
		emit->hs ?
		emit->info->vertices_per_patch :
		emit->gs->shader->nir->info.gs.vertices_in;

	uint32_t vs_params[4] = {
		emit->vs->shader->output_size * num_vertices * 4,	/* vs primitive stride */
		emit->vs->shader->output_size * 4,			/* vs vertex stride */
		0,
		0
	};

	emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params));

	if (emit->hs) {
		uint32_t hs_params[4] = {
			emit->vs->shader->output_size * num_vertices * 4,	/* vs primitive stride */
			emit->vs->shader->output_size * 4,			/* vs vertex stride */
			emit->hs->shader->output_size,
			emit->info->vertices_per_patch
		};

		emit_stage_tess_consts(constobj, emit->hs, hs_params, ARRAY_SIZE(hs_params));
		emit_tess_bos(constobj, emit, emit->hs);

		/* DS feeds the GS (when present), so the DS primitive stride is
		 * per GS input vertex rather than per patch vertex:
		 */
		if (emit->gs)
			num_vertices = emit->gs->shader->nir->info.gs.vertices_in;

		uint32_t ds_params[4] = {
			emit->ds->shader->output_size * num_vertices * 4,	/* ds primitive stride */
			emit->ds->shader->output_size * 4,			/* ds vertex stride */
			emit->hs->shader->output_size,				/* hs vertex stride (dwords) */
			emit->hs->shader->nir->info.tess.tcs_vertices_out
		};

		emit_stage_tess_consts(constobj, emit->ds, ds_params, ARRAY_SIZE(ds_params));
		emit_tess_bos(constobj, emit, emit->ds);
	}

	if (emit->gs) {
		/* Stage feeding the GS: DS with tessellation, plain VS otherwise. */
		struct ir3_shader_variant *prev;
		if (emit->ds)
			prev = emit->ds;
		else
			prev = emit->vs;

		uint32_t gs_params[4] = {
			prev->shader->output_size * num_vertices * 4,	/* ds primitive stride */
			prev->shader->output_size * 4,			/* ds vertex stride */
			0,
			0,
		};

		/* NOTE(review): num_vertices is not read again after this point,
		 * so this assignment appears to be a dead store -- confirm before
		 * removing.
		 */
		num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
		emit_stage_tess_consts(constobj, emit->gs, gs_params, ARRAY_SIZE(gs_params));
	}

	fd6_emit_take_group(emit, constobj, FD6_GROUP_PRIMITIVE_PARAMS, ENABLE_ALL);
}
209
210 static void
211 emit_user_consts(struct fd6_emit *emit)
212 {
213 static const enum pipe_shader_type types[] = {
214 PIPE_SHADER_VERTEX, PIPE_SHADER_TESS_CTRL, PIPE_SHADER_TESS_EVAL,
215 PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT,
216 };
217 const struct ir3_shader_variant *variants[] = {
218 emit->vs, emit->hs, emit->ds, emit->gs, emit->fs,
219 };
220 struct fd_context *ctx = emit->ctx;
221 unsigned sz = 0;
222
223 for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
224 if (!variants[i])
225 continue;
226 sz += variants[i]->shader->ubo_state.cmdstream_size;
227 }
228
229 struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
230 ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);
231
232 for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
233 if (!variants[i])
234 continue;
235 ir3_emit_user_consts(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]);
236 ir3_emit_ubos(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]);
237 }
238
239 fd6_emit_take_group(emit, constobj, FD6_GROUP_CONST, ENABLE_ALL);
240 }
241
/* Top-level const-state emit for a draw: refreshes user consts when dirty,
 * tess/geometry params when the program uses them, and VS driver-params
 * every time they are needed (they can change per draw).
 */
void
fd6_emit_consts(struct fd6_emit *emit)
{
	struct fd_context *ctx = emit->ctx;
	struct fd6_context *fd6_ctx = fd6_context(ctx);

	if (emit->dirty & (FD_DIRTY_CONST | FD_DIRTY_PROG))
		emit_user_consts(emit);

	if (emit->key.key.has_gs || emit->key.key.tessellation)
		emit_tess_consts(emit);

	/* if driver-params are needed, emit each time: */
	const struct ir3_shader_variant *vs = emit->vs;
	if (ir3_needs_vs_driver_params(vs)) {
		struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
				ctx->batch->submit, IR3_DP_VS_COUNT * 4, FD_RINGBUFFER_STREAMING);
		ir3_emit_vs_driver_params(vs, dpconstobj, ctx, emit->info);
		fd6_emit_take_group(emit, dpconstobj, FD6_GROUP_VS_DRIVER_PARAMS, ENABLE_ALL);
		/* Remember that a driver-params group is installed so it can be
		 * torn down on a later draw that no longer needs it:
		 */
		fd6_ctx->has_dp_state = true;
	} else if (fd6_ctx->has_dp_state) {
		/* Previous draw installed driver-params but this one doesn't need
		 * them -- clear the stale state group:
		 */
		fd6_emit_take_group(emit, NULL, FD6_GROUP_VS_DRIVER_PARAMS, ENABLE_ALL);
		fd6_ctx->has_dp_state = false;
	}
}
267
268 void
269 fd6_emit_ibo_consts(struct fd6_emit *emit, const struct ir3_shader_variant *v,
270 enum pipe_shader_type stage, struct fd_ringbuffer *ring)
271 {
272 struct fd_context *ctx = emit->ctx;
273
274 ir3_emit_ssbo_sizes(ctx->screen, v, ring, &ctx->shaderbuf[stage]);
275 ir3_emit_image_dims(ctx->screen, v, ring, &ctx->shaderimg[stage]);
276 }
277
/* Emit compute-shader consts (grid params etc.); thin a6xx wrapper over
 * the shared ir3 helper.
 */
void
fd6_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
		struct fd_context *ctx, const struct pipe_grid_info *info)
{
	ir3_emit_cs_consts(v, ring, ctx, info);
}
284
/* Emit the variant's immediate constants; thin a6xx wrapper over the
 * shared ir3 helper.
 */
void
fd6_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v,
		struct fd_ringbuffer *ring)
{
	ir3_emit_immediates(screen, v, ring);
}
291
/* Query the cmdstream footprint (packet count and size) needed to emit
 * user consts for the given UBO analysis state; thin a6xx wrapper over
 * the shared ir3 helper.
 */
void
fd6_user_consts_size(struct ir3_ubo_analysis_state *state,
		unsigned *packets, unsigned *size)
{
	ir3_user_consts_size(state, packets, size);
}
298
/* Emit the producer->consumer stage link map consts; thin a6xx wrapper
 * over the shared ir3 helper.
 */
void
fd6_emit_link_map(struct fd_screen *screen,
		const struct ir3_shader_variant *producer,
		const struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
{
	ir3_emit_link_map(screen, producer, v, ring);
}