/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 * Copyright © 2018 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
25 #include "fd6_const.h"
28 #define emit_const_user fd6_emit_const_user
29 #define emit_const_bo fd6_emit_const_bo
30 #include "ir3_const.h"
32 /* regid: base const register
33 * prsc or dwords: buffer containing constant values
34 * sizedwords: size of const value buffer
37 fd6_emit_const_user(struct fd_ringbuffer
*ring
,
38 const struct ir3_shader_variant
*v
, uint32_t regid
,
39 uint32_t sizedwords
, const uint32_t *dwords
)
41 emit_const_asserts(ring
, v
, regid
, sizedwords
);
43 /* NOTE we cheat a bit here, since we know mesa is aligning
44 * the size of the user buffer to 16 bytes. And we want to
45 * cut cycles in a hot path.
47 uint32_t align_sz
= align(sizedwords
, 4);
49 if (fd6_geom_stage(v
->type
)) {
50 OUT_PKTBUF(ring
, CP_LOAD_STATE6_GEOM
, dwords
, align_sz
,
53 .state_type
= ST6_CONSTANTS
,
54 .state_src
= SS6_DIRECT
,
55 .state_block
= fd6_stage2shadersb(v
->type
),
56 .num_unit
= DIV_ROUND_UP(sizedwords
, 4)
62 OUT_PKTBUF(ring
, CP_LOAD_STATE6_FRAG
, dwords
, align_sz
,
65 .state_type
= ST6_CONSTANTS
,
66 .state_src
= SS6_DIRECT
,
67 .state_block
= fd6_stage2shadersb(v
->type
),
68 .num_unit
= DIV_ROUND_UP(sizedwords
, 4)
76 fd6_emit_const_bo(struct fd_ringbuffer
*ring
,
77 const struct ir3_shader_variant
*v
, uint32_t regid
,
78 uint32_t offset
, uint32_t sizedwords
, struct fd_bo
*bo
)
80 uint32_t dst_off
= regid
/ 4;
81 assert(dst_off
% 4 == 0);
82 uint32_t num_unit
= DIV_ROUND_UP(sizedwords
, 4);
83 assert(num_unit
% 4 == 0);
85 emit_const_asserts(ring
, v
, regid
, sizedwords
);
87 if (fd6_geom_stage(v
->type
)) {
88 OUT_PKT(ring
, CP_LOAD_STATE6_GEOM
,
91 .state_type
= ST6_CONSTANTS
,
92 .state_src
= SS6_INDIRECT
,
93 .state_block
= fd6_stage2shadersb(v
->type
),
96 CP_LOAD_STATE6_EXT_SRC_ADDR(
102 OUT_PKT(ring
, CP_LOAD_STATE6_FRAG
,
105 .state_type
= ST6_CONSTANTS
,
106 .state_src
= SS6_INDIRECT
,
107 .state_block
= fd6_stage2shadersb(v
->type
),
108 .num_unit
= num_unit
,
110 CP_LOAD_STATE6_EXT_SRC_ADDR(
119 is_stateobj(struct fd_ringbuffer
*ring
)
125 emit_const_ptrs(struct fd_ringbuffer
*ring
,
126 const struct ir3_shader_variant
*v
, uint32_t dst_offset
,
127 uint32_t num
, struct pipe_resource
**prscs
, uint32_t *offsets
)
129 unreachable("shouldn't be called on a6xx");
133 emit_tess_bos(struct fd_ringbuffer
*ring
, struct fd6_emit
*emit
, struct ir3_shader_variant
*s
)
135 struct fd_context
*ctx
= emit
->ctx
;
136 const struct ir3_const_state
*const_state
= ir3_const_state(s
);
137 const unsigned regid
= const_state
->offsets
.primitive_param
* 4 + 4;
138 uint32_t dwords
= 16;
140 OUT_PKT7(ring
, fd6_stage2opcode(s
->type
), 3);
141 OUT_RING(ring
, CP_LOAD_STATE6_0_DST_OFF(regid
/ 4) |
142 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
)|
143 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT
) |
144 CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s
->type
)) |
145 CP_LOAD_STATE6_0_NUM_UNIT(dwords
/ 4));
146 OUT_RB(ring
, ctx
->batch
->tess_addrs_constobj
);
150 emit_stage_tess_consts(struct fd_ringbuffer
*ring
, struct ir3_shader_variant
*v
,
151 uint32_t *params
, int num_params
)
153 const struct ir3_const_state
*const_state
= ir3_const_state(v
);
154 const unsigned regid
= const_state
->offsets
.primitive_param
;
155 int size
= MIN2(1 + regid
, v
->constlen
) - regid
;
157 fd6_emit_const_user(ring
, v
, regid
* 4, num_params
, params
);
161 emit_tess_consts(struct fd6_emit
*emit
)
163 struct fd_context
*ctx
= emit
->ctx
;
165 struct fd_ringbuffer
*constobj
= fd_submit_new_ringbuffer(
166 ctx
->batch
->submit
, 0x1000, FD_RINGBUFFER_STREAMING
);
168 /* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
169 * size is dwords, since that's what LDG/STG use.
171 unsigned num_vertices
=
173 emit
->info
->vertices_per_patch
:
174 emit
->gs
->shader
->nir
->info
.gs
.vertices_in
;
176 uint32_t vs_params
[4] = {
177 emit
->vs
->output_size
* num_vertices
* 4, /* vs primitive stride */
178 emit
->vs
->output_size
* 4, /* vs vertex stride */
183 emit_stage_tess_consts(constobj
, emit
->vs
, vs_params
, ARRAY_SIZE(vs_params
));
186 uint32_t hs_params
[4] = {
187 emit
->vs
->output_size
* num_vertices
* 4, /* vs primitive stride */
188 emit
->vs
->output_size
* 4, /* vs vertex stride */
189 emit
->hs
->output_size
,
190 emit
->info
->vertices_per_patch
193 emit_stage_tess_consts(constobj
, emit
->hs
, hs_params
, ARRAY_SIZE(hs_params
));
194 emit_tess_bos(constobj
, emit
, emit
->hs
);
197 num_vertices
= emit
->gs
->shader
->nir
->info
.gs
.vertices_in
;
199 uint32_t ds_params
[4] = {
200 emit
->ds
->output_size
* num_vertices
* 4, /* ds primitive stride */
201 emit
->ds
->output_size
* 4, /* ds vertex stride */
202 emit
->hs
->output_size
, /* hs vertex stride (dwords) */
203 emit
->hs
->shader
->nir
->info
.tess
.tcs_vertices_out
206 emit_stage_tess_consts(constobj
, emit
->ds
, ds_params
, ARRAY_SIZE(ds_params
));
207 emit_tess_bos(constobj
, emit
, emit
->ds
);
211 struct ir3_shader_variant
*prev
;
217 uint32_t gs_params
[4] = {
218 prev
->output_size
* num_vertices
* 4, /* ds primitive stride */
219 prev
->output_size
* 4, /* ds vertex stride */
224 num_vertices
= emit
->gs
->shader
->nir
->info
.gs
.vertices_in
;
225 emit_stage_tess_consts(constobj
, emit
->gs
, gs_params
, ARRAY_SIZE(gs_params
));
228 fd6_emit_take_group(emit
, constobj
, FD6_GROUP_PRIMITIVE_PARAMS
, ENABLE_ALL
);
232 fd6_emit_ubos(struct fd_context
*ctx
, const struct ir3_shader_variant
*v
,
233 struct fd_ringbuffer
*ring
, struct fd_constbuf_stateobj
*constbuf
)
235 const struct ir3_const_state
*const_state
= ir3_const_state(v
);
236 int num_ubos
= const_state
->num_ubos
;
241 OUT_PKT7(ring
, fd6_stage2opcode(v
->type
), 3 + (2 * num_ubos
));
242 OUT_RING(ring
, CP_LOAD_STATE6_0_DST_OFF(0) |
243 CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO
)|
244 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT
) |
245 CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(v
->type
)) |
246 CP_LOAD_STATE6_0_NUM_UNIT(num_ubos
));
247 OUT_RING(ring
, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
248 OUT_RING(ring
, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
250 for (int i
= 0; i
< num_ubos
; i
++) {
251 struct pipe_constant_buffer
*cb
= &constbuf
->cb
[i
];
253 /* If we have user pointers (constbuf 0, aka GL uniforms), upload them
254 * to a buffer now, and save it in the constbuf so that we don't have
255 * to reupload until they get changed.
257 if (cb
->user_buffer
) {
258 struct pipe_context
*pctx
= &ctx
->base
;
259 u_upload_data(pctx
->stream_uploader
, 0,
263 &cb
->buffer_offset
, &cb
->buffer
);
264 cb
->user_buffer
= NULL
;
268 int size_vec4s
= DIV_ROUND_UP(cb
->buffer_size
, 16);
269 OUT_RELOC(ring
, fd_resource(cb
->buffer
)->bo
,
271 (uint64_t)A6XX_UBO_1_SIZE(size_vec4s
) << 32,
274 OUT_RING(ring
, 0xbad00000 | (i
<< 16));
275 OUT_RING(ring
, A6XX_UBO_1_SIZE(0));
281 user_consts_cmdstream_size(struct ir3_shader_variant
*v
)
283 struct ir3_const_state
*const_state
= ir3_const_state(v
);
284 struct ir3_ubo_analysis_state
*ubo_state
= &const_state
->ubo_state
;
286 if (unlikely(!ubo_state
->cmdstream_size
)) {
287 unsigned packets
, size
;
289 /* pre-calculate size required for userconst stateobj: */
290 ir3_user_consts_size(ubo_state
, &packets
, &size
);
292 /* also account for UBO addresses: */
294 size
+= 2 * const_state
->num_ubos
;
296 unsigned sizedwords
= (4 * packets
) + size
;
297 ubo_state
->cmdstream_size
= sizedwords
* 4;
300 return ubo_state
->cmdstream_size
;
304 emit_user_consts(struct fd6_emit
*emit
)
306 static const enum pipe_shader_type types
[] = {
307 PIPE_SHADER_VERTEX
, PIPE_SHADER_TESS_CTRL
, PIPE_SHADER_TESS_EVAL
,
308 PIPE_SHADER_GEOMETRY
, PIPE_SHADER_FRAGMENT
,
310 struct ir3_shader_variant
*variants
[] = {
311 emit
->vs
, emit
->hs
, emit
->ds
, emit
->gs
, emit
->fs
,
313 struct fd_context
*ctx
= emit
->ctx
;
316 for (unsigned i
= 0; i
< ARRAY_SIZE(types
); i
++) {
319 sz
+= user_consts_cmdstream_size(variants
[i
]);
322 struct fd_ringbuffer
*constobj
= fd_submit_new_ringbuffer(
323 ctx
->batch
->submit
, sz
, FD_RINGBUFFER_STREAMING
);
325 for (unsigned i
= 0; i
< ARRAY_SIZE(types
); i
++) {
328 ir3_emit_user_consts(ctx
->screen
, variants
[i
], constobj
, &ctx
->constbuf
[types
[i
]]);
329 fd6_emit_ubos(ctx
, variants
[i
], constobj
, &ctx
->constbuf
[types
[i
]]);
332 fd6_emit_take_group(emit
, constobj
, FD6_GROUP_CONST
, ENABLE_ALL
);
336 fd6_emit_consts(struct fd6_emit
*emit
)
338 struct fd_context
*ctx
= emit
->ctx
;
339 struct fd6_context
*fd6_ctx
= fd6_context(ctx
);
341 if (emit
->dirty
& (FD_DIRTY_CONST
| FD_DIRTY_PROG
))
342 emit_user_consts(emit
);
344 if (emit
->key
.key
.has_gs
|| emit
->key
.key
.tessellation
)
345 emit_tess_consts(emit
);
347 /* if driver-params are needed, emit each time: */
348 const struct ir3_shader_variant
*vs
= emit
->vs
;
349 if (ir3_needs_vs_driver_params(vs
)) {
350 struct fd_ringbuffer
*dpconstobj
= fd_submit_new_ringbuffer(
351 ctx
->batch
->submit
, IR3_DP_VS_COUNT
* 4, FD_RINGBUFFER_STREAMING
);
352 ir3_emit_vs_driver_params(vs
, dpconstobj
, ctx
, emit
->info
);
353 fd6_emit_take_group(emit
, dpconstobj
, FD6_GROUP_VS_DRIVER_PARAMS
, ENABLE_ALL
);
354 fd6_ctx
->has_dp_state
= true;
355 } else if (fd6_ctx
->has_dp_state
) {
356 fd6_emit_take_group(emit
, NULL
, FD6_GROUP_VS_DRIVER_PARAMS
, ENABLE_ALL
);
357 fd6_ctx
->has_dp_state
= false;
362 fd6_emit_ibo_consts(struct fd6_emit
*emit
, const struct ir3_shader_variant
*v
,
363 enum pipe_shader_type stage
, struct fd_ringbuffer
*ring
)
365 struct fd_context
*ctx
= emit
->ctx
;
367 ir3_emit_ssbo_sizes(ctx
->screen
, v
, ring
, &ctx
->shaderbuf
[stage
]);
368 ir3_emit_image_dims(ctx
->screen
, v
, ring
, &ctx
->shaderimg
[stage
]);
372 fd6_emit_cs_consts(const struct ir3_shader_variant
*v
, struct fd_ringbuffer
*ring
,
373 struct fd_context
*ctx
, const struct pipe_grid_info
*info
)
375 ir3_emit_cs_consts(v
, ring
, ctx
, info
);
376 fd6_emit_ubos(ctx
, v
, ring
, &ctx
->constbuf
[PIPE_SHADER_COMPUTE
]);
/* Thin a6xx wrapper over the shared ir3 immediate-const emission. */
void
fd6_emit_immediates(struct fd_screen *screen,
                    const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring)
{
   ir3_emit_immediates(screen, v, ring);
}
/* Thin a6xx wrapper over the shared ir3 producer→consumer link-map emission. */
void
fd6_emit_link_map(struct fd_screen *screen,
                  const struct ir3_shader_variant *producer,
                  const struct ir3_shader_variant *v,
                  struct fd_ringbuffer *ring)
{
   ir3_emit_link_map(screen, producer, v, ring);
}