2 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 #include "fd6_const.h"
27 #include "ir3_const.h"
29 /* regid: base const register
30 * prsc or dwords: buffer containing constant values
31 * sizedwords: size of const value buffer
 * offset: handed to OUT_RELOC() as the offset into prsc's bo, and applied
 *         as a byte offset to the dwords pointer
 *
 * Emits a fd6_stage2opcode(type) packet of 3 + align(sz, 4) dwords that
 * loads ST6_CONSTANTS state for the shader block of the given stage.
 * regid must be a multiple of 4; the destination offset written to the
 * packet is regid / 4 and the unit count is sizedwords rounded up to a
 * multiple of 4, divided by 4.  The payload is sz dwords read from dwords,
 * followed by padding up to the next multiple of 4 dwords.
 *
 * NOTE(review): the lines that assign sz and src, and the conditions that
 * choose between the prsc (reloc) path and the dwords (inline) path, are not
 * visible in this extract -- presumably both are keyed on prsc being
 * non-NULL; confirm against the complete file.
34 fd6_emit_const(struct fd_ringbuffer
*ring
, gl_shader_stage type
,
35 uint32_t regid
, uint32_t offset
, uint32_t sizedwords
,
36 const uint32_t *dwords
, struct pipe_resource
*prsc
)
38 uint32_t i
, sz
, align_sz
;
39 enum a6xx_state_src src
;
41 debug_assert((regid
% 4) == 0);
51 align_sz
= align(sz
, 4);
53 OUT_PKT7(ring
, fd6_stage2opcode(type
), 3 + align_sz
);
54 OUT_RING(ring
, CP_LOAD_STATE6_0_DST_OFF(regid
/4) |
55 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
) |
56 CP_LOAD_STATE6_0_STATE_SRC(src
) |
57 CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type
)) |
58 CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(sizedwords
, 4)));
60 struct fd_bo
*bo
= fd_resource(prsc
)->bo
;
61 OUT_RELOC(ring
, bo
, offset
, 0, 0);
63 OUT_RING(ring
, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
64 OUT_RING(ring
, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
65 dwords
= (uint32_t *)&((uint8_t *)dwords
)[offset
];
68 for (i
= 0; i
< sz
; i
++) {
69 OUT_RING(ring
, dwords
[i
]);
72 /* Zero-pad to multiple of 4 dwords */
73 for (i
= sz
; i
< align_sz
; i
++) {
/* Emit a direct (SS6_DIRECT) constant load whose payload is a table of
 * buffer addresses.
 *
 * regid:   base const register, must be a multiple of 4; the packet carries
 *          regid / 4 as destination offset
 * num:     number of entries in prscs[] / offsets[]
 * prscs:   resources whose bo is emitted via OUT_RELOCW() or OUT_RELOC()
 * offsets: per-entry offset passed to the reloc
 *
 * The entry count is rounded up to an even number (anum); the packet is
 * 3 + 2 * anum dwords long and declares anum / 2 units.
 *
 * NOTE(review): the declaration of i and the conditions that choose between
 * OUT_RELOCW, OUT_RELOC and the 0xbad00000 filler are not visible in this
 * extract -- presumably they test `write` and whether prscs[i] is set;
 * confirm against the complete file.
 */
79 fd6_emit_const_bo(struct fd_ringbuffer
*ring
, gl_shader_stage type
, boolean write
,
80 uint32_t regid
, uint32_t num
, struct pipe_resource
**prscs
, uint32_t *offsets
)
82 uint32_t anum
= align(num
, 2);
85 debug_assert((regid
% 4) == 0);
87 OUT_PKT7(ring
, fd6_stage2opcode(type
), 3 + (2 * anum
));
88 OUT_RING(ring
, CP_LOAD_STATE6_0_DST_OFF(regid
/4) |
89 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
)|
90 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT
) |
91 CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type
)) |
92 CP_LOAD_STATE6_0_NUM_UNIT(anum
/2));
93 OUT_RING(ring
, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
94 OUT_RING(ring
, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
96 for (i
= 0; i
< num
; i
++) {
99 OUT_RELOCW(ring
, fd_resource(prscs
[i
])->bo
, offsets
[i
], 0, 0);
101 OUT_RELOC(ring
, fd_resource(prscs
[i
])->bo
, offsets
[i
], 0, 0);
/* Two filler dwords tagged with the entry index in bits 16 and up. */
104 OUT_RING(ring
, 0xbad00000 | (i
<< 16));
105 OUT_RING(ring
, 0xbad00000 | (i
<< 16));
/* Continue from i == num: pad the table up to anum entries, two dwords each. */
109 for (; i
< anum
; i
++) {
110 OUT_RING(ring
, 0xffffffff);
111 OUT_RING(ring
, 0xffffffff);
/* NOTE(review): only the parameter list of is_stateobj() is visible in this
 * extract; its return type and body are not shown, so nothing about its
 * result can be stated here -- confirm against the complete file.
 */
116 is_stateobj(struct fd_ringbuffer
*ring
)
122 emit_const(struct fd_ringbuffer
*ring
,
123 const struct ir3_shader_variant
*v
, uint32_t dst_offset
,
124 uint32_t offset
, uint32_t size
, const void *user_buffer
,
125 struct pipe_resource
*buffer
)
127 /* TODO inline this */
128 assert(dst_offset
+ size
<= v
->constlen
* 4);
129 fd6_emit_const(ring
, v
->type
, dst_offset
,
130 offset
, size
, user_buffer
, buffer
);
134 emit_const_bo(struct fd_ringbuffer
*ring
,
135 const struct ir3_shader_variant
*v
, bool write
, uint32_t dst_offset
,
136 uint32_t num
, struct pipe_resource
**prscs
, uint32_t *offsets
)
138 /* TODO inline this */
139 assert(dst_offset
+ num
< v
->constlen
* 4);
140 fd6_emit_const_bo(ring
, v
->type
, write
, dst_offset
, num
, prscs
, offsets
);
144 emit_tess_bos(struct fd_ringbuffer
*ring
, struct fd6_emit
*emit
, struct ir3_shader_variant
*s
)
146 struct fd_context
*ctx
= emit
->ctx
;
147 const unsigned regid
= s
->shader
->const_state
.offsets
.primitive_param
* 4 + 4;
148 uint32_t dwords
= 16;
150 OUT_PKT7(ring
, fd6_stage2opcode(s
->type
), 3);
151 OUT_RING(ring
, CP_LOAD_STATE6_0_DST_OFF(regid
/ 4) |
152 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
)|
153 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT
) |
154 CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s
->type
)) |
155 CP_LOAD_STATE6_0_NUM_UNIT(dwords
/ 4));
156 OUT_RB(ring
, ctx
->batch
->tess_addrs_constobj
);
/* Emit num_params dwords from params[] into the const file of variant v,
 * starting at the variant's primitive_param offset (passed to
 * fd6_emit_const() as regid * 4, with a zero source offset and no resource).
 *
 * `size` evaluates to 1 when regid is below v->constlen.
 *
 * NOTE(review): no visible line reads `size`; presumably a dropped line
 * guards the fd6_emit_const() call on it -- confirm against the complete
 * file.
 */
160 emit_stage_tess_consts(struct fd_ringbuffer
*ring
, struct ir3_shader_variant
*v
,
161 uint32_t *params
, int num_params
)
163 const unsigned regid
= v
->shader
->const_state
.offsets
.primitive_param
;
164 int size
= MIN2(1 + regid
, v
->constlen
) - regid
;
166 fd6_emit_const(ring
, v
->type
, regid
* 4, 0, num_params
, params
, NULL
);
/* Build the primitive-parameter constants for the geometry pipeline stages
 * and install them as the FD6_GROUP_PRIMITIVE_PARAMS state group.
 *
 * A streaming ringbuffer (size argument 0x1000) is allocated from the
 * current batch's submit.  For each stage a 4-entry parameter array is
 * filled from the shaders' output_size values and vertex counts and written
 * with emit_stage_tess_consts(); the HS and DS stages additionally get
 * emit_tess_bos().  The ringbuffer is then handed to fd6_emit_take_group()
 * with ENABLE_ALL.
 *
 * NOTE(review): the conditions that select which stages are emitted, the
 * condition of the num_vertices ternary, the trailing elements of the
 * parameter arrays and the assignment of `prev` are not visible in this
 * extract -- confirm against the complete file.
 */
170 emit_tess_consts(struct fd6_emit
*emit
)
172 struct fd_context
*ctx
= emit
->ctx
;
174 struct fd_ringbuffer
*constobj
= fd_submit_new_ringbuffer(
175 ctx
->batch
->submit
, 0x1000, FD_RINGBUFFER_STREAMING
);
177 /* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
178 * size is dwords, since that's what LDG/STG use.
180 unsigned num_vertices
=
182 emit
->info
->vertices_per_patch
:
183 emit
->gs
->shader
->nir
->info
.gs
.vertices_in
;
185 uint32_t vs_params
[4] = {
186 emit
->vs
->shader
->output_size
* num_vertices
* 4, /* vs primitive stride */
187 emit
->vs
->shader
->output_size
* 4, /* vs vertex stride */
192 emit_stage_tess_consts(constobj
, emit
->vs
, vs_params
, ARRAY_SIZE(vs_params
));
195 uint32_t hs_params
[4] = {
196 emit
->vs
->shader
->output_size
* num_vertices
* 4, /* vs primitive stride */
197 emit
->vs
->shader
->output_size
* 4, /* vs vertex stride */
198 emit
->hs
->shader
->output_size
,
199 emit
->info
->vertices_per_patch
202 emit_stage_tess_consts(constobj
, emit
->hs
, hs_params
, ARRAY_SIZE(hs_params
));
203 emit_tess_bos(constobj
, emit
, emit
->hs
);
206 num_vertices
= emit
->gs
->shader
->nir
->info
.gs
.vertices_in
;
208 uint32_t ds_params
[4] = {
209 emit
->ds
->shader
->output_size
* num_vertices
* 4, /* ds primitive stride */
210 emit
->ds
->shader
->output_size
* 4, /* ds vertex stride */
211 emit
->hs
->shader
->output_size
, /* hs vertex stride (dwords) */
212 emit
->hs
->shader
->nir
->info
.tess
.tcs_vertices_out
215 emit_stage_tess_consts(constobj
, emit
->ds
, ds_params
, ARRAY_SIZE(ds_params
));
216 emit_tess_bos(constobj
, emit
, emit
->ds
);
220 struct ir3_shader_variant
*prev
;
226 uint32_t gs_params
[4] = {
227 prev
->shader
->output_size
* num_vertices
* 4, /* primitive stride of the prev stage */
228 prev
->shader
->output_size
* 4, /* vertex stride of the prev stage */
/* NOTE(review): none of the visible lines below reads num_vertices after
 * this assignment, so it looks like a dead store -- confirm against the
 * complete file.
 */
233 num_vertices
= emit
->gs
->shader
->nir
->info
.gs
.vertices_in
;
234 emit_stage_tess_consts(constobj
, emit
->gs
, gs_params
, ARRAY_SIZE(gs_params
));
237 fd6_emit_take_group(emit
, constobj
, FD6_GROUP_PRIMITIVE_PARAMS
, ENABLE_ALL
);
/* Emit user constants and UBO state for the vertex, tess-ctrl, tess-eval,
 * geometry and fragment stages into one streaming ringbuffer and install it
 * as the FD6_GROUP_CONST state group.
 *
 * types[] and variants[] are parallel arrays: entry i of variants[] is the
 * shader variant for pipe shader type types[i].  The first loop sums each
 * variant's shader->ubo_state.cmdstream_size into sz, which sizes the
 * ringbuffer; the second loop calls ir3_emit_user_consts() and
 * ir3_emit_ubos() with the constbuf slot for that shader type.
 *
 * NOTE(review): the declaration/initialisation of sz and any per-variant
 * guards inside the two loops are not visible in this extract -- presumably
 * absent stages are skipped; confirm against the complete file.
 */
241 emit_user_consts(struct fd6_emit
*emit
)
243 static const enum pipe_shader_type types
[] = {
244 PIPE_SHADER_VERTEX
, PIPE_SHADER_TESS_CTRL
, PIPE_SHADER_TESS_EVAL
,
245 PIPE_SHADER_GEOMETRY
, PIPE_SHADER_FRAGMENT
,
247 const struct ir3_shader_variant
*variants
[] = {
248 emit
->vs
, emit
->hs
, emit
->ds
, emit
->gs
, emit
->fs
,
250 struct fd_context
*ctx
= emit
->ctx
;
253 for (unsigned i
= 0; i
< ARRAY_SIZE(types
); i
++) {
256 sz
+= variants
[i
]->shader
->ubo_state
.cmdstream_size
;
259 struct fd_ringbuffer
*constobj
= fd_submit_new_ringbuffer(
260 ctx
->batch
->submit
, sz
, FD_RINGBUFFER_STREAMING
);
262 for (unsigned i
= 0; i
< ARRAY_SIZE(types
); i
++) {
265 ir3_emit_user_consts(ctx
->screen
, variants
[i
], constobj
, &ctx
->constbuf
[types
[i
]]);
266 ir3_emit_ubos(ctx
->screen
, variants
[i
], constobj
, &ctx
->constbuf
[types
[i
]]);
269 fd6_emit_take_group(emit
, constobj
, FD6_GROUP_CONST
, ENABLE_ALL
);
273 fd6_emit_consts(struct fd6_emit
*emit
)
275 struct fd_context
*ctx
= emit
->ctx
;
276 struct fd6_context
*fd6_ctx
= fd6_context(ctx
);
278 if (emit
->dirty
& (FD_DIRTY_CONST
| FD_DIRTY_PROG
))
279 emit_user_consts(emit
);
281 if (emit
->key
.key
.has_gs
|| emit
->key
.key
.tessellation
)
282 emit_tess_consts(emit
);
284 /* if driver-params are needed, emit each time: */
285 const struct ir3_shader_variant
*vs
= emit
->vs
;
286 if (ir3_needs_vs_driver_params(vs
)) {
287 struct fd_ringbuffer
*dpconstobj
= fd_submit_new_ringbuffer(
288 ctx
->batch
->submit
, IR3_DP_VS_COUNT
* 4, FD_RINGBUFFER_STREAMING
);
289 ir3_emit_vs_driver_params(vs
, dpconstobj
, ctx
, emit
->info
);
290 fd6_emit_take_group(emit
, dpconstobj
, FD6_GROUP_VS_DRIVER_PARAMS
, ENABLE_ALL
);
291 fd6_ctx
->has_dp_state
= true;
292 } else if (fd6_ctx
->has_dp_state
) {
293 fd6_emit_take_group(emit
, NULL
, FD6_GROUP_VS_DRIVER_PARAMS
, ENABLE_ALL
);
294 fd6_ctx
->has_dp_state
= false;
299 fd6_emit_ibo_consts(struct fd6_emit
*emit
, const struct ir3_shader_variant
*v
,
300 enum pipe_shader_type stage
, struct fd_ringbuffer
*ring
)
302 struct fd_context
*ctx
= emit
->ctx
;
304 ir3_emit_ssbo_sizes(ctx
->screen
, v
, ring
, &ctx
->shaderbuf
[stage
]);
305 ir3_emit_image_dims(ctx
->screen
, v
, ring
, &ctx
->shaderimg
[stage
]);
/* a6xx entry point for compute-shader constants: forwards all arguments
 * unchanged to ir3_emit_cs_consts().
 */
void
fd6_emit_cs_consts(const struct ir3_shader_variant *v,
		struct fd_ringbuffer *ring, struct fd_context *ctx,
		const struct pipe_grid_info *info)
{
	ir3_emit_cs_consts(v, ring, ctx, info);
}
/* a6xx entry point for shader immediates: forwards all arguments unchanged
 * to ir3_emit_immediates().
 */
void
fd6_emit_immediates(struct fd_screen *screen,
		const struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
{
	ir3_emit_immediates(screen, v, ring);
}
/* a6xx entry point for sizing user-const state: forwards `state` and the
 * two pointers unchanged to ir3_user_consts_size().
 *
 * packets, size: presumably outputs filled in by ir3_user_consts_size() --
 * TODO confirm against its definition.
 */
void
fd6_user_consts_size(struct ir3_ubo_analysis_state *state,
		unsigned *packets, unsigned *size)
{
	ir3_user_consts_size(state, packets, size);
}
/* a6xx entry point for emitting the link map between `producer` and the
 * consuming variant `v`: forwards all arguments unchanged to
 * ir3_emit_link_map().
 */
void
fd6_emit_link_map(struct fd_screen *screen,
		const struct ir3_shader_variant *producer,
		const struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
{
	ir3_emit_link_map(screen, producer, v, ring);
}