/*
 * Copyright (c) 2014-2015 Etnaviv Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Wladimir J. van der Laan <laanwj@gmail.com>
 */
27 #include "etnaviv_emit.h"
29 #include "etnaviv_blend.h"
30 #include "etnaviv_compiler.h"
31 #include "etnaviv_context.h"
32 #include "etnaviv_rasterizer.h"
33 #include "etnaviv_resource.h"
34 #include "etnaviv_rs.h"
35 #include "etnaviv_screen.h"
36 #include "etnaviv_shader.h"
37 #include "etnaviv_texture.h"
38 #include "etnaviv_translate.h"
39 #include "etnaviv_uniforms.h"
40 #include "etnaviv_util.h"
41 #include "etnaviv_zsa.h"
42 #include "hw/common.xml.h"
43 #include "hw/state.xml.h"
44 #include "hw/state_blt.xml.h"
45 #include "util/u_math.h"
47 /* Queue a STALL command (queues 2 words) */
49 CMD_STALL(struct etna_cmd_stream
*stream
, uint32_t from
, uint32_t to
)
51 etna_cmd_stream_emit(stream
, VIV_FE_STALL_HEADER_OP_STALL
);
52 etna_cmd_stream_emit(stream
, VIV_FE_STALL_TOKEN_FROM(from
) | VIV_FE_STALL_TOKEN_TO(to
));
56 etna_stall(struct etna_cmd_stream
*stream
, uint32_t from
, uint32_t to
)
58 bool blt
= (from
== SYNC_RECIPIENT_BLT
) || (to
== SYNC_RECIPIENT_BLT
);
59 etna_cmd_stream_reserve(stream
, blt
? 8 : 4);
62 etna_emit_load_state(stream
, VIVS_BLT_ENABLE
>> 2, 1, 0);
63 etna_cmd_stream_emit(stream
, 1);
66 /* TODO: set bit 28/29 of token after BLT COPY_BUFFER */
67 etna_emit_load_state(stream
, VIVS_GL_SEMAPHORE_TOKEN
>> 2, 1, 0);
68 etna_cmd_stream_emit(stream
, VIVS_GL_SEMAPHORE_TOKEN_FROM(from
) | VIVS_GL_SEMAPHORE_TOKEN_TO(to
));
70 if (from
== SYNC_RECIPIENT_FE
) {
71 /* if the frontend is to be stalled, queue a STALL frontend command */
72 CMD_STALL(stream
, from
, to
);
74 /* otherwise, load the STALL token state */
75 etna_emit_load_state(stream
, VIVS_GL_STALL_TOKEN
>> 2, 1, 0);
76 etna_cmd_stream_emit(stream
, VIVS_GL_STALL_TOKEN_FROM(from
) | VIVS_GL_STALL_TOKEN_TO(to
));
80 etna_emit_load_state(stream
, VIVS_BLT_ENABLE
>> 2, 1, 0);
81 etna_cmd_stream_emit(stream
, 0);
/* Shorthands for appending one register write through the coalescing
 * helpers. Each expands to a call that uses the identifiers `stream` and
 * `coalesce` from the enclosing function, and prefixes the register name
 * with VIVS_. */
#define EMIT_STATE(state_name, src_value) \
   etna_coalsence_emit(stream, &coalesce, VIVS_##state_name, src_value)

#define EMIT_STATE_FIXP(state_name, src_value) \
   etna_coalsence_emit_fixp(stream, &coalesce, VIVS_##state_name, src_value)

#define EMIT_STATE_RELOC(state_name, src_value) \
   etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)

#define ETNA_3D_CONTEXT_SIZE (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */
97 required_stream_size(struct etna_context
*ctx
)
99 unsigned size
= ETNA_3D_CONTEXT_SIZE
;
104 /* vertex elements */
105 size
+= ctx
->vertex_elements
->num_elements
+ 1;
107 /* uniforms - worst case (2 words per uniform load) */
108 size
+= ctx
->shader
.vs
->uniforms
.imm_count
* 2;
109 size
+= ctx
->shader
.fs
->uniforms
.imm_count
* 2;
112 size
+= ctx
->shader_state
.vs_inst_mem_size
+ 1;
113 size
+= ctx
->shader_state
.ps_inst_mem_size
+ 1;
115 /* DRAW_INDEXED_PRIMITIVES command */
118 /* reserve for alignment etc. */
124 /* Emit state that only exists on HALTI5+ */
126 emit_halti5_only_state(struct etna_context
*ctx
, int vs_output_count
)
128 struct etna_cmd_stream
*stream
= ctx
->stream
;
129 uint32_t dirty
= ctx
->dirty
;
130 struct etna_coalesce coalesce
;
132 etna_coalesce_start(stream
, &coalesce
);
133 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
134 /* Magic states (load balancing, inter-unit sync, buffers) */
135 /*007C4*/ EMIT_STATE(FE_HALTI5_ID_CONFIG
, ctx
->shader_state
.FE_HALTI5_ID_CONFIG
);
136 /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT
, vs_output_count
| ((vs_output_count
* 0x10) << 8));
137 /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0
, 0x0001000e | ((0x110/vs_output_count
) << 20));
138 for (int x
= 0; x
< 4; ++x
) {
139 /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x
), ctx
->shader_state
.VS_OUTPUT
[x
]);
142 if (unlikely(dirty
& (ETNA_DIRTY_VERTEX_ELEMENTS
| ETNA_DIRTY_SHADER
))) {
143 for (int x
= 0; x
< 4; ++x
) {
144 /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x
), ctx
->shader_state
.VS_INPUT
[x
]);
147 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
148 /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx
->shader_state
.GL_VARYING_NUM_COMPONENTS
);
149 /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT
, vs_output_count
);
150 /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx
->shader_state
.GL_VARYING_NUM_COMPONENTS
);
151 /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS
, ctx
->shader_state
.GL_HALTI5_SH_SPECIALS
);
153 etna_coalesce_end(stream
, &coalesce
);
156 /* Emit state that no longer exists on HALTI5 */
158 emit_pre_halti5_state(struct etna_context
*ctx
)
160 struct etna_cmd_stream
*stream
= ctx
->stream
;
161 uint32_t dirty
= ctx
->dirty
;
162 struct etna_coalesce coalesce
;
164 etna_coalesce_start(stream
, &coalesce
);
165 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
166 /*00800*/ EMIT_STATE(VS_END_PC
, ctx
->shader_state
.VS_END_PC
);
168 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
169 for (int x
= 0; x
< 4; ++x
) {
170 /*00810*/ EMIT_STATE(VS_OUTPUT(x
), ctx
->shader_state
.VS_OUTPUT
[x
]);
173 if (unlikely(dirty
& (ETNA_DIRTY_VERTEX_ELEMENTS
| ETNA_DIRTY_SHADER
))) {
174 for (int x
= 0; x
< 4; ++x
) {
175 /*00820*/ EMIT_STATE(VS_INPUT(x
), ctx
->shader_state
.VS_INPUT
[x
]);
178 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
179 /*00838*/ EMIT_STATE(VS_START_PC
, ctx
->shader_state
.VS_START_PC
);
181 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
182 for (int x
= 0; x
< 10; ++x
) {
183 /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x
), ctx
->shader_state
.PA_SHADER_ATTRIBUTES
[x
]);
186 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
))) {
187 /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04
, ctx
->framebuffer
.RA_MULTISAMPLE_UNK00E04
);
188 for (int x
= 0; x
< 4; ++x
) {
189 /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x
), ctx
->framebuffer
.RA_MULTISAMPLE_UNK00E10
[x
]);
191 for (int x
= 0; x
< 16; ++x
) {
192 /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x
), ctx
->framebuffer
.RA_CENTROID_TABLE
[x
]);
195 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_FRAMEBUFFER
))) {
196 /*01000*/ EMIT_STATE(PS_END_PC
, ctx
->shader_state
.PS_END_PC
);
198 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_FRAMEBUFFER
))) {
199 /*01018*/ EMIT_STATE(PS_START_PC
, ctx
->shader_state
.PS_START_PC
);
201 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
202 /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS
, ctx
->shader_state
.GL_VARYING_NUM_COMPONENTS
);
203 for (int x
= 0; x
< 2; ++x
) {
204 /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x
), ctx
->shader_state
.GL_VARYING_COMPONENT_USE
[x
]);
207 etna_coalesce_end(stream
, &coalesce
);
210 /* Weave state before draw operation. This function merges all the compiled
211 * state blocks under the context into one device register state. Parts of
212 * this state that are changed since last call (dirty) will be uploaded as
213 * state changes in the command buffer. */
215 etna_emit_state(struct etna_context
*ctx
)
217 struct etna_cmd_stream
*stream
= ctx
->stream
;
218 unsigned ccw
= ctx
->rasterizer
->front_ccw
;
221 /* Pre-reserve the command buffer space which we are likely to need.
222 * This must cover all the state emitted below, and the following
224 etna_cmd_stream_reserve(stream
, required_stream_size(ctx
));
226 uint32_t dirty
= ctx
->dirty
;
228 /* Pre-processing: see what caches we need to flush before making state changes. */
229 uint32_t to_flush
= 0;
230 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
)))
231 to_flush
|= VIVS_GL_FLUSH_CACHE_COLOR
;
232 if (unlikely(dirty
& (ETNA_DIRTY_TEXTURE_CACHES
)))
233 to_flush
|= VIVS_GL_FLUSH_CACHE_TEXTURE
;
234 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
))) /* Framebuffer config changed? */
235 to_flush
|= VIVS_GL_FLUSH_CACHE_COLOR
| VIVS_GL_FLUSH_CACHE_DEPTH
;
236 if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL
))
237 to_flush
|= VIVS_GL_FLUSH_CACHE_TEXTURE
| VIVS_GL_FLUSH_CACHE_COLOR
| VIVS_GL_FLUSH_CACHE_DEPTH
;
240 etna_set_state(stream
, VIVS_GL_FLUSH_CACHE
, to_flush
);
241 etna_stall(stream
, SYNC_RECIPIENT_RA
, SYNC_RECIPIENT_PE
);
244 /* Flush TS cache before changing TS configuration. */
245 if (unlikely(dirty
& ETNA_DIRTY_TS
)) {
246 etna_set_state(stream
, VIVS_TS_FLUSH_CACHE
, VIVS_TS_FLUSH_CACHE_FLUSH
);
249 /* Update vertex elements. This is different from any of the other states, in that
250 * a) the number of vertex elements written matters: so write only active ones
251 * b) the vertex element states must all be written: do not skip entries that stay the same */
252 if (dirty
& (ETNA_DIRTY_VERTEX_ELEMENTS
)) {
253 if (ctx
->specs
.halti
>= 5) {
254 /*17800*/ etna_set_state_multi(stream
, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
255 ctx
->vertex_elements
->num_elements
,
256 ctx
->vertex_elements
->NFE_GENERIC_ATTRIB_CONFIG0
);
257 /*17A00*/ etna_set_state_multi(stream
, VIVS_NFE_GENERIC_ATTRIB_SCALE(0),
258 ctx
->vertex_elements
->num_elements
,
259 ctx
->vertex_elements
->NFE_GENERIC_ATTRIB_SCALE
);
260 /*17A80*/ etna_set_state_multi(stream
, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),
261 ctx
->vertex_elements
->num_elements
,
262 ctx
->vertex_elements
->NFE_GENERIC_ATTRIB_CONFIG1
);
264 /* Special case: vertex elements must always be sent in full if changed */
265 /*00600*/ etna_set_state_multi(stream
, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
266 ctx
->vertex_elements
->num_elements
,
267 ctx
->vertex_elements
->FE_VERTEX_ELEMENT_CONFIG
);
268 if (ctx
->specs
.halti
>= 2) {
269 /*00780*/ etna_set_state_multi(stream
, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
270 ctx
->vertex_elements
->num_elements
,
271 ctx
->vertex_elements
->NFE_GENERIC_ATTRIB_SCALE
);
275 unsigned vs_output_count
= etna_rasterizer_state(ctx
->rasterizer
)->point_size_per_vertex
276 ? ctx
->shader_state
.VS_OUTPUT_COUNT_PSIZE
277 : ctx
->shader_state
.VS_OUTPUT_COUNT
;
279 /* The following code is originally generated by gen_merge_state.py, to
280 * emit state in increasing order of address (this makes it possible to merge
281 * consecutive register updates into one SET_STATE command)
283 * There have been some manual changes, where the weaving operation is not
286 * - num vertex elements
291 * - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not
293 * - PS / framebuffer interaction for MSAA
294 * - move update of GL_MULTI_SAMPLE_CONFIG first
295 * - add unlikely()/likely()
297 struct etna_coalesce coalesce
;
299 etna_coalesce_start(stream
, &coalesce
);
301 /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here
304 /* multi sample config is set first, and outside of the normal sorting
305 * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
306 * possibly PS.TEMP_REGISTER_CONTROL).
308 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
| ETNA_DIRTY_SAMPLE_MASK
))) {
309 uint32_t val
= VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx
->sample_mask
);
310 val
|= ctx
->framebuffer
.GL_MULTI_SAMPLE_CONFIG
;
312 /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG
, val
);
314 if (likely(dirty
& (ETNA_DIRTY_INDEX_BUFFER
))) {
315 /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR
, &ctx
->index_buffer
.FE_INDEX_STREAM_BASE_ADDR
);
316 /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL
, ctx
->index_buffer
.FE_INDEX_STREAM_CONTROL
);
318 if (likely(dirty
& (ETNA_DIRTY_INDEX_BUFFER
))) {
319 /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX
, ctx
->index_buffer
.FE_PRIMITIVE_RESTART_INDEX
);
321 if (likely(dirty
& (ETNA_DIRTY_VERTEX_BUFFERS
))) {
322 if (ctx
->specs
.halti
>= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */
323 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
324 /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x
), &ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
);
326 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
327 if (ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
.bo
) {
328 /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x
), ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_CONTROL
);
331 } else if(ctx
->specs
.stream_count
> 1) { /* hw w/ multiple vertex streams */
332 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
333 /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x
), &ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
);
335 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
336 if (ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
.bo
) {
337 /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x
), ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_CONTROL
);
340 } else { /* hw w/ single vertex stream */
341 /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR
, &ctx
->vertex_buffer
.cvb
[0].FE_VERTEX_STREAM_BASE_ADDR
);
342 /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL
, ctx
->vertex_buffer
.cvb
[0].FE_VERTEX_STREAM_CONTROL
);
345 /* gallium has instance divisor as part of elements state */
346 if ((dirty
& (ETNA_DIRTY_VERTEX_ELEMENTS
)) && ctx
->specs
.halti
>= 2) {
347 for (int x
= 0; x
< ctx
->vertex_elements
->num_buffers
; ++x
) {
348 /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_VERTEX_DIVISOR(x
), ctx
->vertex_elements
->NFE_VERTEX_STREAMS_VERTEX_DIVISOR
[x
]);
352 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_RASTERIZER
))) {
354 /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT
, vs_output_count
);
356 if (unlikely(dirty
& (ETNA_DIRTY_VERTEX_ELEMENTS
| ETNA_DIRTY_SHADER
))) {
357 /*00808*/ EMIT_STATE(VS_INPUT_COUNT
, ctx
->shader_state
.VS_INPUT_COUNT
);
358 /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL
, ctx
->shader_state
.VS_TEMP_REGISTER_CONTROL
);
360 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
361 /*00830*/ EMIT_STATE(VS_LOAD_BALANCING
, ctx
->shader_state
.VS_LOAD_BALANCING
);
363 if (unlikely(dirty
& (ETNA_DIRTY_VIEWPORT
))) {
364 /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X
, ctx
->viewport
.PA_VIEWPORT_SCALE_X
);
365 /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y
, ctx
->viewport
.PA_VIEWPORT_SCALE_Y
);
366 /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z
, ctx
->viewport
.PA_VIEWPORT_SCALE_Z
);
367 /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X
, ctx
->viewport
.PA_VIEWPORT_OFFSET_X
);
368 /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y
, ctx
->viewport
.PA_VIEWPORT_OFFSET_Y
);
369 /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z
, ctx
->viewport
.PA_VIEWPORT_OFFSET_Z
);
371 if (unlikely(dirty
& (ETNA_DIRTY_RASTERIZER
))) {
372 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
374 /*00A18*/ EMIT_STATE(PA_LINE_WIDTH
, rasterizer
->PA_LINE_WIDTH
);
375 /*00A1C*/ EMIT_STATE(PA_POINT_SIZE
, rasterizer
->PA_POINT_SIZE
);
376 /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE
, rasterizer
->PA_SYSTEM_MODE
);
378 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
379 /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT
, ctx
->shader_state
.PA_ATTRIBUTE_ELEMENT_COUNT
);
381 if (unlikely(dirty
& (ETNA_DIRTY_RASTERIZER
| ETNA_DIRTY_SHADER
))) {
382 uint32_t val
= etna_rasterizer_state(ctx
->rasterizer
)->PA_CONFIG
;
383 /*00A34*/ EMIT_STATE(PA_CONFIG
, val
& ctx
->shader_state
.PA_CONFIG
);
385 if (unlikely(dirty
& (ETNA_DIRTY_RASTERIZER
))) {
386 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
387 /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0
, rasterizer
->PA_LINE_WIDTH
);
388 /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1
, rasterizer
->PA_LINE_WIDTH
);
390 if (unlikely(dirty
& (ETNA_DIRTY_SCISSOR
| ETNA_DIRTY_FRAMEBUFFER
|
391 ETNA_DIRTY_RASTERIZER
| ETNA_DIRTY_VIEWPORT
))) {
392 /* this is a bit of a mess: rasterizer.scissor determines whether to use
393 * only the framebuffer scissor, or specific scissor state, and the
394 * viewport clips too so the logic spans four CSOs */
395 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
397 uint32_t scissor_left
=
398 MAX2(ctx
->framebuffer
.SE_SCISSOR_LEFT
, ctx
->viewport
.SE_SCISSOR_LEFT
);
399 uint32_t scissor_top
=
400 MAX2(ctx
->framebuffer
.SE_SCISSOR_TOP
, ctx
->viewport
.SE_SCISSOR_TOP
);
401 uint32_t scissor_right
=
402 MIN2(ctx
->framebuffer
.SE_SCISSOR_RIGHT
, ctx
->viewport
.SE_SCISSOR_RIGHT
);
403 uint32_t scissor_bottom
=
404 MIN2(ctx
->framebuffer
.SE_SCISSOR_BOTTOM
, ctx
->viewport
.SE_SCISSOR_BOTTOM
);
406 if (rasterizer
->scissor
) {
407 scissor_left
= MAX2(ctx
->scissor
.SE_SCISSOR_LEFT
, scissor_left
);
408 scissor_top
= MAX2(ctx
->scissor
.SE_SCISSOR_TOP
, scissor_top
);
409 scissor_right
= MIN2(ctx
->scissor
.SE_SCISSOR_RIGHT
, scissor_right
);
410 scissor_bottom
= MIN2(ctx
->scissor
.SE_SCISSOR_BOTTOM
, scissor_bottom
);
413 /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT
, scissor_left
);
414 /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP
, scissor_top
);
415 /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT
, scissor_right
);
416 /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM
, scissor_bottom
);
418 if (unlikely(dirty
& (ETNA_DIRTY_RASTERIZER
))) {
419 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
421 /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE
, rasterizer
->SE_DEPTH_SCALE
);
422 /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS
, rasterizer
->SE_DEPTH_BIAS
);
423 /*00C18*/ EMIT_STATE(SE_CONFIG
, rasterizer
->SE_CONFIG
);
425 if (unlikely(dirty
& (ETNA_DIRTY_SCISSOR
| ETNA_DIRTY_FRAMEBUFFER
|
426 ETNA_DIRTY_RASTERIZER
| ETNA_DIRTY_VIEWPORT
))) {
427 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
429 uint32_t clip_right
=
430 MIN2(ctx
->framebuffer
.SE_CLIP_RIGHT
, ctx
->viewport
.SE_CLIP_RIGHT
);
431 uint32_t clip_bottom
=
432 MIN2(ctx
->framebuffer
.SE_CLIP_BOTTOM
, ctx
->viewport
.SE_CLIP_BOTTOM
);
434 if (rasterizer
->scissor
) {
435 clip_right
= MIN2(ctx
->scissor
.SE_CLIP_RIGHT
, clip_right
);
436 clip_bottom
= MIN2(ctx
->scissor
.SE_CLIP_BOTTOM
, clip_bottom
);
439 /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT
, clip_right
);
440 /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM
, clip_bottom
);
442 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
443 /*00E00*/ EMIT_STATE(RA_CONTROL
, ctx
->shader_state
.RA_CONTROL
);
445 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_FRAMEBUFFER
))) {
446 /*01004*/ EMIT_STATE(PS_OUTPUT_REG
, ctx
->shader_state
.PS_OUTPUT_REG
);
447 /*01008*/ EMIT_STATE(PS_INPUT_COUNT
,
448 ctx
->framebuffer
.msaa_mode
449 ? ctx
->shader_state
.PS_INPUT_COUNT_MSAA
450 : ctx
->shader_state
.PS_INPUT_COUNT
);
451 /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL
,
452 ctx
->framebuffer
.msaa_mode
453 ? ctx
->shader_state
.PS_TEMP_REGISTER_CONTROL_MSAA
454 : ctx
->shader_state
.PS_TEMP_REGISTER_CONTROL
);
455 /*01010*/ EMIT_STATE(PS_CONTROL
, ctx
->framebuffer
.PS_CONTROL
);
456 /*01030*/ EMIT_STATE(PS_CONTROL_EXT
, ctx
->framebuffer
.PS_CONTROL_EXT
);
458 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
| ETNA_DIRTY_FRAMEBUFFER
| ETNA_DIRTY_SHADER
))) {
459 /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG
, (etna_zsa_state(ctx
->zsa
)->PE_DEPTH_CONFIG
|
460 ctx
->framebuffer
.PE_DEPTH_CONFIG
) &
461 ctx
->shader_state
.PE_DEPTH_CONFIG
);
463 if (unlikely(dirty
& (ETNA_DIRTY_VIEWPORT
))) {
464 /*01404*/ EMIT_STATE(PE_DEPTH_NEAR
, ctx
->viewport
.PE_DEPTH_NEAR
);
465 /*01408*/ EMIT_STATE(PE_DEPTH_FAR
, ctx
->viewport
.PE_DEPTH_FAR
);
467 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
))) {
468 /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE
, ctx
->framebuffer
.PE_DEPTH_NORMALIZE
);
470 if (ctx
->specs
.pixel_pipes
== 1) {
471 /*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR
, &ctx
->framebuffer
.PE_DEPTH_ADDR
);
474 /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE
, ctx
->framebuffer
.PE_DEPTH_STRIDE
);
477 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
| ETNA_DIRTY_RASTERIZER
))) {
478 uint32_t val
= etna_zsa_state(ctx
->zsa
)->PE_STENCIL_OP
[ccw
];
479 /*01418*/ EMIT_STATE(PE_STENCIL_OP
, val
);
481 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
| ETNA_DIRTY_STENCIL_REF
| ETNA_DIRTY_RASTERIZER
))) {
482 uint32_t val
= etna_zsa_state(ctx
->zsa
)->PE_STENCIL_CONFIG
[ccw
];
483 /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG
, val
| ctx
->stencil_ref
.PE_STENCIL_CONFIG
[ccw
]);
485 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
))) {
486 uint32_t val
= etna_zsa_state(ctx
->zsa
)->PE_ALPHA_OP
;
487 /*01420*/ EMIT_STATE(PE_ALPHA_OP
, val
);
489 if (unlikely(dirty
& (ETNA_DIRTY_BLEND_COLOR
))) {
490 /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR
, ctx
->blend_color
.PE_ALPHA_BLEND_COLOR
);
492 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
))) {
493 uint32_t val
= etna_blend_state(ctx
->blend
)->PE_ALPHA_CONFIG
;
494 /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG
, val
);
496 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
| ETNA_DIRTY_FRAMEBUFFER
))) {
498 /* Use the components and overwrite bits in framebuffer.PE_COLOR_FORMAT
499 * as a mask to enable the bits from blend PE_COLOR_FORMAT */
500 val
= ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK
|
501 VIVS_PE_COLOR_FORMAT_OVERWRITE
);
502 val
|= etna_blend_state(ctx
->blend
)->PE_COLOR_FORMAT
;
503 val
&= ctx
->framebuffer
.PE_COLOR_FORMAT
;
504 /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT
, val
);
506 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
))) {
507 if (ctx
->specs
.pixel_pipes
== 1) {
508 /*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR
, &ctx
->framebuffer
.PE_COLOR_ADDR
);
509 /*01434*/ EMIT_STATE(PE_COLOR_STRIDE
, ctx
->framebuffer
.PE_COLOR_STRIDE
);
510 /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL
, ctx
->framebuffer
.PE_HDEPTH_CONTROL
);
511 } else if (ctx
->specs
.pixel_pipes
== 2) {
512 /*01434*/ EMIT_STATE(PE_COLOR_STRIDE
, ctx
->framebuffer
.PE_COLOR_STRIDE
);
513 /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL
, ctx
->framebuffer
.PE_HDEPTH_CONTROL
);
514 /*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx
->framebuffer
.PE_PIPE_COLOR_ADDR
[0]);
515 /*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx
->framebuffer
.PE_PIPE_COLOR_ADDR
[1]);
516 /*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx
->framebuffer
.PE_PIPE_DEPTH_ADDR
[0]);
517 /*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx
->framebuffer
.PE_PIPE_DEPTH_ADDR
[1]);
522 if (unlikely(dirty
& (ETNA_DIRTY_STENCIL_REF
| ETNA_DIRTY_RASTERIZER
))) {
523 /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT
, ctx
->stencil_ref
.PE_STENCIL_CONFIG_EXT
[ccw
]);
525 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
| ETNA_DIRTY_FRAMEBUFFER
))) {
526 struct etna_blend_state
*blend
= etna_blend_state(ctx
->blend
);
527 /*014A4*/ EMIT_STATE(PE_LOGIC_OP
, blend
->PE_LOGIC_OP
| ctx
->framebuffer
.PE_LOGIC_OP
);
529 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
))) {
530 struct etna_blend_state
*blend
= etna_blend_state(ctx
->blend
);
531 for (int x
= 0; x
< 2; ++x
) {
532 /*014A8*/ EMIT_STATE(PE_DITHER(x
), blend
->PE_DITHER
[x
]);
535 if (unlikely(dirty
& (ETNA_DIRTY_BLEND_COLOR
))) {
536 /*014B0*/ EMIT_STATE(PE_ALPHA_COLOR_EXT0
, ctx
->blend_color
.PE_ALPHA_COLOR_EXT0
);
537 /*014B4*/ EMIT_STATE(PE_ALPHA_COLOR_EXT1
, ctx
->blend_color
.PE_ALPHA_COLOR_EXT1
);
539 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
| ETNA_DIRTY_RASTERIZER
))) {
540 /*014B8*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT2
, etna_zsa_state(ctx
->zsa
)->PE_STENCIL_CONFIG_EXT2
[ccw
]);
542 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
)) && ctx
->specs
.halti
>= 3)
543 /*014BC*/ EMIT_STATE(PE_MEM_CONFIG
, ctx
->framebuffer
.PE_MEM_CONFIG
);
544 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
| ETNA_DIRTY_TS
))) {
545 /*01654*/ EMIT_STATE(TS_MEM_CONFIG
, ctx
->framebuffer
.TS_MEM_CONFIG
);
546 /*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE
, &ctx
->framebuffer
.TS_COLOR_STATUS_BASE
);
547 /*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE
, &ctx
->framebuffer
.TS_COLOR_SURFACE_BASE
);
548 /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE
, ctx
->framebuffer
.TS_COLOR_CLEAR_VALUE
);
549 /*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE
, &ctx
->framebuffer
.TS_DEPTH_STATUS_BASE
);
550 /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE
, &ctx
->framebuffer
.TS_DEPTH_SURFACE_BASE
);
551 /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE
, ctx
->framebuffer
.TS_DEPTH_CLEAR_VALUE
);
552 /*016BC*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE_EXT
, ctx
->framebuffer
.TS_COLOR_CLEAR_VALUE_EXT
);
554 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
555 /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS
, ctx
->shader_state
.GL_VARYING_TOTAL_COMPONENTS
);
557 etna_coalesce_end(stream
, &coalesce
);
558 /* end only EMIT_STATE */
560 /* Emit strongly architecture-specific state */
561 if (ctx
->specs
.halti
>= 5)
562 emit_halti5_only_state(ctx
, vs_output_count
);
564 emit_pre_halti5_state(ctx
);
566 ctx
->emit_texture_state(ctx
);
568 /* Insert a FE/PE stall as changing the shader instructions (and maybe
569 * the uniforms) can corrupt the previous in-progress draw operation.
570 * Observed with amoeba on GC2000 during the right-to-left rendering
571 * of PI, and can cause GPU hangs immediately after.
572 * I summise that this is because the "new" locations at 0xc000 are not
573 * properly protected against updates as other states seem to be. Hence,
574 * we detect the "new" vertex shader instruction offset to apply this. */
575 if (ctx
->dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_CONSTBUF
) && ctx
->specs
.vs_offset
> 0x4000)
576 etna_stall(ctx
->stream
, SYNC_RECIPIENT_FE
, SYNC_RECIPIENT_PE
);
578 /* We need to update the uniform cache only if one of the following bits are
580 * - ETNA_DIRTY_SHADER
581 * - ETNA_DIRTY_CONSTBUF
582 * - uniforms_dirty_bits
584 * In case of ETNA_DIRTY_SHADER we need load all uniforms from the cache. In
586 * other cases we can load on the changed uniforms.
588 static const uint32_t uniform_dirty_bits
=
589 ETNA_DIRTY_SHADER
| ETNA_DIRTY_CONSTBUF
;
591 /**** Large dynamically-sized state ****/
592 bool do_uniform_flush
= ctx
->specs
.halti
< 5;
593 if (dirty
& (ETNA_DIRTY_SHADER
)) {
594 /* Special case: a new shader was loaded; simply re-load all uniforms and
595 * shader code at once */
596 /* This sequence is special, do not change ordering unless necessary. According to comment
597 snippets in the Vivante kernel driver a process called "steering" goes on while programming
598 shader state. This (as I understand it) means certain unified states are "steered"
599 toward a specific shader unit (VS/PS/...) based on either explicit flags in register
600 00860, or what other state is written before "auto-steering". So this means some
601 state can legitimately be programmed multiple times.
604 if (ctx
->specs
.halti
>= 5) { /* ICACHE (HALTI5) */
605 assert(ctx
->shader_state
.VS_INST_ADDR
.bo
&& ctx
->shader_state
.PS_INST_ADDR
.bo
);
606 /* Set icache (VS) */
607 etna_set_state(stream
, VIVS_VS_NEWRANGE_LOW
, 0);
608 etna_set_state(stream
, VIVS_VS_NEWRANGE_HIGH
, ctx
->shader_state
.vs_inst_mem_size
/ 4);
609 assert(ctx
->shader_state
.VS_INST_ADDR
.bo
);
610 etna_set_state_reloc(stream
, VIVS_VS_INST_ADDR
, &ctx
->shader_state
.VS_INST_ADDR
);
611 etna_set_state(stream
, VIVS_SH_CONFIG
, 0x00000002);
612 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
, VIVS_VS_ICACHE_CONTROL_ENABLE
);
613 etna_set_state(stream
, VIVS_VS_ICACHE_COUNT
, ctx
->shader_state
.vs_inst_mem_size
/ 4 - 1);
615 /* Set icache (PS) */
616 etna_set_state(stream
, VIVS_PS_NEWRANGE_LOW
, 0);
617 etna_set_state(stream
, VIVS_PS_NEWRANGE_HIGH
, ctx
->shader_state
.ps_inst_mem_size
/ 4);
618 assert(ctx
->shader_state
.PS_INST_ADDR
.bo
);
619 etna_set_state_reloc(stream
, VIVS_PS_INST_ADDR
, &ctx
->shader_state
.PS_INST_ADDR
);
620 etna_set_state(stream
, VIVS_SH_CONFIG
, 0x00000002);
621 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
, VIVS_VS_ICACHE_CONTROL_ENABLE
);
622 etna_set_state(stream
, VIVS_PS_ICACHE_COUNT
, ctx
->shader_state
.ps_inst_mem_size
/ 4 - 1);
624 } else if (ctx
->shader_state
.VS_INST_ADDR
.bo
|| ctx
->shader_state
.PS_INST_ADDR
.bo
) {
625 /* ICACHE (pre-HALTI5) */
626 assert(ctx
->specs
.has_icache
&& ctx
->specs
.has_shader_range_registers
);
627 /* Set icache (VS) */
628 etna_set_state(stream
, VIVS_VS_RANGE
, (ctx
->shader_state
.vs_inst_mem_size
/ 4 - 1) << 16);
629 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
,
630 VIVS_VS_ICACHE_CONTROL_ENABLE
|
631 VIVS_VS_ICACHE_CONTROL_FLUSH_VS
);
632 assert(ctx
->shader_state
.VS_INST_ADDR
.bo
);
633 etna_set_state_reloc(stream
, VIVS_VS_INST_ADDR
, &ctx
->shader_state
.VS_INST_ADDR
);
635 /* Set icache (PS) */
636 etna_set_state(stream
, VIVS_PS_RANGE
, (ctx
->shader_state
.ps_inst_mem_size
/ 4 - 1) << 16);
637 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
,
638 VIVS_VS_ICACHE_CONTROL_ENABLE
|
639 VIVS_VS_ICACHE_CONTROL_FLUSH_PS
);
640 assert(ctx
->shader_state
.PS_INST_ADDR
.bo
);
641 etna_set_state_reloc(stream
, VIVS_PS_INST_ADDR
, &ctx
->shader_state
.PS_INST_ADDR
);
643 /* Upload shader directly, first flushing and disabling icache if
644 * supported on this hw */
645 if (ctx
->specs
.has_icache
) {
646 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
,
647 VIVS_VS_ICACHE_CONTROL_FLUSH_PS
|
648 VIVS_VS_ICACHE_CONTROL_FLUSH_VS
);
650 if (ctx
->specs
.has_shader_range_registers
) {
651 etna_set_state(stream
, VIVS_VS_RANGE
, (ctx
->shader_state
.vs_inst_mem_size
/ 4 - 1) << 16);
652 etna_set_state(stream
, VIVS_PS_RANGE
, ((ctx
->shader_state
.ps_inst_mem_size
/ 4 - 1 + 0x100) << 16) |
655 etna_set_state_multi(stream
, ctx
->specs
.vs_offset
,
656 ctx
->shader_state
.vs_inst_mem_size
,
657 ctx
->shader_state
.VS_INST_MEM
);
658 etna_set_state_multi(stream
, ctx
->specs
.ps_offset
,
659 ctx
->shader_state
.ps_inst_mem_size
,
660 ctx
->shader_state
.PS_INST_MEM
);
663 if (ctx
->specs
.has_unified_uniforms
) {
664 etna_set_state(stream
, VIVS_VS_UNIFORM_BASE
, 0);
665 etna_set_state(stream
, VIVS_PS_UNIFORM_BASE
, ctx
->specs
.max_vs_uniforms
);
668 if (do_uniform_flush
)
669 etna_set_state(stream
, VIVS_VS_UNIFORM_CACHE
, VIVS_VS_UNIFORM_CACHE_FLUSH
);
671 etna_uniforms_write(ctx
, ctx
->shader
.vs
, &ctx
->constant_buffer
[PIPE_SHADER_VERTEX
]);
673 if (do_uniform_flush
)
674 etna_set_state(stream
, VIVS_VS_UNIFORM_CACHE
, VIVS_VS_UNIFORM_CACHE_FLUSH
| VIVS_VS_UNIFORM_CACHE_PS
);
676 etna_uniforms_write(ctx
, ctx
->shader
.fs
, &ctx
->constant_buffer
[PIPE_SHADER_FRAGMENT
]);
678 if (ctx
->specs
.halti
>= 5) {
679 /* HALTI5 needs to be prompted to pre-fetch shaders */
680 etna_set_state(stream
, VIVS_VS_ICACHE_PREFETCH
, 0x00000000);
681 etna_set_state(stream
, VIVS_PS_ICACHE_PREFETCH
, 0x00000000);
682 etna_stall(stream
, SYNC_RECIPIENT_RA
, SYNC_RECIPIENT_PE
);
685 /* ideally this cache would only be flushed if there are VS uniform changes */
686 if (do_uniform_flush
)
687 etna_set_state(stream
, VIVS_VS_UNIFORM_CACHE
, VIVS_VS_UNIFORM_CACHE_FLUSH
);
689 if (dirty
& (uniform_dirty_bits
| ctx
->shader
.vs
->uniforms_dirty_bits
))
690 etna_uniforms_write(ctx
, ctx
->shader
.vs
, &ctx
->constant_buffer
[PIPE_SHADER_VERTEX
]);
692 /* ideally this cache would only be flushed if there are PS uniform changes */
693 if (do_uniform_flush
)
694 etna_set_state(stream
, VIVS_VS_UNIFORM_CACHE
, VIVS_VS_UNIFORM_CACHE_FLUSH
| VIVS_VS_UNIFORM_CACHE_PS
);
696 if (dirty
& (uniform_dirty_bits
| ctx
->shader
.fs
->uniforms_dirty_bits
))
697 etna_uniforms_write(ctx
, ctx
->shader
.fs
, &ctx
->constant_buffer
[PIPE_SHADER_FRAGMENT
]);
699 /**** End of state update ****/
701 #undef EMIT_STATE_FIXP
702 #undef EMIT_STATE_RELOC
704 ctx
->dirty_sampler_views
= 0;