/*
 * Copyright (c) 2014-2015 Etnaviv Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Wladimir J. van der Laan <laanwj@gmail.com>
 */

#include "etnaviv_emit.h"

#include "etnaviv_blend.h"
#include "etnaviv_compiler.h"
#include "etnaviv_context.h"
#include "etnaviv_rasterizer.h"
#include "etnaviv_resource.h"
#include "etnaviv_rs.h"
#include "etnaviv_screen.h"
#include "etnaviv_shader.h"
#include "etnaviv_texture.h"
#include "etnaviv_translate.h"
#include "etnaviv_uniforms.h"
#include "etnaviv_util.h"
#include "etnaviv_zsa.h"
#include "hw/common.xml.h"
#include "hw/state.xml.h"
#include "hw/state_blt.xml.h"
#include "util/u_math.h"

47 /* Queue a STALL command (queues 2 words) */
49 CMD_STALL(struct etna_cmd_stream
*stream
, uint32_t from
, uint32_t to
)
51 etna_cmd_stream_emit(stream
, VIV_FE_STALL_HEADER_OP_STALL
);
52 etna_cmd_stream_emit(stream
, VIV_FE_STALL_TOKEN_FROM(from
) | VIV_FE_STALL_TOKEN_TO(to
));
56 etna_stall(struct etna_cmd_stream
*stream
, uint32_t from
, uint32_t to
)
58 bool blt
= (from
== SYNC_RECIPIENT_BLT
) || (to
== SYNC_RECIPIENT_BLT
);
59 etna_cmd_stream_reserve(stream
, blt
? 8 : 4);
62 etna_emit_load_state(stream
, VIVS_BLT_ENABLE
>> 2, 1, 0);
63 etna_cmd_stream_emit(stream
, 1);
66 /* TODO: set bit 28/29 of token after BLT COPY_BUFFER */
67 etna_emit_load_state(stream
, VIVS_GL_SEMAPHORE_TOKEN
>> 2, 1, 0);
68 etna_cmd_stream_emit(stream
, VIVS_GL_SEMAPHORE_TOKEN_FROM(from
) | VIVS_GL_SEMAPHORE_TOKEN_TO(to
));
70 if (from
== SYNC_RECIPIENT_FE
) {
71 /* if the frontend is to be stalled, queue a STALL frontend command */
72 CMD_STALL(stream
, from
, to
);
74 /* otherwise, load the STALL token state */
75 etna_emit_load_state(stream
, VIVS_GL_STALL_TOKEN
>> 2, 1, 0);
76 etna_cmd_stream_emit(stream
, VIVS_GL_STALL_TOKEN_FROM(from
) | VIVS_GL_STALL_TOKEN_TO(to
));
80 etna_emit_load_state(stream
, VIVS_BLT_ENABLE
>> 2, 1, 0);
81 etna_cmd_stream_emit(stream
, 0);
/* Shorthands for the coalescing state emitters. Each expansion refers to the
 * local variables `stream` and `coalesce` of the function it is used in, and
 * prefixes the given register name with VIVS_. */
#define EMIT_STATE(state_name, src_value) \
   etna_coalsence_emit(stream, &coalesce, VIVS_##state_name, src_value)

#define EMIT_STATE_FIXP(state_name, src_value) \
   etna_coalsence_emit_fixp(stream, &coalesce, VIVS_##state_name, src_value)

#define EMIT_STATE_RELOC(state_name, src_value) \
   etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)

#define ETNA_3D_CONTEXT_SIZE  (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */

97 required_stream_size(struct etna_context
*ctx
)
99 unsigned size
= ETNA_3D_CONTEXT_SIZE
;
104 /* vertex elements */
105 size
+= ctx
->vertex_elements
->num_elements
+ 1;
107 /* uniforms - worst case (2 words per uniform load) */
108 size
+= ctx
->shader
.vs
->uniforms
.imm_count
* 2;
109 size
+= ctx
->shader
.fs
->uniforms
.imm_count
* 2;
112 size
+= ctx
->shader_state
.vs_inst_mem_size
+ 1;
113 size
+= ctx
->shader_state
.ps_inst_mem_size
+ 1;
115 /* DRAW_INDEXED_PRIMITIVES command */
118 /* reserve for alignment etc. */
124 /* Emit state that only exists on HALTI5+ */
126 emit_halti5_only_state(struct etna_context
*ctx
, int vs_output_count
)
128 struct etna_cmd_stream
*stream
= ctx
->stream
;
129 uint32_t dirty
= ctx
->dirty
;
130 struct etna_coalesce coalesce
;
132 etna_coalesce_start(stream
, &coalesce
);
133 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
134 /* Magic states (load balancing, inter-unit sync, buffers) */
135 /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT
, vs_output_count
| ((vs_output_count
* 0x10) << 8));
136 /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0
, 0x0001000e | ((0x110/vs_output_count
) << 20));
137 for (int x
= 0; x
< 4; ++x
) {
138 /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x
), ctx
->shader_state
.VS_OUTPUT
[x
]);
141 if (unlikely(dirty
& (ETNA_DIRTY_VERTEX_ELEMENTS
| ETNA_DIRTY_SHADER
))) {
142 for (int x
= 0; x
< 4; ++x
) {
143 /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x
), ctx
->shader_state
.VS_INPUT
[x
]);
146 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
147 /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx
->shader_state
.GL_VARYING_NUM_COMPONENTS
);
148 /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT
, vs_output_count
);
149 /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx
->shader_state
.GL_VARYING_NUM_COMPONENTS
);
150 /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS
, ctx
->shader_state
.GL_HALTI5_SH_SPECIALS
);
152 etna_coalesce_end(stream
, &coalesce
);
155 /* Emit state that no longer exists on HALTI5 */
157 emit_pre_halti5_state(struct etna_context
*ctx
)
159 struct etna_cmd_stream
*stream
= ctx
->stream
;
160 uint32_t dirty
= ctx
->dirty
;
161 struct etna_coalesce coalesce
;
163 etna_coalesce_start(stream
, &coalesce
);
164 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
165 /*00800*/ EMIT_STATE(VS_END_PC
, ctx
->shader_state
.VS_END_PC
);
167 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
168 for (int x
= 0; x
< 4; ++x
) {
169 /*00810*/ EMIT_STATE(VS_OUTPUT(x
), ctx
->shader_state
.VS_OUTPUT
[x
]);
172 if (unlikely(dirty
& (ETNA_DIRTY_VERTEX_ELEMENTS
| ETNA_DIRTY_SHADER
))) {
173 for (int x
= 0; x
< 4; ++x
) {
174 /*00820*/ EMIT_STATE(VS_INPUT(x
), ctx
->shader_state
.VS_INPUT
[x
]);
177 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
178 /*00838*/ EMIT_STATE(VS_START_PC
, ctx
->shader_state
.VS_START_PC
);
180 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
181 for (int x
= 0; x
< 10; ++x
) {
182 /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x
), ctx
->shader_state
.PA_SHADER_ATTRIBUTES
[x
]);
185 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
))) {
186 /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04
, ctx
->framebuffer
.RA_MULTISAMPLE_UNK00E04
);
187 for (int x
= 0; x
< 4; ++x
) {
188 /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x
), ctx
->framebuffer
.RA_MULTISAMPLE_UNK00E10
[x
]);
190 for (int x
= 0; x
< 16; ++x
) {
191 /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x
), ctx
->framebuffer
.RA_CENTROID_TABLE
[x
]);
194 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_FRAMEBUFFER
))) {
195 /*01000*/ EMIT_STATE(PS_END_PC
, ctx
->shader_state
.PS_END_PC
);
197 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_FRAMEBUFFER
))) {
198 /*01018*/ EMIT_STATE(PS_START_PC
, ctx
->shader_state
.PS_START_PC
);
200 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
201 /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS
, ctx
->shader_state
.GL_VARYING_NUM_COMPONENTS
);
202 for (int x
= 0; x
< 2; ++x
) {
203 /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x
), ctx
->shader_state
.GL_VARYING_COMPONENT_USE
[x
]);
206 etna_coalesce_end(stream
, &coalesce
);
209 /* Weave state before draw operation. This function merges all the compiled
210 * state blocks under the context into one device register state. Parts of
211 * this state that are changed since last call (dirty) will be uploaded as
212 * state changes in the command buffer. */
214 etna_emit_state(struct etna_context
*ctx
)
216 struct etna_cmd_stream
*stream
= ctx
->stream
;
217 unsigned ccw
= ctx
->rasterizer
->front_ccw
;
220 /* Pre-reserve the command buffer space which we are likely to need.
221 * This must cover all the state emitted below, and the following
223 etna_cmd_stream_reserve(stream
, required_stream_size(ctx
));
225 uint32_t dirty
= ctx
->dirty
;
227 /* Pre-processing: see what caches we need to flush before making state changes. */
228 uint32_t to_flush
= 0;
229 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
)))
230 to_flush
|= VIVS_GL_FLUSH_CACHE_COLOR
;
231 if (unlikely(dirty
& (ETNA_DIRTY_TEXTURE_CACHES
)))
232 to_flush
|= VIVS_GL_FLUSH_CACHE_TEXTURE
;
233 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
))) /* Framebuffer config changed? */
234 to_flush
|= VIVS_GL_FLUSH_CACHE_COLOR
| VIVS_GL_FLUSH_CACHE_DEPTH
;
235 if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL
))
236 to_flush
|= VIVS_GL_FLUSH_CACHE_TEXTURE
| VIVS_GL_FLUSH_CACHE_COLOR
| VIVS_GL_FLUSH_CACHE_DEPTH
;
239 etna_set_state(stream
, VIVS_GL_FLUSH_CACHE
, to_flush
);
240 etna_stall(stream
, SYNC_RECIPIENT_RA
, SYNC_RECIPIENT_PE
);
243 /* Flush TS cache before changing TS configuration. */
244 if (unlikely(dirty
& ETNA_DIRTY_TS
)) {
245 etna_set_state(stream
, VIVS_TS_FLUSH_CACHE
, VIVS_TS_FLUSH_CACHE_FLUSH
);
248 /* Update vertex elements. This is different from any of the other states, in that
249 * a) the number of vertex elements written matters: so write only active ones
250 * b) the vertex element states must all be written: do not skip entries that stay the same */
251 if (dirty
& (ETNA_DIRTY_VERTEX_ELEMENTS
)) {
252 if (ctx
->specs
.halti
>= 5) {
253 /*17800*/ etna_set_state_multi(stream
, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
254 ctx
->vertex_elements
->num_elements
,
255 ctx
->vertex_elements
->NFE_GENERIC_ATTRIB_CONFIG0
);
256 /*17A00*/ etna_set_state_multi(stream
, VIVS_NFE_GENERIC_ATTRIB_SCALE(0),
257 ctx
->vertex_elements
->num_elements
,
258 ctx
->vertex_elements
->NFE_GENERIC_ATTRIB_SCALE
);
259 /*17A80*/ etna_set_state_multi(stream
, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),
260 ctx
->vertex_elements
->num_elements
,
261 ctx
->vertex_elements
->NFE_GENERIC_ATTRIB_CONFIG1
);
263 /* Special case: vertex elements must always be sent in full if changed */
264 /*00600*/ etna_set_state_multi(stream
, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
265 ctx
->vertex_elements
->num_elements
,
266 ctx
->vertex_elements
->FE_VERTEX_ELEMENT_CONFIG
);
267 if (ctx
->specs
.halti
>= 2) {
268 /*00780*/ etna_set_state_multi(stream
, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
269 ctx
->vertex_elements
->num_elements
,
270 ctx
->vertex_elements
->NFE_GENERIC_ATTRIB_SCALE
);
274 unsigned vs_output_count
= etna_rasterizer_state(ctx
->rasterizer
)->point_size_per_vertex
275 ? ctx
->shader_state
.VS_OUTPUT_COUNT_PSIZE
276 : ctx
->shader_state
.VS_OUTPUT_COUNT
;
278 /* The following code is originally generated by gen_merge_state.py, to
279 * emit state in increasing order of address (this makes it possible to merge
280 * consecutive register updates into one SET_STATE command)
282 * There have been some manual changes, where the weaving operation is not
285 * - num vertex elements
290 * - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not
292 * - PS / framebuffer interaction for MSAA
293 * - move update of GL_MULTI_SAMPLE_CONFIG first
294 * - add unlikely()/likely()
296 struct etna_coalesce coalesce
;
298 etna_coalesce_start(stream
, &coalesce
);
300 /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here
303 /* multi sample config is set first, and outside of the normal sorting
304 * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
305 * possibly PS.TEMP_REGISTER_CONTROL).
307 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
| ETNA_DIRTY_SAMPLE_MASK
))) {
308 uint32_t val
= VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx
->sample_mask
);
309 val
|= ctx
->framebuffer
.GL_MULTI_SAMPLE_CONFIG
;
311 /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG
, val
);
313 if (likely(dirty
& (ETNA_DIRTY_INDEX_BUFFER
))) {
314 /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR
, &ctx
->index_buffer
.FE_INDEX_STREAM_BASE_ADDR
);
315 /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL
, ctx
->index_buffer
.FE_INDEX_STREAM_CONTROL
);
317 if (likely(dirty
& (ETNA_DIRTY_INDEX_BUFFER
))) {
318 /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX
, ctx
->index_buffer
.FE_PRIMITIVE_RESTART_INDEX
);
320 if (likely(dirty
& (ETNA_DIRTY_VERTEX_BUFFERS
))) {
321 if (ctx
->specs
.halti
>= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */
322 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
323 /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x
), &ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
);
325 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
326 if (ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
.bo
) {
327 /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x
), ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_CONTROL
);
330 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
331 if (ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
.bo
) {
332 /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_VERTEX_DIVISOR(x
), ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_UNK14680
);
335 } else if(ctx
->specs
.stream_count
>= 1) { /* hw w/ multiple vertex streams */
336 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
337 /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x
), &ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
);
339 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
340 if (ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
.bo
) {
341 /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x
), ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_CONTROL
);
344 } else { /* hw w/ single vertex stream */
345 /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR
, &ctx
->vertex_buffer
.cvb
[0].FE_VERTEX_STREAM_BASE_ADDR
);
346 /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL
, ctx
->vertex_buffer
.cvb
[0].FE_VERTEX_STREAM_CONTROL
);
349 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_RASTERIZER
))) {
351 /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT
, vs_output_count
);
353 if (unlikely(dirty
& (ETNA_DIRTY_VERTEX_ELEMENTS
| ETNA_DIRTY_SHADER
))) {
354 /*00808*/ EMIT_STATE(VS_INPUT_COUNT
, ctx
->shader_state
.VS_INPUT_COUNT
);
355 /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL
, ctx
->shader_state
.VS_TEMP_REGISTER_CONTROL
);
357 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
358 /*00830*/ EMIT_STATE(VS_LOAD_BALANCING
, ctx
->shader_state
.VS_LOAD_BALANCING
);
360 if (unlikely(dirty
& (ETNA_DIRTY_VIEWPORT
))) {
361 /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X
, ctx
->viewport
.PA_VIEWPORT_SCALE_X
);
362 /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y
, ctx
->viewport
.PA_VIEWPORT_SCALE_Y
);
363 /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z
, ctx
->viewport
.PA_VIEWPORT_SCALE_Z
);
364 /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X
, ctx
->viewport
.PA_VIEWPORT_OFFSET_X
);
365 /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y
, ctx
->viewport
.PA_VIEWPORT_OFFSET_Y
);
366 /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z
, ctx
->viewport
.PA_VIEWPORT_OFFSET_Z
);
368 if (unlikely(dirty
& (ETNA_DIRTY_RASTERIZER
))) {
369 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
371 /*00A18*/ EMIT_STATE(PA_LINE_WIDTH
, rasterizer
->PA_LINE_WIDTH
);
372 /*00A1C*/ EMIT_STATE(PA_POINT_SIZE
, rasterizer
->PA_POINT_SIZE
);
373 /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE
, rasterizer
->PA_SYSTEM_MODE
);
375 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
376 /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT
, ctx
->shader_state
.PA_ATTRIBUTE_ELEMENT_COUNT
);
378 if (unlikely(dirty
& (ETNA_DIRTY_RASTERIZER
| ETNA_DIRTY_SHADER
))) {
379 uint32_t val
= etna_rasterizer_state(ctx
->rasterizer
)->PA_CONFIG
;
380 /*00A34*/ EMIT_STATE(PA_CONFIG
, val
& ctx
->shader_state
.PA_CONFIG
);
382 if (unlikely(dirty
& (ETNA_DIRTY_RASTERIZER
))) {
383 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
384 /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0
, rasterizer
->PA_LINE_WIDTH
);
385 /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1
, rasterizer
->PA_LINE_WIDTH
);
387 if (unlikely(dirty
& (ETNA_DIRTY_SCISSOR
| ETNA_DIRTY_FRAMEBUFFER
|
388 ETNA_DIRTY_RASTERIZER
| ETNA_DIRTY_VIEWPORT
))) {
389 /* this is a bit of a mess: rasterizer.scissor determines whether to use
390 * only the framebuffer scissor, or specific scissor state, and the
391 * viewport clips too so the logic spans four CSOs */
392 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
394 uint32_t scissor_left
=
395 MAX2(ctx
->framebuffer
.SE_SCISSOR_LEFT
, ctx
->viewport
.SE_SCISSOR_LEFT
);
396 uint32_t scissor_top
=
397 MAX2(ctx
->framebuffer
.SE_SCISSOR_TOP
, ctx
->viewport
.SE_SCISSOR_TOP
);
398 uint32_t scissor_right
=
399 MIN2(ctx
->framebuffer
.SE_SCISSOR_RIGHT
, ctx
->viewport
.SE_SCISSOR_RIGHT
);
400 uint32_t scissor_bottom
=
401 MIN2(ctx
->framebuffer
.SE_SCISSOR_BOTTOM
, ctx
->viewport
.SE_SCISSOR_BOTTOM
);
403 if (rasterizer
->scissor
) {
404 scissor_left
= MAX2(ctx
->scissor
.SE_SCISSOR_LEFT
, scissor_left
);
405 scissor_top
= MAX2(ctx
->scissor
.SE_SCISSOR_TOP
, scissor_top
);
406 scissor_right
= MIN2(ctx
->scissor
.SE_SCISSOR_RIGHT
, scissor_right
);
407 scissor_bottom
= MIN2(ctx
->scissor
.SE_SCISSOR_BOTTOM
, scissor_bottom
);
410 /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT
, scissor_left
);
411 /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP
, scissor_top
);
412 /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT
, scissor_right
);
413 /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM
, scissor_bottom
);
415 if (unlikely(dirty
& (ETNA_DIRTY_RASTERIZER
))) {
416 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
418 /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE
, rasterizer
->SE_DEPTH_SCALE
);
419 /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS
, rasterizer
->SE_DEPTH_BIAS
);
420 /*00C18*/ EMIT_STATE(SE_CONFIG
, rasterizer
->SE_CONFIG
);
422 if (unlikely(dirty
& (ETNA_DIRTY_SCISSOR
| ETNA_DIRTY_FRAMEBUFFER
|
423 ETNA_DIRTY_RASTERIZER
| ETNA_DIRTY_VIEWPORT
))) {
424 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
426 uint32_t clip_right
=
427 MIN2(ctx
->framebuffer
.SE_CLIP_RIGHT
, ctx
->viewport
.SE_CLIP_RIGHT
);
428 uint32_t clip_bottom
=
429 MIN2(ctx
->framebuffer
.SE_CLIP_BOTTOM
, ctx
->viewport
.SE_CLIP_BOTTOM
);
431 if (rasterizer
->scissor
) {
432 clip_right
= MIN2(ctx
->scissor
.SE_CLIP_RIGHT
, clip_right
);
433 clip_bottom
= MIN2(ctx
->scissor
.SE_CLIP_BOTTOM
, clip_bottom
);
436 /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT
, clip_right
);
437 /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM
, clip_bottom
);
439 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
440 /*00E00*/ EMIT_STATE(RA_CONTROL
, ctx
->shader_state
.RA_CONTROL
);
442 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_FRAMEBUFFER
))) {
443 /*01004*/ EMIT_STATE(PS_OUTPUT_REG
, ctx
->shader_state
.PS_OUTPUT_REG
);
444 /*01008*/ EMIT_STATE(PS_INPUT_COUNT
,
445 ctx
->framebuffer
.msaa_mode
446 ? ctx
->shader_state
.PS_INPUT_COUNT_MSAA
447 : ctx
->shader_state
.PS_INPUT_COUNT
);
448 /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL
,
449 ctx
->framebuffer
.msaa_mode
450 ? ctx
->shader_state
.PS_TEMP_REGISTER_CONTROL_MSAA
451 : ctx
->shader_state
.PS_TEMP_REGISTER_CONTROL
);
452 /*01010*/ EMIT_STATE(PS_CONTROL
, ctx
->shader_state
.PS_CONTROL
);
454 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
| ETNA_DIRTY_FRAMEBUFFER
))) {
455 uint32_t val
= etna_zsa_state(ctx
->zsa
)->PE_DEPTH_CONFIG
;
456 /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG
, val
| ctx
->framebuffer
.PE_DEPTH_CONFIG
);
458 if (unlikely(dirty
& (ETNA_DIRTY_VIEWPORT
))) {
459 /*01404*/ EMIT_STATE(PE_DEPTH_NEAR
, ctx
->viewport
.PE_DEPTH_NEAR
);
460 /*01408*/ EMIT_STATE(PE_DEPTH_FAR
, ctx
->viewport
.PE_DEPTH_FAR
);
462 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
))) {
463 /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE
, ctx
->framebuffer
.PE_DEPTH_NORMALIZE
);
465 if (ctx
->specs
.pixel_pipes
== 1) {
466 /*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR
, &ctx
->framebuffer
.PE_DEPTH_ADDR
);
469 /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE
, ctx
->framebuffer
.PE_DEPTH_STRIDE
);
472 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
| ETNA_DIRTY_RASTERIZER
))) {
473 uint32_t val
= etna_zsa_state(ctx
->zsa
)->PE_STENCIL_OP
[ccw
];
474 /*01418*/ EMIT_STATE(PE_STENCIL_OP
, val
);
476 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
| ETNA_DIRTY_STENCIL_REF
| ETNA_DIRTY_RASTERIZER
))) {
477 uint32_t val
= etna_zsa_state(ctx
->zsa
)->PE_STENCIL_CONFIG
[ccw
];
478 /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG
, val
| ctx
->stencil_ref
.PE_STENCIL_CONFIG
[ccw
]);
480 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
))) {
481 uint32_t val
= etna_zsa_state(ctx
->zsa
)->PE_ALPHA_OP
;
482 /*01420*/ EMIT_STATE(PE_ALPHA_OP
, val
);
484 if (unlikely(dirty
& (ETNA_DIRTY_BLEND_COLOR
))) {
485 /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR
, ctx
->blend_color
.PE_ALPHA_BLEND_COLOR
);
487 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
))) {
488 uint32_t val
= etna_blend_state(ctx
->blend
)->PE_ALPHA_CONFIG
;
489 /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG
, val
);
491 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
| ETNA_DIRTY_FRAMEBUFFER
))) {
493 /* Use the components and overwrite bits in framebuffer.PE_COLOR_FORMAT
494 * as a mask to enable the bits from blend PE_COLOR_FORMAT */
495 val
= ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK
|
496 VIVS_PE_COLOR_FORMAT_OVERWRITE
);
497 val
|= etna_blend_state(ctx
->blend
)->PE_COLOR_FORMAT
;
498 val
&= ctx
->framebuffer
.PE_COLOR_FORMAT
;
499 /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT
, val
);
501 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
))) {
502 if (ctx
->specs
.pixel_pipes
== 1) {
503 /*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR
, &ctx
->framebuffer
.PE_COLOR_ADDR
);
504 /*01434*/ EMIT_STATE(PE_COLOR_STRIDE
, ctx
->framebuffer
.PE_COLOR_STRIDE
);
505 /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL
, ctx
->framebuffer
.PE_HDEPTH_CONTROL
);
506 } else if (ctx
->specs
.pixel_pipes
== 2) {
507 /*01434*/ EMIT_STATE(PE_COLOR_STRIDE
, ctx
->framebuffer
.PE_COLOR_STRIDE
);
508 /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL
, ctx
->framebuffer
.PE_HDEPTH_CONTROL
);
509 /*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx
->framebuffer
.PE_PIPE_COLOR_ADDR
[0]);
510 /*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx
->framebuffer
.PE_PIPE_COLOR_ADDR
[1]);
511 /*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx
->framebuffer
.PE_PIPE_DEPTH_ADDR
[0]);
512 /*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx
->framebuffer
.PE_PIPE_DEPTH_ADDR
[1]);
517 if (unlikely(dirty
& (ETNA_DIRTY_STENCIL_REF
| ETNA_DIRTY_RASTERIZER
))) {
518 /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT
, ctx
->stencil_ref
.PE_STENCIL_CONFIG_EXT
[ccw
]);
520 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
| ETNA_DIRTY_FRAMEBUFFER
))) {
521 struct etna_blend_state
*blend
= etna_blend_state(ctx
->blend
);
522 /*014A4*/ EMIT_STATE(PE_LOGIC_OP
, blend
->PE_LOGIC_OP
| ctx
->framebuffer
.PE_LOGIC_OP
);
524 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
))) {
525 struct etna_blend_state
*blend
= etna_blend_state(ctx
->blend
);
526 for (int x
= 0; x
< 2; ++x
) {
527 /*014A8*/ EMIT_STATE(PE_DITHER(x
), blend
->PE_DITHER
[x
]);
530 if (unlikely(dirty
& (ETNA_DIRTY_BLEND_COLOR
))) {
531 /*014B0*/ EMIT_STATE(PE_ALPHA_COLOR_EXT0
, ctx
->blend_color
.PE_ALPHA_COLOR_EXT0
);
532 /*014B4*/ EMIT_STATE(PE_ALPHA_COLOR_EXT1
, ctx
->blend_color
.PE_ALPHA_COLOR_EXT1
);
534 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
| ETNA_DIRTY_RASTERIZER
))) {
535 /*014B8*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT2
, etna_zsa_state(ctx
->zsa
)->PE_STENCIL_CONFIG_EXT2
[ccw
]);
537 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
)) && ctx
->specs
.halti
>= 3)
538 /*014BC*/ EMIT_STATE(PE_MEM_CONFIG
, ctx
->framebuffer
.PE_MEM_CONFIG
);
539 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
| ETNA_DIRTY_TS
))) {
540 /*01654*/ EMIT_STATE(TS_MEM_CONFIG
, ctx
->framebuffer
.TS_MEM_CONFIG
);
541 /*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE
, &ctx
->framebuffer
.TS_COLOR_STATUS_BASE
);
542 /*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE
, &ctx
->framebuffer
.TS_COLOR_SURFACE_BASE
);
543 /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE
, ctx
->framebuffer
.TS_COLOR_CLEAR_VALUE
);
544 /*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE
, &ctx
->framebuffer
.TS_DEPTH_STATUS_BASE
);
545 /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE
, &ctx
->framebuffer
.TS_DEPTH_SURFACE_BASE
);
546 /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE
, ctx
->framebuffer
.TS_DEPTH_CLEAR_VALUE
);
548 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
549 /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS
, ctx
->shader_state
.GL_VARYING_TOTAL_COMPONENTS
);
551 etna_coalesce_end(stream
, &coalesce
);
552 /* end only EMIT_STATE */
554 /* Emit strongly architecture-specific state */
555 if (ctx
->specs
.halti
>= 5)
556 emit_halti5_only_state(ctx
, vs_output_count
);
558 emit_pre_halti5_state(ctx
);
560 ctx
->emit_texture_state(ctx
);
562 /* Insert a FE/PE stall as changing the shader instructions (and maybe
563 * the uniforms) can corrupt the previous in-progress draw operation.
564 * Observed with amoeba on GC2000 during the right-to-left rendering
565 * of PI, and can cause GPU hangs immediately after.
566 * I summise that this is because the "new" locations at 0xc000 are not
567 * properly protected against updates as other states seem to be. Hence,
568 * we detect the "new" vertex shader instruction offset to apply this. */
569 if (ctx
->dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_CONSTBUF
) && ctx
->specs
.vs_offset
> 0x4000)
570 etna_stall(ctx
->stream
, SYNC_RECIPIENT_FE
, SYNC_RECIPIENT_PE
);
572 /* We need to update the uniform cache only if one of the following bits are
574 * - ETNA_DIRTY_SHADER
575 * - ETNA_DIRTY_CONSTBUF
576 * - uniforms_dirty_bits
578 * In case of ETNA_DIRTY_SHADER we need load all uniforms from the cache. In
580 * other cases we can load on the changed uniforms.
582 static const uint32_t uniform_dirty_bits
=
583 ETNA_DIRTY_SHADER
| ETNA_DIRTY_CONSTBUF
;
585 /**** Large dynamically-sized state ****/
586 bool do_uniform_flush
= ctx
->specs
.halti
< 5;
587 if (dirty
& (ETNA_DIRTY_SHADER
)) {
588 /* Special case: a new shader was loaded; simply re-load all uniforms and
589 * shader code at once */
590 /* This sequence is special, do not change ordering unless necessary. According to comment
591 snippets in the Vivante kernel driver a process called "steering" goes on while programming
592 shader state. This (as I understand it) means certain unified states are "steered"
593 toward a specific shader unit (VS/PS/...) based on either explicit flags in register
594 00860, or what other state is written before "auto-steering". So this means some
595 state can legitimately be programmed multiple times.
598 if (ctx
->specs
.halti
>= 5) { /* ICACHE (HALTI5) */
599 assert(ctx
->shader_state
.VS_INST_ADDR
.bo
&& ctx
->shader_state
.PS_INST_ADDR
.bo
);
600 /* Set icache (VS) */
601 etna_set_state(stream
, VIVS_VS_NEWRANGE_LOW
, 0);
602 etna_set_state(stream
, VIVS_VS_NEWRANGE_HIGH
, ctx
->shader_state
.vs_inst_mem_size
/ 4);
603 assert(ctx
->shader_state
.VS_INST_ADDR
.bo
);
604 etna_set_state_reloc(stream
, VIVS_VS_INST_ADDR
, &ctx
->shader_state
.VS_INST_ADDR
);
605 etna_set_state(stream
, VIVS_SH_CONFIG
, 0x00000002);
606 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
, VIVS_VS_ICACHE_CONTROL_ENABLE
);
607 etna_set_state(stream
, VIVS_VS_ICACHE_COUNT
, ctx
->shader_state
.vs_inst_mem_size
/ 4 - 1);
609 /* Set icache (PS) */
610 etna_set_state(stream
, VIVS_PS_NEWRANGE_LOW
, 0);
611 etna_set_state(stream
, VIVS_PS_NEWRANGE_HIGH
, ctx
->shader_state
.ps_inst_mem_size
/ 4);
612 assert(ctx
->shader_state
.PS_INST_ADDR
.bo
);
613 etna_set_state_reloc(stream
, VIVS_PS_INST_ADDR
, &ctx
->shader_state
.PS_INST_ADDR
);
614 etna_set_state(stream
, VIVS_SH_CONFIG
, 0x00000002);
615 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
, VIVS_VS_ICACHE_CONTROL_ENABLE
);
616 etna_set_state(stream
, VIVS_PS_ICACHE_COUNT
, ctx
->shader_state
.ps_inst_mem_size
/ 4 - 1);
618 } else if (ctx
->shader_state
.VS_INST_ADDR
.bo
|| ctx
->shader_state
.PS_INST_ADDR
.bo
) {
619 /* ICACHE (pre-HALTI5) */
620 assert(ctx
->specs
.has_icache
&& ctx
->specs
.has_shader_range_registers
);
621 /* Set icache (VS) */
622 etna_set_state(stream
, VIVS_VS_RANGE
, (ctx
->shader_state
.vs_inst_mem_size
/ 4 - 1) << 16);
623 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
,
624 VIVS_VS_ICACHE_CONTROL_ENABLE
|
625 VIVS_VS_ICACHE_CONTROL_FLUSH_VS
);
626 assert(ctx
->shader_state
.VS_INST_ADDR
.bo
);
627 etna_set_state_reloc(stream
, VIVS_VS_INST_ADDR
, &ctx
->shader_state
.VS_INST_ADDR
);
629 /* Set icache (PS) */
630 etna_set_state(stream
, VIVS_PS_RANGE
, (ctx
->shader_state
.ps_inst_mem_size
/ 4 - 1) << 16);
631 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
,
632 VIVS_VS_ICACHE_CONTROL_ENABLE
|
633 VIVS_VS_ICACHE_CONTROL_FLUSH_PS
);
634 assert(ctx
->shader_state
.PS_INST_ADDR
.bo
);
635 etna_set_state_reloc(stream
, VIVS_PS_INST_ADDR
, &ctx
->shader_state
.PS_INST_ADDR
);
637 /* Upload shader directly, first flushing and disabling icache if
638 * supported on this hw */
639 if (ctx
->specs
.has_icache
) {
640 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
,
641 VIVS_VS_ICACHE_CONTROL_FLUSH_PS
|
642 VIVS_VS_ICACHE_CONTROL_FLUSH_VS
);
644 if (ctx
->specs
.has_shader_range_registers
) {
645 etna_set_state(stream
, VIVS_VS_RANGE
, (ctx
->shader_state
.vs_inst_mem_size
/ 4 - 1) << 16);
646 etna_set_state(stream
, VIVS_PS_RANGE
, ((ctx
->shader_state
.ps_inst_mem_size
/ 4 - 1 + 0x100) << 16) |
649 etna_set_state_multi(stream
, ctx
->specs
.vs_offset
,
650 ctx
->shader_state
.vs_inst_mem_size
,
651 ctx
->shader_state
.VS_INST_MEM
);
652 etna_set_state_multi(stream
, ctx
->specs
.ps_offset
,
653 ctx
->shader_state
.ps_inst_mem_size
,
654 ctx
->shader_state
.PS_INST_MEM
);
657 if (ctx
->specs
.has_unified_uniforms
) {
658 etna_set_state(stream
, VIVS_VS_UNIFORM_BASE
, 0);
659 etna_set_state(stream
, VIVS_PS_UNIFORM_BASE
, ctx
->specs
.max_vs_uniforms
);
662 if (do_uniform_flush
)
663 etna_set_state(stream
, VIVS_VS_UNIFORM_CACHE
, VIVS_VS_UNIFORM_CACHE_FLUSH
);
665 etna_uniforms_write(ctx
, ctx
->shader
.vs
, &ctx
->constant_buffer
[PIPE_SHADER_VERTEX
]);
667 if (do_uniform_flush
)
668 etna_set_state(stream
, VIVS_VS_UNIFORM_CACHE
, VIVS_VS_UNIFORM_CACHE_FLUSH
| VIVS_VS_UNIFORM_CACHE_PS
);
670 etna_uniforms_write(ctx
, ctx
->shader
.fs
, &ctx
->constant_buffer
[PIPE_SHADER_FRAGMENT
]);
672 if (ctx
->specs
.halti
>= 5) {
673 /* HALTI5 needs to be prompted to pre-fetch shaders */
674 etna_set_state(stream
, VIVS_VS_ICACHE_PREFETCH
, 0x00000000);
675 etna_set_state(stream
, VIVS_PS_ICACHE_PREFETCH
, 0x00000000);
676 etna_stall(stream
, SYNC_RECIPIENT_RA
, SYNC_RECIPIENT_PE
);
679 /* ideally this cache would only be flushed if there are VS uniform changes */
680 if (do_uniform_flush
)
681 etna_set_state(stream
, VIVS_VS_UNIFORM_CACHE
, VIVS_VS_UNIFORM_CACHE_FLUSH
);
683 if (dirty
& (uniform_dirty_bits
| ctx
->shader
.vs
->uniforms_dirty_bits
))
684 etna_uniforms_write(ctx
, ctx
->shader
.vs
, &ctx
->constant_buffer
[PIPE_SHADER_VERTEX
]);
686 /* ideally this cache would only be flushed if there are PS uniform changes */
687 if (do_uniform_flush
)
688 etna_set_state(stream
, VIVS_VS_UNIFORM_CACHE
, VIVS_VS_UNIFORM_CACHE_FLUSH
| VIVS_VS_UNIFORM_CACHE_PS
);
690 if (dirty
& (uniform_dirty_bits
| ctx
->shader
.fs
->uniforms_dirty_bits
))
691 etna_uniforms_write(ctx
, ctx
->shader
.fs
, &ctx
->constant_buffer
[PIPE_SHADER_FRAGMENT
]);
693 /**** End of state update ****/
695 #undef EMIT_STATE_FIXP
696 #undef EMIT_STATE_RELOC
698 ctx
->dirty_sampler_views
= 0;