/*
 * Copyright (c) 2014-2015 Etnaviv Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Wladimir J. van der Laan <laanwj@gmail.com>
 */
27 #include "etnaviv_emit.h"
29 #include "etnaviv_blend.h"
30 #include "etnaviv_compiler.h"
31 #include "etnaviv_context.h"
32 #include "etnaviv_rasterizer.h"
33 #include "etnaviv_resource.h"
34 #include "etnaviv_rs.h"
35 #include "etnaviv_screen.h"
36 #include "etnaviv_shader.h"
37 #include "etnaviv_texture.h"
38 #include "etnaviv_translate.h"
39 #include "etnaviv_uniforms.h"
40 #include "etnaviv_util.h"
41 #include "etnaviv_zsa.h"
42 #include "hw/common.xml.h"
43 #include "hw/state.xml.h"
44 #include "hw/state_blt.xml.h"
45 #include "util/u_math.h"
47 /* Queue a STALL command (queues 2 words) */
49 CMD_STALL(struct etna_cmd_stream
*stream
, uint32_t from
, uint32_t to
)
51 etna_cmd_stream_emit(stream
, VIV_FE_STALL_HEADER_OP_STALL
);
52 etna_cmd_stream_emit(stream
, VIV_FE_STALL_TOKEN_FROM(from
) | VIV_FE_STALL_TOKEN_TO(to
));
56 etna_stall(struct etna_cmd_stream
*stream
, uint32_t from
, uint32_t to
)
58 bool blt
= (from
== SYNC_RECIPIENT_BLT
) || (to
== SYNC_RECIPIENT_BLT
);
59 etna_cmd_stream_reserve(stream
, blt
? 8 : 4);
62 etna_emit_load_state(stream
, VIVS_BLT_ENABLE
>> 2, 1, 0);
63 etna_cmd_stream_emit(stream
, 1);
66 /* TODO: set bit 28/29 of token after BLT COPY_BUFFER */
67 etna_emit_load_state(stream
, VIVS_GL_SEMAPHORE_TOKEN
>> 2, 1, 0);
68 etna_cmd_stream_emit(stream
, VIVS_GL_SEMAPHORE_TOKEN_FROM(from
) | VIVS_GL_SEMAPHORE_TOKEN_TO(to
));
70 if (from
== SYNC_RECIPIENT_FE
) {
71 /* if the frontend is to be stalled, queue a STALL frontend command */
72 CMD_STALL(stream
, from
, to
);
74 /* otherwise, load the STALL token state */
75 etna_emit_load_state(stream
, VIVS_GL_STALL_TOKEN
>> 2, 1, 0);
76 etna_cmd_stream_emit(stream
, VIVS_GL_STALL_TOKEN_FROM(from
) | VIVS_GL_STALL_TOKEN_TO(to
));
80 etna_emit_load_state(stream
, VIVS_BLT_ENABLE
>> 2, 1, 0);
81 etna_cmd_stream_emit(stream
, 0);
/* Shorthands for the coalescing state emitters. Each one pastes VIVS_ onto the
 * register name and forwards to the corresponding etna_coalsence_emit*()
 * helper. They expect locals named `stream` and `coalesce` to be in scope at
 * the point of use. */
#define EMIT_STATE(state_name, src_value) \
   etna_coalsence_emit(stream, &coalesce, VIVS_##state_name, src_value)

/* Variant that goes through etna_coalsence_emit_fixp(). */
#define EMIT_STATE_FIXP(state_name, src_value) \
   etna_coalsence_emit_fixp(stream, &coalesce, VIVS_##state_name, src_value)

/* Variant that goes through etna_coalsence_emit_reloc(); src_value is passed
 * as a pointer by the callers in this file. */
#define EMIT_STATE_RELOC(state_name, src_value) \
   etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)
94 /* Create bit field that specifies which samplers are active and thus need to be
96 * 32 bits is enough for 32 samplers. As far as I know this is the upper bound
97 * supported on any Vivante hw
101 active_samplers_bits(struct etna_context
*ctx
)
103 return ctx
->active_sampler_views
& ctx
->active_samplers
;
/* Fixed baseline for the command-stream reservation computed by
 * required_stream_size() below. */
106 #define ETNA_3D_CONTEXT_SIZE (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */
/* Estimate how much command-stream space the state emission for one draw may
 * need; etna_emit_state() hands the result to etna_cmd_stream_reserve().
 * The estimate starts at ETNA_3D_CONTEXT_SIZE and grows with the dynamically
 * sized state visible here: vertex elements, VS/FS uniforms and VS/PS
 * instruction memory.
 *
 * NOTE(review): this listing has lost lines (return type, braces, the
 * statements belonging to the last two comments, and the return). Restore
 * them from the authoritative file; do not infer the constants. */
109 required_stream_size(struct etna_context
*ctx
)
/* start from the fixed-state baseline */
111 unsigned size
= ETNA_3D_CONTEXT_SIZE
;
116 /* vertex elements */
/* one entry per element, plus one extra slot */
117 size
+= ctx
->vertex_elements
->num_elements
+ 1;
119 /* uniforms - worst case (2 words per uniform load) */
/* vertex shader constants */
120 size
+= ctx
->shader
.vs
->uniforms
.const_count
* 2;
/* fragment shader constants */
121 size
+= ctx
->shader
.fs
->uniforms
.const_count
* 2;
/* shader instruction memory for VS, plus one extra slot */
124 size
+= ctx
->shader_state
.vs_inst_mem_size
+ 1;
/* shader instruction memory for PS, plus one extra slot */
125 size
+= ctx
->shader_state
.ps_inst_mem_size
+ 1;
/* NOTE(review): the statement for the following comment is not visible here */
127 /* DRAW_INDEXED_PRIMITIVES command */
/* NOTE(review): the statement for the following comment is not visible here */
130 /* reserve for alignment etc. */
136 /* Emit state that only exists on HALTI5+ */
138 emit_halti5_only_state(struct etna_context
*ctx
, int vs_output_count
)
140 struct etna_cmd_stream
*stream
= ctx
->stream
;
141 uint32_t dirty
= ctx
->dirty
;
142 struct etna_coalesce coalesce
;
144 etna_coalesce_start(stream
, &coalesce
);
145 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
146 /* Magic states (load balancing, inter-unit sync, buffers) */
147 /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT
, vs_output_count
| ((vs_output_count
* 0x10) << 8));
148 /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0
, 0x0001000e | ((0x110/vs_output_count
) << 20));
149 for (int x
= 0; x
< 4; ++x
) {
150 /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x
), ctx
->shader_state
.VS_OUTPUT
[x
]);
153 if (unlikely(dirty
& (ETNA_DIRTY_VERTEX_ELEMENTS
| ETNA_DIRTY_SHADER
))) {
154 for (int x
= 0; x
< 4; ++x
) {
155 /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x
), ctx
->shader_state
.VS_INPUT
[x
]);
158 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
159 /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx
->shader_state
.GL_VARYING_NUM_COMPONENTS
);
160 /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT
, vs_output_count
);
161 /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx
->shader_state
.GL_VARYING_NUM_COMPONENTS
);
162 /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS
, ctx
->shader_state
.GL_HALTI5_SH_SPECIALS
);
164 etna_coalesce_end(stream
, &coalesce
);
167 /* Emit state that no longer exists on HALTI5 */
169 emit_pre_halti5_state(struct etna_context
*ctx
)
171 struct etna_cmd_stream
*stream
= ctx
->stream
;
172 uint32_t dirty
= ctx
->dirty
;
173 struct etna_coalesce coalesce
;
175 etna_coalesce_start(stream
, &coalesce
);
176 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
177 /*00800*/ EMIT_STATE(VS_END_PC
, ctx
->shader_state
.VS_END_PC
);
179 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
180 for (int x
= 0; x
< 4; ++x
) {
181 /*00810*/ EMIT_STATE(VS_OUTPUT(x
), ctx
->shader_state
.VS_OUTPUT
[x
]);
184 if (unlikely(dirty
& (ETNA_DIRTY_VERTEX_ELEMENTS
| ETNA_DIRTY_SHADER
))) {
185 for (int x
= 0; x
< 4; ++x
) {
186 /*00820*/ EMIT_STATE(VS_INPUT(x
), ctx
->shader_state
.VS_INPUT
[x
]);
189 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
190 /*00838*/ EMIT_STATE(VS_START_PC
, ctx
->shader_state
.VS_START_PC
);
192 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
193 for (int x
= 0; x
< 10; ++x
) {
194 /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x
), ctx
->shader_state
.PA_SHADER_ATTRIBUTES
[x
]);
197 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
))) {
198 /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04
, ctx
->framebuffer
.RA_MULTISAMPLE_UNK00E04
);
199 for (int x
= 0; x
< 4; ++x
) {
200 /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x
), ctx
->framebuffer
.RA_MULTISAMPLE_UNK00E10
[x
]);
202 for (int x
= 0; x
< 16; ++x
) {
203 /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x
), ctx
->framebuffer
.RA_CENTROID_TABLE
[x
]);
206 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_FRAMEBUFFER
))) {
207 /*01000*/ EMIT_STATE(PS_END_PC
, ctx
->shader_state
.PS_END_PC
);
209 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_FRAMEBUFFER
))) {
210 /*01018*/ EMIT_STATE(PS_START_PC
, ctx
->shader_state
.PS_START_PC
);
212 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
213 /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS
, ctx
->shader_state
.GL_VARYING_NUM_COMPONENTS
);
214 for (int x
= 0; x
< 2; ++x
) {
215 /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x
), ctx
->shader_state
.GL_VARYING_COMPONENT_USE
[x
]);
218 etna_coalesce_end(stream
, &coalesce
);
221 /* Weave state before draw operation. This function merges all the compiled
222 * state blocks under the context into one device register state. Parts of
223 * this state that are changed since last call (dirty) will be uploaded as
224 * state changes in the command buffer. */
226 etna_emit_state(struct etna_context
*ctx
)
228 struct etna_cmd_stream
*stream
= ctx
->stream
;
229 uint32_t active_samplers
= active_samplers_bits(ctx
);
231 /* Pre-reserve the command buffer space which we are likely to need.
232 * This must cover all the state emitted below, and the following
234 etna_cmd_stream_reserve(stream
, required_stream_size(ctx
));
236 uint32_t dirty
= ctx
->dirty
;
238 /* Pre-processing: see what caches we need to flush before making state changes. */
239 uint32_t to_flush
= 0;
240 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
))) {
241 /* Need flush COLOR when changing PE.COLOR_FORMAT.OVERWRITE. */
244 if ((ctx
->gpu3d
.PE_COLOR_FORMAT
& VIVS_PE_COLOR_FORMAT_OVERWRITE
) !=
245 (etna_blend_state(ctx
->blend
)->PE_COLOR_FORMAT
& VIVS_PE_COLOR_FORMAT_OVERWRITE
))
247 to_flush
|= VIVS_GL_FLUSH_CACHE_COLOR
;
249 if (unlikely(dirty
& (ETNA_DIRTY_TEXTURE_CACHES
)))
250 to_flush
|= VIVS_GL_FLUSH_CACHE_TEXTURE
;
251 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
))) /* Framebuffer config changed? */
252 to_flush
|= VIVS_GL_FLUSH_CACHE_COLOR
| VIVS_GL_FLUSH_CACHE_DEPTH
;
253 if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL
))
254 to_flush
|= VIVS_GL_FLUSH_CACHE_TEXTURE
| VIVS_GL_FLUSH_CACHE_COLOR
| VIVS_GL_FLUSH_CACHE_DEPTH
;
257 etna_set_state(stream
, VIVS_GL_FLUSH_CACHE
, to_flush
);
258 etna_stall(stream
, SYNC_RECIPIENT_RA
, SYNC_RECIPIENT_PE
);
261 /* Flush TS cache before changing TS configuration. */
262 if (unlikely(dirty
& ETNA_DIRTY_TS
)) {
263 etna_set_state(stream
, VIVS_TS_FLUSH_CACHE
, VIVS_TS_FLUSH_CACHE_FLUSH
);
266 /* If MULTI_SAMPLE_CONFIG.MSAA_SAMPLES changed, clobber affected shader
267 * state to make sure it is always rewritten. */
268 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
))) {
269 if ((ctx
->gpu3d
.GL_MULTI_SAMPLE_CONFIG
& VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK
) !=
270 (ctx
->framebuffer
.GL_MULTI_SAMPLE_CONFIG
& VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK
)) {
271 /* XXX what does the GPU set these states to on MSAA samples change?
272 * Does it do the right thing?
273 * (increase/decrease as necessary) or something else? Just set some
274 * invalid value until we know for
276 ctx
->gpu3d
.PS_INPUT_COUNT
= 0xffffffff;
277 ctx
->gpu3d
.PS_TEMP_REGISTER_CONTROL
= 0xffffffff;
281 /* Update vertex elements. This is different from any of the other states, in that
282 * a) the number of vertex elements written matters: so write only active ones
283 * b) the vertex element states must all be written: do not skip entries that stay the same */
284 if (dirty
& (ETNA_DIRTY_VERTEX_ELEMENTS
)) {
285 if (ctx
->specs
.halti
>= 5) {
286 /*17800*/ etna_set_state_multi(stream
, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
287 ctx
->vertex_elements
->num_elements
,
288 ctx
->vertex_elements
->NFE_GENERIC_ATTRIB_CONFIG0
);
289 /*17A00*/ etna_set_state_multi(stream
, VIVS_NFE_GENERIC_ATTRIB_SCALE(0),
290 ctx
->vertex_elements
->num_elements
,
291 ctx
->vertex_elements
->NFE_GENERIC_ATTRIB_SCALE
);
292 /*17A80*/ etna_set_state_multi(stream
, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),
293 ctx
->vertex_elements
->num_elements
,
294 ctx
->vertex_elements
->NFE_GENERIC_ATTRIB_CONFIG1
);
296 /* Special case: vertex elements must always be sent in full if changed */
297 /*00600*/ etna_set_state_multi(stream
, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
298 ctx
->vertex_elements
->num_elements
,
299 ctx
->vertex_elements
->FE_VERTEX_ELEMENT_CONFIG
);
300 if (ctx
->specs
.halti
>= 2) {
301 /*00780*/ etna_set_state_multi(stream
, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
302 ctx
->vertex_elements
->num_elements
,
303 ctx
->vertex_elements
->NFE_GENERIC_ATTRIB_SCALE
);
307 unsigned vs_output_count
= etna_rasterizer_state(ctx
->rasterizer
)->point_size_per_vertex
308 ? ctx
->shader_state
.VS_OUTPUT_COUNT_PSIZE
309 : ctx
->shader_state
.VS_OUTPUT_COUNT
;
311 /* The following code is originally generated by gen_merge_state.py, to
312 * emit state in increasing order of address (this makes it possible to merge
313 * consecutive register updates into one SET_STATE command)
315 * There have been some manual changes, where the weaving operation is not
318 * - num vertex elements
323 * - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not
325 * - PS / framebuffer interaction for MSAA
326 * - move update of GL_MULTI_SAMPLE_CONFIG first
327 * - add unlikely()/likely()
329 struct etna_coalesce coalesce
;
331 etna_coalesce_start(stream
, &coalesce
);
333 /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here
336 /* multi sample config is set first, and outside of the normal sorting
337 * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
338 * possibly PS.TEMP_REGISTER_CONTROL).
340 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
| ETNA_DIRTY_SAMPLE_MASK
))) {
341 uint32_t val
= VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx
->sample_mask
);
342 val
|= ctx
->framebuffer
.GL_MULTI_SAMPLE_CONFIG
;
344 /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG
, val
);
346 if (likely(dirty
& (ETNA_DIRTY_INDEX_BUFFER
))) {
347 /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR
, &ctx
->index_buffer
.FE_INDEX_STREAM_BASE_ADDR
);
348 /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL
, ctx
->index_buffer
.FE_INDEX_STREAM_CONTROL
);
350 if (likely(dirty
& (ETNA_DIRTY_INDEX_BUFFER
))) {
351 /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX
, ctx
->index_buffer
.FE_PRIMITIVE_RESTART_INDEX
);
353 if (likely(dirty
& (ETNA_DIRTY_VERTEX_BUFFERS
))) {
354 if (ctx
->specs
.halti
>= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */
355 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
356 /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x
), &ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
);
358 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
359 if (ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
.bo
) {
360 /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x
), ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_CONTROL
);
363 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
364 if (ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
.bo
) {
365 /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_UNK14680(x
), ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_UNK14680
);
368 } else if(ctx
->specs
.stream_count
>= 1) { /* hw w/ multiple vertex streams */
369 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
370 /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x
), &ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
);
372 for (int x
= 0; x
< ctx
->vertex_buffer
.count
; ++x
) {
373 if (ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_BASE_ADDR
.bo
) {
374 /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x
), ctx
->vertex_buffer
.cvb
[x
].FE_VERTEX_STREAM_CONTROL
);
377 } else { /* hw w/ single vertex stream */
378 /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR
, &ctx
->vertex_buffer
.cvb
[0].FE_VERTEX_STREAM_BASE_ADDR
);
379 /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL
, ctx
->vertex_buffer
.cvb
[0].FE_VERTEX_STREAM_CONTROL
);
382 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_RASTERIZER
))) {
384 /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT
, vs_output_count
);
386 if (unlikely(dirty
& (ETNA_DIRTY_VERTEX_ELEMENTS
| ETNA_DIRTY_SHADER
))) {
387 /*00808*/ EMIT_STATE(VS_INPUT_COUNT
, ctx
->shader_state
.VS_INPUT_COUNT
);
388 /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL
, ctx
->shader_state
.VS_TEMP_REGISTER_CONTROL
);
390 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
391 /*00830*/ EMIT_STATE(VS_LOAD_BALANCING
, ctx
->shader_state
.VS_LOAD_BALANCING
);
393 if (unlikely(dirty
& (ETNA_DIRTY_VIEWPORT
))) {
394 /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X
, ctx
->viewport
.PA_VIEWPORT_SCALE_X
);
395 /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y
, ctx
->viewport
.PA_VIEWPORT_SCALE_Y
);
396 /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z
, ctx
->viewport
.PA_VIEWPORT_SCALE_Z
);
397 /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X
, ctx
->viewport
.PA_VIEWPORT_OFFSET_X
);
398 /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y
, ctx
->viewport
.PA_VIEWPORT_OFFSET_Y
);
399 /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z
, ctx
->viewport
.PA_VIEWPORT_OFFSET_Z
);
401 if (unlikely(dirty
& (ETNA_DIRTY_RASTERIZER
))) {
402 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
404 /*00A18*/ EMIT_STATE(PA_LINE_WIDTH
, rasterizer
->PA_LINE_WIDTH
);
405 /*00A1C*/ EMIT_STATE(PA_POINT_SIZE
, rasterizer
->PA_POINT_SIZE
);
406 /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE
, rasterizer
->PA_SYSTEM_MODE
);
408 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
409 /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT
, ctx
->shader_state
.PA_ATTRIBUTE_ELEMENT_COUNT
);
411 if (unlikely(dirty
& (ETNA_DIRTY_RASTERIZER
| ETNA_DIRTY_SHADER
))) {
412 uint32_t val
= etna_rasterizer_state(ctx
->rasterizer
)->PA_CONFIG
;
413 /*00A34*/ EMIT_STATE(PA_CONFIG
, val
& ctx
->shader_state
.PA_CONFIG
);
415 if (unlikely(dirty
& (ETNA_DIRTY_RASTERIZER
))) {
416 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
417 /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0
, rasterizer
->PA_LINE_WIDTH
);
418 /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1
, rasterizer
->PA_LINE_WIDTH
);
420 if (unlikely(dirty
& (ETNA_DIRTY_SCISSOR
| ETNA_DIRTY_FRAMEBUFFER
|
421 ETNA_DIRTY_RASTERIZER
| ETNA_DIRTY_VIEWPORT
))) {
422 /* this is a bit of a mess: rasterizer.scissor determines whether to use
423 * only the framebuffer scissor, or specific scissor state, and the
424 * viewport clips too so the logic spans four CSOs */
425 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
427 uint32_t scissor_left
=
428 MAX2(ctx
->framebuffer
.SE_SCISSOR_LEFT
, ctx
->viewport
.SE_SCISSOR_LEFT
);
429 uint32_t scissor_top
=
430 MAX2(ctx
->framebuffer
.SE_SCISSOR_TOP
, ctx
->viewport
.SE_SCISSOR_TOP
);
431 uint32_t scissor_right
=
432 MIN2(ctx
->framebuffer
.SE_SCISSOR_RIGHT
, ctx
->viewport
.SE_SCISSOR_RIGHT
);
433 uint32_t scissor_bottom
=
434 MIN2(ctx
->framebuffer
.SE_SCISSOR_BOTTOM
, ctx
->viewport
.SE_SCISSOR_BOTTOM
);
436 if (rasterizer
->scissor
) {
437 scissor_left
= MAX2(ctx
->scissor
.SE_SCISSOR_LEFT
, scissor_left
);
438 scissor_top
= MAX2(ctx
->scissor
.SE_SCISSOR_TOP
, scissor_top
);
439 scissor_right
= MIN2(ctx
->scissor
.SE_SCISSOR_RIGHT
, scissor_right
);
440 scissor_bottom
= MIN2(ctx
->scissor
.SE_SCISSOR_BOTTOM
, scissor_bottom
);
443 /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT
, scissor_left
);
444 /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP
, scissor_top
);
445 /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT
, scissor_right
);
446 /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM
, scissor_bottom
);
448 if (unlikely(dirty
& (ETNA_DIRTY_RASTERIZER
))) {
449 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
451 /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE
, rasterizer
->SE_DEPTH_SCALE
);
452 /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS
, rasterizer
->SE_DEPTH_BIAS
);
453 /*00C18*/ EMIT_STATE(SE_CONFIG
, rasterizer
->SE_CONFIG
);
455 if (unlikely(dirty
& (ETNA_DIRTY_SCISSOR
| ETNA_DIRTY_FRAMEBUFFER
|
456 ETNA_DIRTY_RASTERIZER
| ETNA_DIRTY_VIEWPORT
))) {
457 struct etna_rasterizer_state
*rasterizer
= etna_rasterizer_state(ctx
->rasterizer
);
459 uint32_t clip_right
=
460 MIN2(ctx
->framebuffer
.SE_CLIP_RIGHT
, ctx
->viewport
.SE_CLIP_RIGHT
);
461 uint32_t clip_bottom
=
462 MIN2(ctx
->framebuffer
.SE_CLIP_BOTTOM
, ctx
->viewport
.SE_CLIP_BOTTOM
);
464 if (rasterizer
->scissor
) {
465 clip_right
= MIN2(ctx
->scissor
.SE_CLIP_RIGHT
, clip_right
);
466 clip_bottom
= MIN2(ctx
->scissor
.SE_CLIP_BOTTOM
, clip_bottom
);
469 /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT
, clip_right
);
470 /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM
, clip_bottom
);
472 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
473 /*00E00*/ EMIT_STATE(RA_CONTROL
, ctx
->shader_state
.RA_CONTROL
);
475 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_FRAMEBUFFER
))) {
476 /*01004*/ EMIT_STATE(PS_OUTPUT_REG
, ctx
->shader_state
.PS_OUTPUT_REG
);
477 /*01008*/ EMIT_STATE(PS_INPUT_COUNT
,
478 ctx
->framebuffer
.msaa_mode
479 ? ctx
->shader_state
.PS_INPUT_COUNT_MSAA
480 : ctx
->shader_state
.PS_INPUT_COUNT
);
481 /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL
,
482 ctx
->framebuffer
.msaa_mode
483 ? ctx
->shader_state
.PS_TEMP_REGISTER_CONTROL_MSAA
484 : ctx
->shader_state
.PS_TEMP_REGISTER_CONTROL
);
485 /*01010*/ EMIT_STATE(PS_CONTROL
, ctx
->shader_state
.PS_CONTROL
);
487 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
| ETNA_DIRTY_FRAMEBUFFER
))) {
488 uint32_t val
= etna_zsa_state(ctx
->zsa
)->PE_DEPTH_CONFIG
;
489 /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG
, val
| ctx
->framebuffer
.PE_DEPTH_CONFIG
);
491 if (unlikely(dirty
& (ETNA_DIRTY_VIEWPORT
))) {
492 /*01404*/ EMIT_STATE(PE_DEPTH_NEAR
, ctx
->viewport
.PE_DEPTH_NEAR
);
493 /*01408*/ EMIT_STATE(PE_DEPTH_FAR
, ctx
->viewport
.PE_DEPTH_FAR
);
495 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
))) {
496 /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE
, ctx
->framebuffer
.PE_DEPTH_NORMALIZE
);
498 if (ctx
->specs
.pixel_pipes
== 1) {
499 /*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR
, &ctx
->framebuffer
.PE_DEPTH_ADDR
);
502 /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE
, ctx
->framebuffer
.PE_DEPTH_STRIDE
);
504 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
))) {
505 uint32_t val
= etna_zsa_state(ctx
->zsa
)->PE_STENCIL_OP
;
506 /*01418*/ EMIT_STATE(PE_STENCIL_OP
, val
);
508 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
| ETNA_DIRTY_STENCIL_REF
))) {
509 uint32_t val
= etna_zsa_state(ctx
->zsa
)->PE_STENCIL_CONFIG
;
510 /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG
, val
| ctx
->stencil_ref
.PE_STENCIL_CONFIG
);
512 if (unlikely(dirty
& (ETNA_DIRTY_ZSA
))) {
513 uint32_t val
= etna_zsa_state(ctx
->zsa
)->PE_ALPHA_OP
;
514 /*01420*/ EMIT_STATE(PE_ALPHA_OP
, val
);
516 if (unlikely(dirty
& (ETNA_DIRTY_BLEND_COLOR
))) {
517 /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR
, ctx
->blend_color
.PE_ALPHA_BLEND_COLOR
);
519 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
))) {
520 uint32_t val
= etna_blend_state(ctx
->blend
)->PE_ALPHA_CONFIG
;
521 /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG
, val
);
523 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
| ETNA_DIRTY_FRAMEBUFFER
))) {
525 /* Use the components and overwrite bits in framebuffer.PE_COLOR_FORMAT
526 * as a mask to enable the bits from blend PE_COLOR_FORMAT */
527 val
= ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK
|
528 VIVS_PE_COLOR_FORMAT_OVERWRITE
);
529 val
|= etna_blend_state(ctx
->blend
)->PE_COLOR_FORMAT
;
530 val
&= ctx
->framebuffer
.PE_COLOR_FORMAT
;
531 /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT
, val
);
533 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
))) {
534 if (ctx
->specs
.pixel_pipes
== 1) {
535 /*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR
, &ctx
->framebuffer
.PE_COLOR_ADDR
);
536 /*01434*/ EMIT_STATE(PE_COLOR_STRIDE
, ctx
->framebuffer
.PE_COLOR_STRIDE
);
537 /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL
, ctx
->framebuffer
.PE_HDEPTH_CONTROL
);
538 } else if (ctx
->specs
.pixel_pipes
== 2) {
539 /*01434*/ EMIT_STATE(PE_COLOR_STRIDE
, ctx
->framebuffer
.PE_COLOR_STRIDE
);
540 /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL
, ctx
->framebuffer
.PE_HDEPTH_CONTROL
);
541 /*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx
->framebuffer
.PE_PIPE_COLOR_ADDR
[0]);
542 /*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx
->framebuffer
.PE_PIPE_COLOR_ADDR
[1]);
543 /*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx
->framebuffer
.PE_PIPE_DEPTH_ADDR
[0]);
544 /*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx
->framebuffer
.PE_PIPE_DEPTH_ADDR
[1]);
549 if (unlikely(dirty
& (ETNA_DIRTY_STENCIL_REF
))) {
550 /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT
, ctx
->stencil_ref
.PE_STENCIL_CONFIG_EXT
);
552 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
| ETNA_DIRTY_FRAMEBUFFER
))) {
553 struct etna_blend_state
*blend
= etna_blend_state(ctx
->blend
);
554 /*014A4*/ EMIT_STATE(PE_LOGIC_OP
, blend
->PE_LOGIC_OP
| ctx
->framebuffer
.PE_LOGIC_OP
);
556 if (unlikely(dirty
& (ETNA_DIRTY_BLEND
))) {
557 struct etna_blend_state
*blend
= etna_blend_state(ctx
->blend
);
558 for (int x
= 0; x
< 2; ++x
) {
559 /*014A8*/ EMIT_STATE(PE_DITHER(x
), blend
->PE_DITHER
[x
]);
562 if (unlikely(dirty
& (ETNA_DIRTY_FRAMEBUFFER
| ETNA_DIRTY_TS
))) {
563 /*01654*/ EMIT_STATE(TS_MEM_CONFIG
, ctx
->framebuffer
.TS_MEM_CONFIG
);
564 /*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE
, &ctx
->framebuffer
.TS_COLOR_STATUS_BASE
);
565 /*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE
, &ctx
->framebuffer
.TS_COLOR_SURFACE_BASE
);
566 /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE
, ctx
->framebuffer
.TS_COLOR_CLEAR_VALUE
);
567 /*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE
, &ctx
->framebuffer
.TS_DEPTH_STATUS_BASE
);
568 /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE
, &ctx
->framebuffer
.TS_DEPTH_SURFACE_BASE
);
569 /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE
, ctx
->framebuffer
.TS_DEPTH_CLEAR_VALUE
);
571 if (unlikely(dirty
& ETNA_DIRTY_SAMPLER_VIEWS
)) {
572 for (int x
= 0; x
< VIVS_TS_SAMPLER__LEN
; ++x
) {
573 if ((1 << x
) & active_samplers
) {
574 struct etna_sampler_view
*sv
= etna_sampler_view(ctx
->sampler_view
[x
]);
575 /*01720*/ EMIT_STATE(TS_SAMPLER_CONFIG(x
), sv
->TS_SAMPLER_CONFIG
);
578 for (int x
= 0; x
< VIVS_TS_SAMPLER__LEN
; ++x
) {
579 if ((1 << x
) & active_samplers
) {
580 struct etna_sampler_view
*sv
= etna_sampler_view(ctx
->sampler_view
[x
]);
581 /*01740*/ EMIT_STATE_RELOC(TS_SAMPLER_STATUS_BASE(x
), &sv
->TS_SAMPLER_STATUS_BASE
);
584 for (int x
= 0; x
< VIVS_TS_SAMPLER__LEN
; ++x
) {
585 if ((1 << x
) & active_samplers
) {
586 struct etna_sampler_view
*sv
= etna_sampler_view(ctx
->sampler_view
[x
]);
587 /*01760*/ EMIT_STATE(TS_SAMPLER_CLEAR_VALUE(x
), sv
->TS_SAMPLER_CLEAR_VALUE
);
590 for (int x
= 0; x
< VIVS_TS_SAMPLER__LEN
; ++x
) {
591 if ((1 << x
) & active_samplers
) {
592 struct etna_sampler_view
*sv
= etna_sampler_view(ctx
->sampler_view
[x
]);
593 /*01780*/ EMIT_STATE(TS_SAMPLER_CLEAR_VALUE2(x
), sv
->TS_SAMPLER_CLEAR_VALUE2
);
597 if (unlikely(dirty
& (ETNA_DIRTY_SAMPLER_VIEWS
| ETNA_DIRTY_SAMPLERS
))) {
598 for (int x
= 0; x
< VIVS_TE_SAMPLER__LEN
; ++x
) {
599 uint32_t val
= 0; /* 0 == sampler inactive */
601 /* set active samplers to their configuration value (determined by both
602 * the sampler state and sampler view) */
603 if ((1 << x
) & active_samplers
) {
604 struct etna_sampler_state
*ss
= etna_sampler_state(ctx
->sampler
[x
]);
605 struct etna_sampler_view
*sv
= etna_sampler_view(ctx
->sampler_view
[x
]);
607 val
= (ss
->TE_SAMPLER_CONFIG0
& sv
->TE_SAMPLER_CONFIG0_MASK
) |
608 sv
->TE_SAMPLER_CONFIG0
;
611 /*02000*/ EMIT_STATE(TE_SAMPLER_CONFIG0(x
), val
);
614 if (unlikely(dirty
& (ETNA_DIRTY_SAMPLER_VIEWS
))) {
615 struct etna_sampler_view
*sv
;
617 for (int x
= 0; x
< VIVS_TE_SAMPLER__LEN
; ++x
) {
618 if ((1 << x
) & active_samplers
) {
619 sv
= etna_sampler_view(ctx
->sampler_view
[x
]);
620 /*02040*/ EMIT_STATE(TE_SAMPLER_SIZE(x
), sv
->TE_SAMPLER_SIZE
);
623 for (int x
= 0; x
< VIVS_TE_SAMPLER__LEN
; ++x
) {
624 if ((1 << x
) & active_samplers
) {
625 sv
= etna_sampler_view(ctx
->sampler_view
[x
]);
626 /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x
), sv
->TE_SAMPLER_LOG_SIZE
);
630 if (unlikely(dirty
& (ETNA_DIRTY_SAMPLER_VIEWS
| ETNA_DIRTY_SAMPLERS
))) {
631 struct etna_sampler_state
*ss
;
632 struct etna_sampler_view
*sv
;
634 for (int x
= 0; x
< VIVS_TE_SAMPLER__LEN
; ++x
) {
635 if ((1 << x
) & active_samplers
) {
636 ss
= etna_sampler_state(ctx
->sampler
[x
]);
637 sv
= etna_sampler_view(ctx
->sampler_view
[x
]);
639 /* min and max lod is determined both by the sampler and the view */
640 /*020C0*/ EMIT_STATE(TE_SAMPLER_LOD_CONFIG(x
),
641 ss
->TE_SAMPLER_LOD_CONFIG
|
642 VIVS_TE_SAMPLER_LOD_CONFIG_MAX(MIN2(ss
->max_lod
, sv
->max_lod
)) |
643 VIVS_TE_SAMPLER_LOD_CONFIG_MIN(MAX2(ss
->min_lod
, sv
->min_lod
)));
646 for (int x
= 0; x
< VIVS_TE_SAMPLER__LEN
; ++x
) {
647 if ((1 << x
) & active_samplers
) {
648 ss
= etna_sampler_state(ctx
->sampler
[x
]);
649 sv
= etna_sampler_view(ctx
->sampler_view
[x
]);
651 /*021C0*/ EMIT_STATE(TE_SAMPLER_CONFIG1(x
), ss
->TE_SAMPLER_CONFIG1
|
652 sv
->TE_SAMPLER_CONFIG1
);
656 if (unlikely(dirty
& (ETNA_DIRTY_SAMPLER_VIEWS
))) {
657 for (int y
= 0; y
< VIVS_TE_SAMPLER_LOD_ADDR__LEN
; ++y
) {
658 for (int x
= 0; x
< VIVS_TE_SAMPLER__LEN
; ++x
) {
659 if ((1 << x
) & active_samplers
) {
660 struct etna_sampler_view
*sv
= etna_sampler_view(ctx
->sampler_view
[x
]);
661 /*02400*/ EMIT_STATE_RELOC(TE_SAMPLER_LOD_ADDR(x
, y
),&sv
->TE_SAMPLER_LOD_ADDR
[y
]);
667 if (unlikely(dirty
& (ETNA_DIRTY_SHADER
))) {
668 /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS
, ctx
->shader_state
.GL_VARYING_TOTAL_COMPONENTS
);
670 if (unlikely(ctx
->specs
.tex_astc
&& (dirty
& (ETNA_DIRTY_SAMPLER_VIEWS
)))) {
671 for (int x
= 0; x
< VIVS_TE_SAMPLER__LEN
; ++x
) {
672 if ((1 << x
) & active_samplers
) {
673 struct etna_sampler_view
*sv
= etna_sampler_view(ctx
->sampler_view
[x
]);
674 /*10500*/ EMIT_STATE(NTE_SAMPLER_ASTC0(x
), sv
->TE_SAMPLER_ASTC0
);
678 etna_coalesce_end(stream
, &coalesce
);
679 /* end only EMIT_STATE */
681 /* Emit strongly architecture-specific state */
682 if (ctx
->specs
.halti
>= 5)
683 emit_halti5_only_state(ctx
, vs_output_count
);
685 emit_pre_halti5_state(ctx
);
687 /* Insert a FE/PE stall as changing the shader instructions (and maybe
688 * the uniforms) can corrupt the previous in-progress draw operation.
689 * Observed with amoeba on GC2000 during the right-to-left rendering
690 * of PI, and can cause GPU hangs immediately after.
691 * I surmise that this is because the "new" locations at 0xc000 are not
692 * properly protected against updates as other states seem to be. Hence,
693 * we detect the "new" vertex shader instruction offset to apply this. */
694 if (ctx
->dirty
& (ETNA_DIRTY_SHADER
| ETNA_DIRTY_CONSTBUF
) && ctx
->specs
.vs_offset
> 0x4000)
695 etna_stall(ctx
->stream
, SYNC_RECIPIENT_FE
, SYNC_RECIPIENT_PE
);
697 /* We need to update the uniform cache only if one of the following bits are
699 * - ETNA_DIRTY_SHADER
700 * - ETNA_DIRTY_CONSTBUF
701 * - uniforms_dirty_bits
703 * In case of ETNA_DIRTY_SHADER we need load all uniforms from the cache. In
705 * other cases we can load on the changed uniforms.
707 static const uint32_t uniform_dirty_bits
=
708 ETNA_DIRTY_SHADER
| ETNA_DIRTY_CONSTBUF
;
710 if (dirty
& (uniform_dirty_bits
| ctx
->shader
.fs
->uniforms_dirty_bits
))
712 ctx
, ctx
->shader
.vs
, &ctx
->constant_buffer
[PIPE_SHADER_VERTEX
],
713 ctx
->shader_state
.VS_UNIFORMS
, &ctx
->shader_state
.vs_uniforms_size
);
715 if (dirty
& (uniform_dirty_bits
| ctx
->shader
.vs
->uniforms_dirty_bits
))
717 ctx
, ctx
->shader
.fs
, &ctx
->constant_buffer
[PIPE_SHADER_FRAGMENT
],
718 ctx
->shader_state
.PS_UNIFORMS
, &ctx
->shader_state
.ps_uniforms_size
);
720 /**** Large dynamically-sized state ****/
721 bool do_uniform_flush
= ctx
->specs
.halti
< 5;
722 if (dirty
& (ETNA_DIRTY_SHADER
)) {
723 /* Special case: a new shader was loaded; simply re-load all uniforms and
724 * shader code at once */
725 /* This sequence is special, do not change ordering unless necessary. According to comment
726 snippets in the Vivante kernel driver a process called "steering" goes on while programming
727 shader state. This (as I understand it) means certain unified states are "steered"
728 toward a specific shader unit (VS/PS/...) based on either explicit flags in register
729 00860, or what other state is written before "auto-steering". So this means some
730 state can legitimately be programmed multiple times.
733 if (ctx
->specs
.halti
>= 5) { /* ICACHE (HALTI5) */
734 assert(ctx
->shader_state
.VS_INST_ADDR
.bo
&& ctx
->shader_state
.PS_INST_ADDR
.bo
);
735 /* Set icache (VS) */
736 etna_set_state(stream
, VIVS_VS_NEWRANGE_LOW
, 0);
737 etna_set_state(stream
, VIVS_VS_NEWRANGE_HIGH
, ctx
->shader_state
.vs_inst_mem_size
/ 4);
738 assert(ctx
->shader_state
.VS_INST_ADDR
.bo
);
739 etna_set_state_reloc(stream
, VIVS_VS_INST_ADDR
, &ctx
->shader_state
.VS_INST_ADDR
);
740 etna_set_state(stream
, VIVS_SH_CONFIG
, 0x00000002);
741 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
, VIVS_VS_ICACHE_CONTROL_ENABLE
);
742 etna_set_state(stream
, VIVS_VS_ICACHE_COUNT
, ctx
->shader_state
.vs_inst_mem_size
/ 4 - 1);
744 /* Set icache (PS) */
745 etna_set_state(stream
, VIVS_PS_NEWRANGE_LOW
, 0);
746 etna_set_state(stream
, VIVS_PS_NEWRANGE_HIGH
, ctx
->shader_state
.ps_inst_mem_size
/ 4);
747 assert(ctx
->shader_state
.PS_INST_ADDR
.bo
);
748 etna_set_state_reloc(stream
, VIVS_PS_INST_ADDR
, &ctx
->shader_state
.PS_INST_ADDR
);
749 etna_set_state(stream
, VIVS_SH_CONFIG
, 0x00000002);
750 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
, VIVS_VS_ICACHE_CONTROL_ENABLE
);
751 etna_set_state(stream
, VIVS_PS_ICACHE_COUNT
, ctx
->shader_state
.ps_inst_mem_size
/ 4 - 1);
753 } else if (ctx
->shader_state
.VS_INST_ADDR
.bo
|| ctx
->shader_state
.PS_INST_ADDR
.bo
) {
754 /* ICACHE (pre-HALTI5) */
755 assert(ctx
->specs
.has_icache
&& ctx
->specs
.has_shader_range_registers
);
756 /* Set icache (VS) */
757 etna_set_state(stream
, VIVS_VS_RANGE
, (ctx
->shader_state
.vs_inst_mem_size
/ 4 - 1) << 16);
758 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
,
759 VIVS_VS_ICACHE_CONTROL_ENABLE
|
760 VIVS_VS_ICACHE_CONTROL_FLUSH_VS
);
761 assert(ctx
->shader_state
.VS_INST_ADDR
.bo
);
762 etna_set_state_reloc(stream
, VIVS_VS_INST_ADDR
, &ctx
->shader_state
.VS_INST_ADDR
);
764 /* Set icache (PS) */
765 etna_set_state(stream
, VIVS_PS_RANGE
, (ctx
->shader_state
.ps_inst_mem_size
/ 4 - 1) << 16);
766 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
,
767 VIVS_VS_ICACHE_CONTROL_ENABLE
|
768 VIVS_VS_ICACHE_CONTROL_FLUSH_PS
);
769 assert(ctx
->shader_state
.PS_INST_ADDR
.bo
);
770 etna_set_state_reloc(stream
, VIVS_PS_INST_ADDR
, &ctx
->shader_state
.PS_INST_ADDR
);
772 /* Upload shader directly, first flushing and disabling icache if
773 * supported on this hw */
774 if (ctx
->specs
.has_icache
) {
775 etna_set_state(stream
, VIVS_VS_ICACHE_CONTROL
,
776 VIVS_VS_ICACHE_CONTROL_FLUSH_PS
|
777 VIVS_VS_ICACHE_CONTROL_FLUSH_VS
);
779 if (ctx
->specs
.has_shader_range_registers
) {
780 etna_set_state(stream
, VIVS_VS_RANGE
, (ctx
->shader_state
.vs_inst_mem_size
/ 4 - 1) << 16);
781 etna_set_state(stream
, VIVS_PS_RANGE
, ((ctx
->shader_state
.ps_inst_mem_size
/ 4 - 1 + 0x100) << 16) |
784 etna_set_state_multi(stream
, ctx
->specs
.vs_offset
,
785 ctx
->shader_state
.vs_inst_mem_size
,
786 ctx
->shader_state
.VS_INST_MEM
);
787 etna_set_state_multi(stream
, ctx
->specs
.ps_offset
,
788 ctx
->shader_state
.ps_inst_mem_size
,
789 ctx
->shader_state
.PS_INST_MEM
);
792 if (ctx
->specs
.has_unified_uniforms
) {
793 etna_set_state(stream
, VIVS_VS_UNIFORM_BASE
, 0);
794 etna_set_state(stream
, VIVS_PS_UNIFORM_BASE
, ctx
->specs
.max_vs_uniforms
);
797 if (do_uniform_flush
)
798 etna_set_state(stream
, VIVS_VS_UNIFORM_CACHE
, VIVS_VS_UNIFORM_CACHE_FLUSH
);
799 etna_set_state_multi(stream
, ctx
->specs
.vs_uniforms_offset
,
800 ctx
->shader_state
.vs_uniforms_size
,
801 ctx
->shader_state
.VS_UNIFORMS
);
802 if (do_uniform_flush
)
803 etna_set_state(stream
, VIVS_VS_UNIFORM_CACHE
, VIVS_VS_UNIFORM_CACHE_FLUSH
| VIVS_VS_UNIFORM_CACHE_PS
);
804 etna_set_state_multi(stream
, ctx
->specs
.ps_uniforms_offset
,
805 ctx
->shader_state
.ps_uniforms_size
,
806 ctx
->shader_state
.PS_UNIFORMS
);
808 /* Copy uniforms to gpu3d, so that incremental updates to uniforms are
809 * possible as long as the
810 * same shader remains bound */
811 ctx
->gpu3d
.vs_uniforms_size
= ctx
->shader_state
.vs_uniforms_size
;
812 ctx
->gpu3d
.ps_uniforms_size
= ctx
->shader_state
.ps_uniforms_size
;
813 memcpy(ctx
->gpu3d
.VS_UNIFORMS
, ctx
->shader_state
.VS_UNIFORMS
,
814 ctx
->shader_state
.vs_uniforms_size
* 4);
815 memcpy(ctx
->gpu3d
.PS_UNIFORMS
, ctx
->shader_state
.PS_UNIFORMS
,
816 ctx
->shader_state
.ps_uniforms_size
* 4);
818 if (ctx
->specs
.halti
>= 5) {
819 /* HALTI5 needs to be prompted to pre-fetch shaders */
820 etna_set_state(stream
, VIVS_VS_ICACHE_PREFETCH
, 0x00000000);
821 etna_set_state(stream
, VIVS_PS_ICACHE_PREFETCH
, 0x00000000);
822 etna_stall(stream
, SYNC_RECIPIENT_RA
, SYNC_RECIPIENT_PE
);
825 /* ideally this cache would only be flushed if there are VS uniform changes */
826 if (do_uniform_flush
)
827 etna_set_state(stream
, VIVS_VS_UNIFORM_CACHE
, VIVS_VS_UNIFORM_CACHE_FLUSH
);
828 etna_coalesce_start(stream
, &coalesce
);
829 for (int x
= 0; x
< ctx
->shader
.vs
->uniforms
.const_count
; ++x
) {
830 if (ctx
->gpu3d
.VS_UNIFORMS
[x
] != ctx
->shader_state
.VS_UNIFORMS
[x
]) {
831 etna_coalsence_emit(stream
, &coalesce
, ctx
->specs
.vs_uniforms_offset
+ x
*4, ctx
->shader_state
.VS_UNIFORMS
[x
]);
832 ctx
->gpu3d
.VS_UNIFORMS
[x
] = ctx
->shader_state
.VS_UNIFORMS
[x
];
835 etna_coalesce_end(stream
, &coalesce
);
837 /* ideally this cache would only be flushed if there are PS uniform changes */
838 if (do_uniform_flush
)
839 etna_set_state(stream
, VIVS_VS_UNIFORM_CACHE
, VIVS_VS_UNIFORM_CACHE_FLUSH
| VIVS_VS_UNIFORM_CACHE_PS
);
840 etna_coalesce_start(stream
, &coalesce
);
841 for (int x
= 0; x
< ctx
->shader
.fs
->uniforms
.const_count
; ++x
) {
842 if (ctx
->gpu3d
.PS_UNIFORMS
[x
] != ctx
->shader_state
.PS_UNIFORMS
[x
]) {
843 etna_coalsence_emit(stream
, &coalesce
, ctx
->specs
.ps_uniforms_offset
+ x
*4, ctx
->shader_state
.PS_UNIFORMS
[x
]);
844 ctx
->gpu3d
.PS_UNIFORMS
[x
] = ctx
->shader_state
.PS_UNIFORMS
[x
];
847 etna_coalesce_end(stream
, &coalesce
);
849 /**** End of state update ****/
851 #undef EMIT_STATE_FIXP
852 #undef EMIT_STATE_RELOC