etnaviv: GC7000: Move etna_coalesce to emit header file
[mesa.git] / src / gallium / drivers / etnaviv / etnaviv_emit.c
/*
 * Copyright (c) 2014-2015 Etnaviv Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Wladimir J. van der Laan <laanwj@gmail.com>
 */

#include "etnaviv_emit.h"

#include "etnaviv_blend.h"
#include "etnaviv_compiler.h"
#include "etnaviv_context.h"
#include "etnaviv_rasterizer.h"
#include "etnaviv_resource.h"
#include "etnaviv_rs.h"
#include "etnaviv_screen.h"
#include "etnaviv_shader.h"
#include "etnaviv_texture.h"
#include "etnaviv_translate.h"
#include "etnaviv_uniforms.h"
#include "etnaviv_util.h"
#include "etnaviv_zsa.h"
#include "hw/common.xml.h"
#include "hw/state.xml.h"
#include "hw/state_blt.xml.h"
#include "util/u_math.h"

/* Queue a STALL command (queues 2 words) */
static inline void
CMD_STALL(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
{
   etna_cmd_stream_emit(stream, VIV_FE_STALL_HEADER_OP_STALL);
   etna_cmd_stream_emit(stream, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
}
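
/* Note: CMD_STALL does not reserve stream space itself; callers such as
 * etna_stall() below must account for these two words in their own
 * etna_cmd_stream_reserve() call. */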

void
etna_stall(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
{
   bool blt = (from == SYNC_RECIPIENT_BLT) || (to == SYNC_RECIPIENT_BLT);
   etna_cmd_stream_reserve(stream, blt ? 8 : 4);

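   /* Syncing with the BLT engine seems to require the BLT unit to be enabled
    * around the semaphore/stall sequence, hence the BLT_ENABLE writes
    * bracketing it below (an assumption based on observed GC7000 behavior,
    * not on documentation). */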
   if (blt) {
      etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
      etna_cmd_stream_emit(stream, 1);
   }

   /* TODO: set bit 28/29 of token after BLT COPY_BUFFER */
   etna_emit_load_state(stream, VIVS_GL_SEMAPHORE_TOKEN >> 2, 1, 0);
   etna_cmd_stream_emit(stream, VIVS_GL_SEMAPHORE_TOKEN_FROM(from) | VIVS_GL_SEMAPHORE_TOKEN_TO(to));

   if (from == SYNC_RECIPIENT_FE) {
      /* if the frontend is to be stalled, queue a STALL frontend command */
      CMD_STALL(stream, from, to);
   } else {
      /* otherwise, load the STALL token state */
      etna_emit_load_state(stream, VIVS_GL_STALL_TOKEN >> 2, 1, 0);
      etna_cmd_stream_emit(stream, VIVS_GL_STALL_TOKEN_FROM(from) | VIVS_GL_STALL_TOKEN_TO(to));
   }

   if (blt) {
      etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
      etna_cmd_stream_emit(stream, 0);
   }
}

#define EMIT_STATE(state_name, src_value) \
   etna_coalsence_emit(stream, &coalesce, VIVS_##state_name, src_value)

#define EMIT_STATE_FIXP(state_name, src_value) \
   etna_coalsence_emit_fixp(stream, &coalesce, VIVS_##state_name, src_value)

#define EMIT_STATE_RELOC(state_name, src_value) \
   etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)
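
/* These helpers buffer state writes so that updates to consecutive register
 * addresses can be merged into a single SET_STATE command. A hypothetical
 * sketch of the effect:
 *
 *    EMIT_STATE(SOME_STATE, v0);        // register address A
 *    EMIT_STATE(SOME_OTHER_STATE, v1);  // register address A + 4
 *
 * ends up as one SET_STATE packet with base A and count 2, instead of two
 * separate packets, which is why the code below emits states in increasing
 * order of address. */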

/* Submit RS state, without any processing and no dependence on context
 * except TS if this is a source-to-destination blit. */
void
etna_submit_rs_state(struct etna_context *ctx,
                     const struct compiled_rs_state *cs)
{
   struct etna_screen *screen = etna_screen(ctx->base.screen);
   struct etna_cmd_stream *stream = ctx->stream;
   struct etna_coalesce coalesce;

   if (cs->RS_KICKER_INPLACE && !cs->source_ts_valid)
      /* An in-place resolve is a no-op if TS is not configured */
      return;

   ctx->stats.rs_operations++;

   if (cs->RS_KICKER_INPLACE) {
      etna_cmd_stream_reserve(stream, 6);
      etna_coalesce_start(stream, &coalesce);
      /* 0/1 */ EMIT_STATE(RS_EXTRA_CONFIG, cs->RS_EXTRA_CONFIG);
      /* 2/3 */ EMIT_STATE(RS_SOURCE_STRIDE, cs->RS_SOURCE_STRIDE);
      /* 4/5 */ EMIT_STATE(RS_KICKER_INPLACE, cs->RS_KICKER_INPLACE);
      etna_coalesce_end(stream, &coalesce);
   } else if (screen->specs.pixel_pipes == 1) {
      etna_cmd_stream_reserve(stream, 22);
      etna_coalesce_start(stream, &coalesce);
      /* 0/1 */ EMIT_STATE(RS_CONFIG, cs->RS_CONFIG);
      /* 2 */ EMIT_STATE_RELOC(RS_SOURCE_ADDR, &cs->source[0]);
      /* 3 */ EMIT_STATE(RS_SOURCE_STRIDE, cs->RS_SOURCE_STRIDE);
      /* 4 */ EMIT_STATE_RELOC(RS_DEST_ADDR, &cs->dest[0]);
      /* 5 */ EMIT_STATE(RS_DEST_STRIDE, cs->RS_DEST_STRIDE);
      /* 6/7 */ EMIT_STATE(RS_WINDOW_SIZE, cs->RS_WINDOW_SIZE);
      /* 8/9 */ EMIT_STATE(RS_DITHER(0), cs->RS_DITHER[0]);
      /*10 */ EMIT_STATE(RS_DITHER(1), cs->RS_DITHER[1]);
      /*11 - pad */
      /*12/13*/ EMIT_STATE(RS_CLEAR_CONTROL, cs->RS_CLEAR_CONTROL);
      /*14 */ EMIT_STATE(RS_FILL_VALUE(0), cs->RS_FILL_VALUE[0]);
      /*15 */ EMIT_STATE(RS_FILL_VALUE(1), cs->RS_FILL_VALUE[1]);
      /*16 */ EMIT_STATE(RS_FILL_VALUE(2), cs->RS_FILL_VALUE[2]);
      /*17 */ EMIT_STATE(RS_FILL_VALUE(3), cs->RS_FILL_VALUE[3]);
      /*18/19*/ EMIT_STATE(RS_EXTRA_CONFIG, cs->RS_EXTRA_CONFIG);
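      /* Writing RS_KICKER starts the resolve operation; the value looks like
       * an arbitrary magic cookie. */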
      /*20/21*/ EMIT_STATE(RS_KICKER, 0xbeebbeeb);
      etna_coalesce_end(stream, &coalesce);
   } else if (screen->specs.pixel_pipes == 2) {
      etna_cmd_stream_reserve(stream, 34); /* worst case - both pipes multi=1 */
      etna_coalesce_start(stream, &coalesce);
      /* 0/1 */ EMIT_STATE(RS_CONFIG, cs->RS_CONFIG);
      /* 2/3 */ EMIT_STATE(RS_SOURCE_STRIDE, cs->RS_SOURCE_STRIDE);
      /* 4/5 */ EMIT_STATE(RS_DEST_STRIDE, cs->RS_DEST_STRIDE);
      /* 6/7 */ EMIT_STATE_RELOC(RS_PIPE_SOURCE_ADDR(0), &cs->source[0]);
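      /* The MULTI bit in the stride selects per-pipe addressing; when it is
       * set, a second per-pipe address is programmed (likewise for the
       * destination below). */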
      if (cs->RS_SOURCE_STRIDE & VIVS_RS_SOURCE_STRIDE_MULTI) {
         /*8 */ EMIT_STATE_RELOC(RS_PIPE_SOURCE_ADDR(1), &cs->source[1]);
         /*9 - pad */
      }
      /*10/11*/ EMIT_STATE_RELOC(RS_PIPE_DEST_ADDR(0), &cs->dest[0]);
      if (cs->RS_DEST_STRIDE & VIVS_RS_DEST_STRIDE_MULTI) {
         /*12*/ EMIT_STATE_RELOC(RS_PIPE_DEST_ADDR(1), &cs->dest[1]);
         /*13 - pad */
      }
      /*14/15*/ EMIT_STATE(RS_PIPE_OFFSET(0), cs->RS_PIPE_OFFSET[0]);
      /*16 */ EMIT_STATE(RS_PIPE_OFFSET(1), cs->RS_PIPE_OFFSET[1]);
      /*17 - pad */
      /*18/19*/ EMIT_STATE(RS_WINDOW_SIZE, cs->RS_WINDOW_SIZE);
      /*20/21*/ EMIT_STATE(RS_DITHER(0), cs->RS_DITHER[0]);
      /*22 */ EMIT_STATE(RS_DITHER(1), cs->RS_DITHER[1]);
      /*23 - pad */
      /*24/25*/ EMIT_STATE(RS_CLEAR_CONTROL, cs->RS_CLEAR_CONTROL);
      /*26 */ EMIT_STATE(RS_FILL_VALUE(0), cs->RS_FILL_VALUE[0]);
      /*27 */ EMIT_STATE(RS_FILL_VALUE(1), cs->RS_FILL_VALUE[1]);
      /*28 */ EMIT_STATE(RS_FILL_VALUE(2), cs->RS_FILL_VALUE[2]);
      /*29 */ EMIT_STATE(RS_FILL_VALUE(3), cs->RS_FILL_VALUE[3]);
      /*30/31*/ EMIT_STATE(RS_EXTRA_CONFIG, cs->RS_EXTRA_CONFIG);
      /*32/33*/ EMIT_STATE(RS_KICKER, 0xbeebbeeb);
      etna_coalesce_end(stream, &coalesce);
   } else {
      abort();
   }
}

/* Create a bit field that specifies which samplers are active and thus need
 * to be programmed. 32 bits is enough for 32 samplers; as far as I know this
 * is the upper bound supported on any Vivante hw up to GC4000. */
static uint32_t
active_samplers_bits(struct etna_context *ctx)
{
   return ctx->active_sampler_views & ctx->active_samplers;
}

#define ETNA_3D_CONTEXT_SIZE (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */

static unsigned
required_stream_size(struct etna_context *ctx)
{
   unsigned size = ETNA_3D_CONTEXT_SIZE;

   /* stall + flush */
   size += 2 + 4;

   /* vertex elements */
   size += ctx->vertex_elements->num_elements + 1;

   /* uniforms - worst case (2 words per uniform load) */
   size += ctx->shader.vs->uniforms.const_count * 2;
   size += ctx->shader.fs->uniforms.const_count * 2;

   /* shader */
   size += ctx->shader_state.vs_inst_mem_size + 1;
   size += ctx->shader_state.ps_inst_mem_size + 1;

   /* DRAW_INDEXED_PRIMITIVES command */
   size += 6;

   /* reserve for alignment etc. */
   size += 64;

   return size;
}
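
/* Rough, hypothetical example of the estimate above: with 8 vertex elements,
 * 32 VS and 16 PS uniforms, and 128 + 256 instruction words, this comes to
 * 400 + 6 + 9 + 96 + 386 + 6 + 64 = 967 words reserved up front. */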

/* Weave state before draw operation. This function merges all the compiled
 * state blocks under the context into one device register state. Parts of
 * this state that have changed since the last call (dirty) will be uploaded
 * as state changes in the command buffer. */
void
etna_emit_state(struct etna_context *ctx)
{
   struct etna_cmd_stream *stream = ctx->stream;
   uint32_t active_samplers = active_samplers_bits(ctx);

   /* Pre-reserve the command buffer space which we are likely to need.
    * This must cover all the state emitted below, and the following
    * draw command. */
   etna_cmd_stream_reserve(stream, required_stream_size(ctx));

   uint32_t dirty = ctx->dirty;

   /* Pre-processing: see what caches we need to flush before making state changes. */
   uint32_t to_flush = 0;
   if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
      /* Need to flush COLOR when changing PE.COLOR_FORMAT.OVERWRITE. */
#if 0
      /* TODO */
      if ((ctx->gpu3d.PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE) !=
          (etna_blend_state(ctx->blend)->PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE))
#endif
      to_flush |= VIVS_GL_FLUSH_CACHE_COLOR;
   }
   if (unlikely(dirty & (ETNA_DIRTY_TEXTURE_CACHES)))
      to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE;
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) /* Framebuffer config changed? */
      to_flush |= VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
   if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL))
      to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;

   if (to_flush) {
      etna_set_state(stream, VIVS_GL_FLUSH_CACHE, to_flush);
      etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
   }
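   /* Presumably the RA->PE stall above keeps new primitives out of the pixel
    * engine until the cache flush has taken effect. */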

   /* Flush TS cache before changing TS configuration. */
   if (unlikely(dirty & ETNA_DIRTY_TS)) {
      etna_set_state(stream, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH);
   }

   /* If MULTI_SAMPLE_CONFIG.MSAA_SAMPLES changed, clobber affected shader
    * state to make sure it is always rewritten. */
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
      if ((ctx->gpu3d.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK) !=
          (ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK)) {
         /* XXX what does the GPU set these states to on an MSAA sample count
          * change? Does it do the right thing (increase/decrease as
          * necessary) or something else? Just set some invalid value until we
          * know for sure. */
         ctx->gpu3d.PS_INPUT_COUNT = 0xffffffff;
         ctx->gpu3d.PS_TEMP_REGISTER_CONTROL = 0xffffffff;
      }
   }

   /* Update vertex elements. This is different from any of the other states,
    * in that:
    * a) the number of vertex elements written matters: so write only active ones
    * b) the vertex element states must all be written: do not skip entries
    *    that stay the same */
   if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
      /* Special case: vertex elements must always be sent in full if changed */
      /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
                                     ctx->vertex_elements->num_elements,
                                     ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
      if (ctx->specs.halti >= 2) {
         /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
                                        ctx->vertex_elements->num_elements,
                                        ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
      }
   }

   /* The following code is originally generated by gen_merge_state.py, to
    * emit state in increasing order of address (this makes it possible to
    * merge consecutive register updates into one SET_STATE command)
    *
    * There have been some manual changes, where the weaving operation is not
    * simply bitwise or:
    * - scissor fixp
    * - num vertex elements
    * - scissor handling
    * - num samplers
    * - texture lod
    * - ETNA_DIRTY_TS
    * - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not
    *   change anyway
    * - PS / framebuffer interaction for MSAA
    * - move update of GL_MULTI_SAMPLE_CONFIG first
    * - add unlikely()/likely()
    */
   struct etna_coalesce coalesce;

   etna_coalesce_start(stream, &coalesce);

   /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done
    * here directly or indirectly */
   /* multi sample config is set first, and outside of the normal sorting
    * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
    * possibly PS.TEMP_REGISTER_CONTROL).
    */
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SAMPLE_MASK))) {
      uint32_t val = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx->sample_mask);
      val |= ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG;

      /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, val);
   }
   if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
      /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
      /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL);
   }
   if (likely((dirty & (ETNA_DIRTY_VERTEX_BUFFERS)) && ctx->specs.stream_count == 1)) {
      /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
      /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
   }
   if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
      /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);
   }
   if (likely((dirty & (ETNA_DIRTY_VERTEX_BUFFERS)) && ctx->specs.stream_count > 1)) {
      for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
         /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
      }
      for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
         if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
            /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
         }
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {
      bool point_size_per_vertex =
         etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex;

      /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT,
                           point_size_per_vertex
                              ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
                              : ctx->shader_state.VS_OUTPUT_COUNT);
   }
   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
      /*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);
      /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      for (int x = 0; x < 4; ++x) {
         /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
      for (int x = 0; x < 4; ++x) {
         /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
      /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
   }
   if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
      /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
      /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, ctx->viewport.PA_VIEWPORT_SCALE_Y);
      /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, ctx->viewport.PA_VIEWPORT_SCALE_Z);
      /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, ctx->viewport.PA_VIEWPORT_OFFSET_X);
      /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, ctx->viewport.PA_VIEWPORT_OFFSET_Y);
      /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, ctx->viewport.PA_VIEWPORT_OFFSET_Z);
   }
   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);

      /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, rasterizer->PA_LINE_WIDTH);
      /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, rasterizer->PA_POINT_SIZE);
      /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, rasterizer->PA_SYSTEM_MODE);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, ctx->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT);
   }
   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_SHADER))) {
      uint32_t val = etna_rasterizer_state(ctx->rasterizer)->PA_CONFIG;
      /*00A34*/ EMIT_STATE(PA_CONFIG, val & ctx->shader_state.PA_CONFIG);
   }
   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
      /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);
      /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      for (int x = 0; x < 10; ++x) {
         /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
                         ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
      /* this is a bit of a mess: rasterizer.scissor determines whether to use
       * only the framebuffer scissor, or specific scissor state, and the
       * viewport clips too so the logic spans four CSOs */
      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);

      uint32_t scissor_left =
         MAX2(ctx->framebuffer.SE_SCISSOR_LEFT, ctx->viewport.SE_SCISSOR_LEFT);
      uint32_t scissor_top =
         MAX2(ctx->framebuffer.SE_SCISSOR_TOP, ctx->viewport.SE_SCISSOR_TOP);
      uint32_t scissor_right =
         MIN2(ctx->framebuffer.SE_SCISSOR_RIGHT, ctx->viewport.SE_SCISSOR_RIGHT);
      uint32_t scissor_bottom =
         MIN2(ctx->framebuffer.SE_SCISSOR_BOTTOM, ctx->viewport.SE_SCISSOR_BOTTOM);

      if (rasterizer->scissor) {
         scissor_left = MAX2(ctx->scissor.SE_SCISSOR_LEFT, scissor_left);
         scissor_top = MAX2(ctx->scissor.SE_SCISSOR_TOP, scissor_top);
         scissor_right = MIN2(ctx->scissor.SE_SCISSOR_RIGHT, scissor_right);
         scissor_bottom = MIN2(ctx->scissor.SE_SCISSOR_BOTTOM, scissor_bottom);
      }

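      /* Hypothetical example: a framebuffer scissor of [0, 1920), a viewport
       * spanning [100, 700) and a scissor rect of [150, 600) intersect to a
       * final left/right of 150/600. */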
      /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, scissor_left);
      /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, scissor_top);
      /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, scissor_right);
      /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, scissor_bottom);
   }
   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);

      /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, rasterizer->SE_DEPTH_SCALE);
      /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, rasterizer->SE_DEPTH_BIAS);
      /*00C18*/ EMIT_STATE(SE_CONFIG, rasterizer->SE_CONFIG);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
                         ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);

      uint32_t clip_right =
         MIN2(ctx->framebuffer.SE_CLIP_RIGHT, ctx->viewport.SE_CLIP_RIGHT);
      uint32_t clip_bottom =
         MIN2(ctx->framebuffer.SE_CLIP_BOTTOM, ctx->viewport.SE_CLIP_BOTTOM);

      if (rasterizer->scissor) {
         clip_right = MIN2(ctx->scissor.SE_CLIP_RIGHT, clip_right);
         clip_bottom = MIN2(ctx->scissor.SE_CLIP_BOTTOM, clip_bottom);
      }

      /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT, clip_right);
      /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM, clip_bottom);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
   }
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
      /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
      for (int x = 0; x < 4; ++x) {
         /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
      }
      for (int x = 0; x < 16; ++x) {
         /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
      /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
      /*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);
      /*01008*/ EMIT_STATE(PS_INPUT_COUNT,
                           ctx->framebuffer.msaa_mode
                              ? ctx->shader_state.PS_INPUT_COUNT_MSAA
                              : ctx->shader_state.PS_INPUT_COUNT);
      /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL,
                           ctx->framebuffer.msaa_mode
                              ? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA
                              : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
      /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL);
      /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
   }
   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
      uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG;
      /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, val | ctx->framebuffer.PE_DEPTH_CONFIG);
   }
   if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
      /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, ctx->viewport.PE_DEPTH_NEAR);
      /*01408*/ EMIT_STATE(PE_DEPTH_FAR, ctx->viewport.PE_DEPTH_FAR);
   }
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
      /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, ctx->framebuffer.PE_DEPTH_NORMALIZE);

      if (ctx->specs.pixel_pipes == 1) {
         /*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR, &ctx->framebuffer.PE_DEPTH_ADDR);
      }

      /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, ctx->framebuffer.PE_DEPTH_STRIDE);
   }
   if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
      uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_OP;
      /*01418*/ EMIT_STATE(PE_STENCIL_OP, val);
   }
   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_STENCIL_REF))) {
      uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG;
      /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, val | ctx->stencil_ref.PE_STENCIL_CONFIG);
   }
   if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
      uint32_t val = etna_zsa_state(ctx->zsa)->PE_ALPHA_OP;
      /*01420*/ EMIT_STATE(PE_ALPHA_OP, val);
   }
   if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR))) {
      /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, ctx->blend_color.PE_ALPHA_BLEND_COLOR);
   }
   if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
      uint32_t val = etna_blend_state(ctx->blend)->PE_ALPHA_CONFIG;
      /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, val);
   }
   if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
      uint32_t val;
      /* Use the components and overwrite bits in framebuffer.PE_COLOR_FORMAT
       * as a mask to enable the bits from blend PE_COLOR_FORMAT */
      val = ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK |
              VIVS_PE_COLOR_FORMAT_OVERWRITE);
      val |= etna_blend_state(ctx->blend)->PE_COLOR_FORMAT;
      val &= ctx->framebuffer.PE_COLOR_FORMAT;
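      /* i.e. val = (fb & ~mask) | (fb & blend): bits outside the mask pass
       * straight through from the framebuffer state, while COMPONENTS and
       * OVERWRITE are only set when both CSOs enable them. */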
      /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, val);
   }
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
      if (ctx->specs.pixel_pipes == 1) {
         /*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR, &ctx->framebuffer.PE_COLOR_ADDR);
         /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
         /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
      } else if (ctx->specs.pixel_pipes == 2) {
         /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
         /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
         /*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[0]);
         /*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[1]);
         /*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[0]);
         /*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[1]);
      } else {
         abort();
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_STENCIL_REF))) {
      /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, ctx->stencil_ref.PE_STENCIL_CONFIG_EXT);
   }
   if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
      struct etna_blend_state *blend = etna_blend_state(ctx->blend);
      /*014A4*/ EMIT_STATE(PE_LOGIC_OP, blend->PE_LOGIC_OP | ctx->framebuffer.PE_LOGIC_OP);
   }
   if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
      struct etna_blend_state *blend = etna_blend_state(ctx->blend);
      for (int x = 0; x < 2; ++x) {
         /*014A8*/ EMIT_STATE(PE_DITHER(x), blend->PE_DITHER[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_TS))) {
      /*01654*/ EMIT_STATE(TS_MEM_CONFIG, ctx->framebuffer.TS_MEM_CONFIG);
      /*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE, &ctx->framebuffer.TS_COLOR_STATUS_BASE);
      /*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE, &ctx->framebuffer.TS_COLOR_SURFACE_BASE);
      /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, ctx->framebuffer.TS_COLOR_CLEAR_VALUE);
      /*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE, &ctx->framebuffer.TS_DEPTH_STATUS_BASE);
      /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);
      /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);
   }
   if (unlikely(dirty & ETNA_DIRTY_SAMPLER_VIEWS)) {
      for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
         if ((1 << x) & active_samplers) {
            struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
            /*01720*/ EMIT_STATE(TS_SAMPLER_CONFIG(x), sv->TS_SAMPLER_CONFIG);
         }
      }
      for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
         if ((1 << x) & active_samplers) {
            struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
            /*01740*/ EMIT_STATE_RELOC(TS_SAMPLER_STATUS_BASE(x), &sv->TS_SAMPLER_STATUS_BASE);
         }
      }
      for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
         if ((1 << x) & active_samplers) {
            struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
            /*01760*/ EMIT_STATE(TS_SAMPLER_CLEAR_VALUE(x), sv->TS_SAMPLER_CLEAR_VALUE);
         }
      }
      for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
         if ((1 << x) & active_samplers) {
            struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
            /*01780*/ EMIT_STATE(TS_SAMPLER_CLEAR_VALUE2(x), sv->TS_SAMPLER_CLEAR_VALUE2);
         }
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) {
      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
         uint32_t val = 0; /* 0 == sampler inactive */

         /* set active samplers to their configuration value (determined by both
          * the sampler state and sampler view) */
         if ((1 << x) & active_samplers) {
            struct etna_sampler_state *ss = etna_sampler_state(ctx->sampler[x]);
            struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);

            val = (ss->TE_SAMPLER_CONFIG0 & sv->TE_SAMPLER_CONFIG0_MASK) |
                  sv->TE_SAMPLER_CONFIG0;
         }

         /*02000*/ EMIT_STATE(TE_SAMPLER_CONFIG0(x), val);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) {
      struct etna_sampler_view *sv;

      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
         if ((1 << x) & active_samplers) {
            sv = etna_sampler_view(ctx->sampler_view[x]);
            /*02040*/ EMIT_STATE(TE_SAMPLER_SIZE(x), sv->TE_SAMPLER_SIZE);
         }
      }
      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
         if ((1 << x) & active_samplers) {
            sv = etna_sampler_view(ctx->sampler_view[x]);
            /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), sv->TE_SAMPLER_LOG_SIZE);
         }
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) {
      struct etna_sampler_state *ss;
      struct etna_sampler_view *sv;

      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
         if ((1 << x) & active_samplers) {
            ss = etna_sampler_state(ctx->sampler[x]);
            sv = etna_sampler_view(ctx->sampler_view[x]);

            /* min and max lod are determined by both the sampler and the view */
            /*020C0*/ EMIT_STATE(TE_SAMPLER_LOD_CONFIG(x),
                                 ss->TE_SAMPLER_LOD_CONFIG |
                                 VIVS_TE_SAMPLER_LOD_CONFIG_MAX(MIN2(ss->max_lod, sv->max_lod)) |
                                 VIVS_TE_SAMPLER_LOD_CONFIG_MIN(MAX2(ss->min_lod, sv->min_lod)));
         }
      }
      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
         if ((1 << x) & active_samplers) {
            ss = etna_sampler_state(ctx->sampler[x]);
            sv = etna_sampler_view(ctx->sampler_view[x]);

            /*021C0*/ EMIT_STATE(TE_SAMPLER_CONFIG1(x), ss->TE_SAMPLER_CONFIG1 |
                                                        sv->TE_SAMPLER_CONFIG1);
         }
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) {
      for (int y = 0; y < VIVS_TE_SAMPLER_LOD_ADDR__LEN; ++y) {
         for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
            if ((1 << x) & active_samplers) {
               struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
               /*02400*/ EMIT_STATE_RELOC(TE_SAMPLER_LOD_ADDR(x, y), &sv->TE_SAMPLER_LOD_ADDR[y]);
            }
         }
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);
      /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
      for (int x = 0; x < 2; ++x) {
         /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
      }
   }
   if (unlikely(ctx->specs.tex_astc && (dirty & (ETNA_DIRTY_SAMPLER_VIEWS)))) {
      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
         if ((1 << x) & active_samplers) {
            struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
            /*10500*/ EMIT_STATE(NTE_SAMPLER_ASTC0(x), sv->TE_SAMPLER_ASTC0);
         }
      }
   }
   etna_coalesce_end(stream, &coalesce);
   /* end only EMIT_STATE */

   /* Insert a FE/PE stall as changing the shader instructions (and maybe
    * the uniforms) can corrupt the previous in-progress draw operation.
    * Observed with amoeba on GC2000 during the right-to-left rendering of
    * PI, and this can cause GPU hangs immediately afterwards.
    * I surmise that this is because the "new" locations at 0xc000 are not
    * properly protected against updates as other states seem to be. Hence,
    * we check for the "new" vertex shader instruction offset to decide
    * whether to apply this. */
   if (ctx->dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF) && ctx->specs.vs_offset > 0x4000)
      etna_stall(ctx->stream, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);

   /* We need to update the uniform cache only if one of the following bits is
    * set in ctx->dirty:
    * - ETNA_DIRTY_SHADER
    * - ETNA_DIRTY_CONSTBUF
    * - uniforms_dirty_bits
    *
    * In the case of ETNA_DIRTY_SHADER we need to load all uniforms from the
    * cache. In all other cases we can load only the changed uniforms.
    */
   static const uint32_t uniform_dirty_bits =
      ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF;

   if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
      etna_uniforms_write(
         ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX],
         ctx->shader_state.VS_UNIFORMS, &ctx->shader_state.vs_uniforms_size);

   if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
      etna_uniforms_write(
         ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT],
         ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size);

   /**** Large dynamically-sized state ****/
   if (dirty & (ETNA_DIRTY_SHADER)) {
      /* Special case: a new shader was loaded; simply re-load all uniforms and
       * shader code at once */
      if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
         assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers);
         /* Set icache (VS) */
         etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
                        VIVS_VS_ICACHE_CONTROL_ENABLE |
                        VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
         assert(ctx->shader_state.VS_INST_ADDR.bo);
         etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);

         /* Set icache (PS) */
         etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16);
         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
                        VIVS_VS_ICACHE_CONTROL_ENABLE |
                        VIVS_VS_ICACHE_CONTROL_FLUSH_PS);
         assert(ctx->shader_state.PS_INST_ADDR.bo);
         etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
      } else {
         /* Upload shader directly, first flushing and disabling icache if
          * supported on this hw */
         if (ctx->specs.has_icache) {
            etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
                           VIVS_VS_ICACHE_CONTROL_FLUSH_PS |
                           VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
         }
         if (ctx->specs.has_shader_range_registers) {
            etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
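            /* The 0x100 offsets below appear to account for the PS
             * instruction window starting at slot 0x100 of the unified
             * instruction memory on cores with shader range registers (an
             * assumption; not verified against documentation). */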
            etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
                                                  0x100);
         }
         etna_set_state_multi(stream, ctx->specs.vs_offset,
                              ctx->shader_state.vs_inst_mem_size,
                              ctx->shader_state.VS_INST_MEM);
         etna_set_state_multi(stream, ctx->specs.ps_offset,
                              ctx->shader_state.ps_inst_mem_size,
                              ctx->shader_state.PS_INST_MEM);
      }

      if (ctx->specs.has_unified_uniforms) {
         etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
         etna_set_state(stream, VIVS_PS_UNIFORM_BASE, ctx->specs.max_vs_uniforms);
      }
      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
      etna_set_state_multi(stream, ctx->specs.vs_uniforms_offset,
                           ctx->shader_state.vs_uniforms_size,
                           ctx->shader_state.VS_UNIFORMS);
      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
      etna_set_state_multi(stream, ctx->specs.ps_uniforms_offset,
                           ctx->shader_state.ps_uniforms_size,
                           ctx->shader_state.PS_UNIFORMS);

      /* Copy uniforms to gpu3d, so that incremental updates to uniforms are
       * possible as long as the same shader remains bound */
      ctx->gpu3d.vs_uniforms_size = ctx->shader_state.vs_uniforms_size;
      ctx->gpu3d.ps_uniforms_size = ctx->shader_state.ps_uniforms_size;
      memcpy(ctx->gpu3d.VS_UNIFORMS, ctx->shader_state.VS_UNIFORMS,
             ctx->shader_state.vs_uniforms_size * 4);
      memcpy(ctx->gpu3d.PS_UNIFORMS, ctx->shader_state.PS_UNIFORMS,
             ctx->shader_state.ps_uniforms_size * 4);
   } else {
      /* ideally this cache would only be flushed if there are VS uniform changes */
      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
      etna_coalesce_start(stream, &coalesce);
      for (int x = 0; x < ctx->shader.vs->uniforms.const_count; ++x) {
         if (ctx->gpu3d.VS_UNIFORMS[x] != ctx->shader_state.VS_UNIFORMS[x]) {
            etna_coalsence_emit(stream, &coalesce, ctx->specs.vs_uniforms_offset + x * 4, ctx->shader_state.VS_UNIFORMS[x]);
            ctx->gpu3d.VS_UNIFORMS[x] = ctx->shader_state.VS_UNIFORMS[x];
         }
      }
      etna_coalesce_end(stream, &coalesce);

      /* ideally this cache would only be flushed if there are PS uniform changes */
      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
      etna_coalesce_start(stream, &coalesce);
      for (int x = 0; x < ctx->shader.fs->uniforms.const_count; ++x) {
         if (ctx->gpu3d.PS_UNIFORMS[x] != ctx->shader_state.PS_UNIFORMS[x]) {
            etna_coalsence_emit(stream, &coalesce, ctx->specs.ps_uniforms_offset + x * 4, ctx->shader_state.PS_UNIFORMS[x]);
            ctx->gpu3d.PS_UNIFORMS[x] = ctx->shader_state.PS_UNIFORMS[x];
         }
      }
      etna_coalesce_end(stream, &coalesce);
   }
   /**** End of state update ****/
#undef EMIT_STATE
#undef EMIT_STATE_FIXP
#undef EMIT_STATE_RELOC
   ctx->dirty = 0;
}