etnaviv: update HW headers and fix provoking vertex
src/gallium/drivers/etnaviv/etnaviv_emit.c
/*
 * Copyright (c) 2014-2015 Etnaviv Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Wladimir J. van der Laan <laanwj@gmail.com>
 */

#include "etnaviv_emit.h"

#include "etnaviv_blend.h"
#include "etnaviv_compiler.h"
#include "etnaviv_context.h"
#include "etnaviv_rasterizer.h"
#include "etnaviv_resource.h"
#include "etnaviv_rs.h"
#include "etnaviv_screen.h"
#include "etnaviv_shader.h"
#include "etnaviv_texture.h"
#include "etnaviv_translate.h"
#include "etnaviv_uniforms.h"
#include "etnaviv_util.h"
#include "etnaviv_zsa.h"
#include "hw/common.xml.h"
#include "hw/state.xml.h"
#include "util/u_math.h"

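/* State coalescing: runs of writes to consecutive state registers are merged
 * into a single LOAD_STATE command. A header with count 0 is emitted when a
 * run starts; etna_coalesce_end() then patches the real word count back into
 * that header. Roughly (a sketch, not the exact bit layout -- see the
 * generated hw/ headers for that), three writes to the consecutive addresses
 * 0x0A18, 0x0A1C and 0x0A20 become:
 *
 *    LOAD_STATE base=0x0A18 count=3
 *    value0 value1 value2
 */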
struct etna_coalesce {
   uint32_t start;
   uint32_t last_reg;
   uint32_t last_fixp;
};

/* Queue a STALL command (queues 2 words) */
static inline void
CMD_STALL(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
{
   etna_cmd_stream_emit(stream, VIV_FE_STALL_HEADER_OP_STALL);
   etna_cmd_stream_emit(stream, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
}

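/* Emit a semaphore token followed by a matching stall. When the front end
 * itself has to do the waiting (from == SYNC_RECIPIENT_FE) a dedicated FE
 * STALL command is queued; for stalls between other units the stall is
 * programmed through the GL_STALL_TOKEN state load instead. */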
void
etna_stall(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
{
   etna_cmd_stream_reserve(stream, 4);

   etna_emit_load_state(stream, VIVS_GL_SEMAPHORE_TOKEN >> 2, 1, 0);
   etna_cmd_stream_emit(stream, VIVS_GL_SEMAPHORE_TOKEN_FROM(from) | VIVS_GL_SEMAPHORE_TOKEN_TO(to));

   if (from == SYNC_RECIPIENT_FE) {
      /* if the frontend is to be stalled, queue a STALL frontend command */
      CMD_STALL(stream, from, to);
   } else {
      /* otherwise, load the STALL token state */
      etna_emit_load_state(stream, VIVS_GL_STALL_TOKEN >> 2, 1, 0);
      etna_cmd_stream_emit(stream, VIVS_GL_STALL_TOKEN_FROM(from) | VIVS_GL_STALL_TOKEN_TO(to));
   }
}

static void
etna_coalesce_start(struct etna_cmd_stream *stream,
                    struct etna_coalesce *coalesce)
{
   coalesce->start = etna_cmd_stream_offset(stream);
   coalesce->last_reg = 0;
   coalesce->last_fixp = 0;
}

static void
etna_coalesce_end(struct etna_cmd_stream *stream,
                  struct etna_coalesce *coalesce)
{
   uint32_t end = etna_cmd_stream_offset(stream);
   uint32_t size = end - coalesce->start;

   if (size) {
      uint32_t offset = coalesce->start - 1;
      uint32_t value = etna_cmd_stream_get(stream, offset);

      value |= VIV_FE_LOAD_STATE_HEADER_COUNT(size);
      etna_cmd_stream_set(stream, offset, value);
   }

   /* append needed padding */
   if (end % 2 == 1)
      etna_cmd_stream_emit(stream, 0xdeadbeef);
}

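/* Check whether a new value can be appended to the current LOAD_STATE run.
 * A new header is started when the target register is not contiguous with
 * the last one written, or when the fixed-point flag differs, since that
 * flag sits in the LOAD_STATE header and thus applies to the whole run.
 * last_reg == 0 doubles as "no run in progress". */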
static void
check_coalesce(struct etna_cmd_stream *stream, struct etna_coalesce *coalesce,
               uint32_t reg, uint32_t fixp)
{
   if (coalesce->last_reg != 0) {
      if (((coalesce->last_reg + 4) != reg) || (coalesce->last_fixp != fixp)) {
         etna_coalesce_end(stream, coalesce);
         etna_emit_load_state(stream, reg >> 2, 0, fixp);
         coalesce->start = etna_cmd_stream_offset(stream);
      }
   } else {
      etna_emit_load_state(stream, reg >> 2, 0, fixp);
      coalesce->start = etna_cmd_stream_offset(stream);
   }

   coalesce->last_reg = reg;
   coalesce->last_fixp = fixp;
}

static inline void
etna_coalesce_emit(struct etna_cmd_stream *stream,
                   struct etna_coalesce *coalesce, uint32_t reg,
                   uint32_t value)
{
   check_coalesce(stream, coalesce, reg, 0);
   etna_cmd_stream_emit(stream, value);
}

static inline void
etna_coalesce_emit_fixp(struct etna_cmd_stream *stream,
                        struct etna_coalesce *coalesce, uint32_t reg,
                        uint32_t value)
{
   check_coalesce(stream, coalesce, reg, 1);
   etna_cmd_stream_emit(stream, value);
}

static inline void
etna_coalesce_emit_reloc(struct etna_cmd_stream *stream,
                         struct etna_coalesce *coalesce, uint32_t reg,
                         const struct etna_reloc *r)
{
   if (r->bo) {
      check_coalesce(stream, coalesce, reg, 0);
      etna_cmd_stream_reloc(stream, r);
   }
}

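/* Convenience wrappers for the state emission below. The state name is
 * pasted onto the VIVS_ prefix, so e.g. EMIT_STATE(PS_START_PC, v) expands
 * to etna_coalesce_emit(stream, &coalesce, VIVS_PS_START_PC, v). */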
#define EMIT_STATE(state_name, src_value) \
   etna_coalesce_emit(stream, &coalesce, VIVS_##state_name, src_value)

#define EMIT_STATE_FIXP(state_name, src_value) \
   etna_coalesce_emit_fixp(stream, &coalesce, VIVS_##state_name, src_value)

#define EMIT_STATE_RELOC(state_name, src_value) \
   etna_coalesce_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)

/* submit RS state, without any processing and no dependence on context
 * except TS if this is a source-to-destination blit. */
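/* The word-offset comments below (0/1, 2, ...) count the words each branch
 * writes into the command stream, including LOAD_STATE headers and alignment
 * padding, so they can be checked against the etna_cmd_stream_reserve()
 * sizes. */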
void
etna_submit_rs_state(struct etna_context *ctx,
                     const struct compiled_rs_state *cs)
{
   struct etna_screen *screen = etna_screen(ctx->base.screen);
   struct etna_cmd_stream *stream = ctx->stream;
   struct etna_coalesce coalesce;

   ctx->stats.rs_operations++;

   if (screen->specs.pixel_pipes == 1) {
      etna_cmd_stream_reserve(stream, 22);
      etna_coalesce_start(stream, &coalesce);
      /* 0/1 */ EMIT_STATE(RS_CONFIG, cs->RS_CONFIG);
      /* 2   */ EMIT_STATE_RELOC(RS_SOURCE_ADDR, &cs->source[0]);
      /* 3   */ EMIT_STATE(RS_SOURCE_STRIDE, cs->RS_SOURCE_STRIDE);
      /* 4   */ EMIT_STATE_RELOC(RS_DEST_ADDR, &cs->dest[0]);
      /* 5   */ EMIT_STATE(RS_DEST_STRIDE, cs->RS_DEST_STRIDE);
      /* 6/7 */ EMIT_STATE(RS_WINDOW_SIZE, cs->RS_WINDOW_SIZE);
      /* 8/9 */ EMIT_STATE(RS_DITHER(0), cs->RS_DITHER[0]);
      /*10   */ EMIT_STATE(RS_DITHER(1), cs->RS_DITHER[1]);
      /*11 - pad */
      /*12/13*/ EMIT_STATE(RS_CLEAR_CONTROL, cs->RS_CLEAR_CONTROL);
      /*14   */ EMIT_STATE(RS_FILL_VALUE(0), cs->RS_FILL_VALUE[0]);
      /*15   */ EMIT_STATE(RS_FILL_VALUE(1), cs->RS_FILL_VALUE[1]);
      /*16   */ EMIT_STATE(RS_FILL_VALUE(2), cs->RS_FILL_VALUE[2]);
      /*17   */ EMIT_STATE(RS_FILL_VALUE(3), cs->RS_FILL_VALUE[3]);
      /*18/19*/ EMIT_STATE(RS_EXTRA_CONFIG, cs->RS_EXTRA_CONFIG);
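      /* Writing RS_KICKER starts the resolve operation. The value written
       * appears to be irrelevant; 0xbeebbeeb is just a recognizable marker. */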
      /*20/21*/ EMIT_STATE(RS_KICKER, 0xbeebbeeb);
      etna_coalesce_end(stream, &coalesce);
   } else if (screen->specs.pixel_pipes == 2) {
      etna_cmd_stream_reserve(stream, 34); /* worst case - both pipes multi=1 */
      etna_coalesce_start(stream, &coalesce);
      /* 0/1 */ EMIT_STATE(RS_CONFIG, cs->RS_CONFIG);
      /* 2/3 */ EMIT_STATE(RS_SOURCE_STRIDE, cs->RS_SOURCE_STRIDE);
      /* 4/5 */ EMIT_STATE(RS_DEST_STRIDE, cs->RS_DEST_STRIDE);
      /* 6/7 */ EMIT_STATE_RELOC(RS_PIPE_SOURCE_ADDR(0), &cs->source[0]);
      if (cs->RS_SOURCE_STRIDE & VIVS_RS_SOURCE_STRIDE_MULTI) {
         /*8  */ EMIT_STATE_RELOC(RS_PIPE_SOURCE_ADDR(1), &cs->source[1]);
         /*9 - pad */
      }
      /*10/11*/ EMIT_STATE_RELOC(RS_PIPE_DEST_ADDR(0), &cs->dest[0]);
      if (cs->RS_DEST_STRIDE & VIVS_RS_DEST_STRIDE_MULTI) {
         /*12 */ EMIT_STATE_RELOC(RS_PIPE_DEST_ADDR(1), &cs->dest[1]);
         /*13 - pad */
      }
      /*14/15*/ EMIT_STATE(RS_PIPE_OFFSET(0), cs->RS_PIPE_OFFSET[0]);
      /*16   */ EMIT_STATE(RS_PIPE_OFFSET(1), cs->RS_PIPE_OFFSET[1]);
      /*17 - pad */
      /*18/19*/ EMIT_STATE(RS_WINDOW_SIZE, cs->RS_WINDOW_SIZE);
      /*20/21*/ EMIT_STATE(RS_DITHER(0), cs->RS_DITHER[0]);
      /*22   */ EMIT_STATE(RS_DITHER(1), cs->RS_DITHER[1]);
      /*23 - pad */
      /*24/25*/ EMIT_STATE(RS_CLEAR_CONTROL, cs->RS_CLEAR_CONTROL);
      /*26   */ EMIT_STATE(RS_FILL_VALUE(0), cs->RS_FILL_VALUE[0]);
      /*27   */ EMIT_STATE(RS_FILL_VALUE(1), cs->RS_FILL_VALUE[1]);
      /*28   */ EMIT_STATE(RS_FILL_VALUE(2), cs->RS_FILL_VALUE[2]);
      /*29   */ EMIT_STATE(RS_FILL_VALUE(3), cs->RS_FILL_VALUE[3]);
      /*30/31*/ EMIT_STATE(RS_EXTRA_CONFIG, cs->RS_EXTRA_CONFIG);
      /*32/33*/ EMIT_STATE(RS_KICKER, 0xbeebbeeb);
      etna_coalesce_end(stream, &coalesce);
   } else {
      abort();
   }
}

/* Create a bit field that specifies which samplers are active and thus need
 * to be programmed. 32 bits is enough for 32 samplers; as far as I know this
 * is the upper bound supported on any Vivante hw up to GC4000. */
static uint32_t
active_samplers_bits(struct etna_context *ctx)
{
   return ctx->active_sampler_views & ctx->active_samplers;
}

#define ETNA_3D_CONTEXT_SIZE (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */

static unsigned
required_stream_size(struct etna_context *ctx)
{
   unsigned size = ETNA_3D_CONTEXT_SIZE;

   /* stall + flush */
   size += 2 + 4;

   /* vertex elements */
   size += ctx->vertex_elements->num_elements + 1;

   /* uniforms - worst case (2 words per uniform load) */
   size += ctx->shader.vs->uniforms.const_count * 2;
   size += ctx->shader.fs->uniforms.const_count * 2;

   /* shader */
   size += ctx->shader_state.vs_inst_mem_size + 1;
   size += ctx->shader_state.ps_inst_mem_size + 1;

   /* DRAW_INDEXED_PRIMITIVES command */
   size += 6;

   /* reserve for alignment etc. */
   size += 64;

   return size;
}

/* Weave state before a draw operation. This function merges all the compiled
 * state blocks under the context into one device register state. Parts of
 * this state that have changed since the last call (dirty) will be uploaded
 * as state changes in the command buffer. */
void
etna_emit_state(struct etna_context *ctx)
{
   struct etna_cmd_stream *stream = ctx->stream;
   uint32_t active_samplers = active_samplers_bits(ctx);

   /* Pre-reserve the command buffer space which we are likely to need.
    * This must cover all the state emitted below, and the following
    * draw command. */
   etna_cmd_stream_reserve(stream, required_stream_size(ctx));

   uint32_t dirty = ctx->dirty;

   /* Pre-processing: see what caches we need to flush before making state changes. */
   uint32_t to_flush = 0;
   if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
      /* Need to flush COLOR when changing PE.COLOR_FORMAT.OVERWRITE. */
#if 0
      /* TODO */
      if ((ctx->gpu3d.PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE) !=
          (etna_blend_state(ctx->blend)->PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE))
#endif
      to_flush |= VIVS_GL_FLUSH_CACHE_COLOR;
   }
   if (unlikely(dirty & (ETNA_DIRTY_TEXTURE_CACHES)))
      to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE;
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) /* Framebuffer config changed? */
      to_flush |= VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
   if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL))
      to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;

   if (to_flush) {
      etna_set_state(stream, VIVS_GL_FLUSH_CACHE, to_flush);
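      /* Semaphore/stall pair between RA and PE, to make sure the flush has
       * completed before further state changes or rendering hit the pipe. */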
      etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
   }

   /* If MULTI_SAMPLE_CONFIG.MSAA_SAMPLES changed, clobber affected shader
    * state to make sure it is always rewritten. */
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
      if ((ctx->gpu3d.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK) !=
          (ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK)) {
         /* XXX what does the GPU set these states to on an MSAA sample count
          * change? Does it do the right thing (increase/decrease as
          * necessary), or something else? Just set some invalid value until
          * we know for sure. */
         ctx->gpu3d.PS_INPUT_COUNT = 0xffffffff;
         ctx->gpu3d.PS_TEMP_REGISTER_CONTROL = 0xffffffff;
      }
   }

   /* Update vertex elements. This is different from any of the other states, in that:
    * a) the number of vertex elements written matters: so write only active ones
    * b) the vertex element states must all be written: do not skip entries that stay the same */
   if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
      /* Special case: vertex elements must always be sent in full if changed */
      /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
                                     ctx->vertex_elements->num_elements,
                                     ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
   }

   /* The following code was originally generated by gen_merge_state.py, to
    * emit state in increasing order of address (this makes it possible to merge
    * consecutive register updates into one SET_STATE command).
    *
    * There have been some manual changes, where the weaving operation is not
    * simply bitwise or:
    * - scissor fixp
    * - num vertex elements
    * - scissor handling
    * - num samplers
    * - texture lod
    * - ETNA_DIRTY_TS
    * - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not
    *   change anyway
    * - PS / framebuffer interaction for MSAA
    * - move update of GL_MULTI_SAMPLE_CONFIG first
    * - add unlikely()/likely()
    */
   struct etna_coalesce coalesce;

   etna_coalesce_start(stream, &coalesce);

   /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done
    * here directly or indirectly */
   /* multi sample config is set first, and outside of the normal sorting
    * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
    * possibly PS.TEMP_REGISTER_CONTROL).
    */
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SAMPLE_MASK))) {
      uint32_t val = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx->sample_mask);
      val |= ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG;

      /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, val);
   }
   if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
      /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
      /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL);
   }
   if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
      /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
      /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
   }
   if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
      /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);
   }
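   /* Note that vertex buffer 0 was set above through the single-stream
    * FE_VERTEX_STREAM_* registers; buffers 1..n go through the
    * FE_VERTEX_STREAMS_* register arrays below. */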
   if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
      for (int x = 1; x < ctx->vertex_buffer.count; ++x) {
         /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
      }
      for (int x = 1; x < ctx->vertex_buffer.count; ++x) {
         if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
            /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
         }
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {
      bool point_size_per_vertex =
         etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex;

      /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT,
                           point_size_per_vertex
                              ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
                              : ctx->shader_state.VS_OUTPUT_COUNT);
   }
   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
      /*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);
      /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      for (int x = 0; x < 4; ++x) {
         /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
      for (int x = 0; x < 4; ++x) {
         /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
      /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
   }
   if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
      /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
      /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, ctx->viewport.PA_VIEWPORT_SCALE_Y);
      /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, ctx->viewport.PA_VIEWPORT_SCALE_Z);
      /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, ctx->viewport.PA_VIEWPORT_OFFSET_X);
      /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, ctx->viewport.PA_VIEWPORT_OFFSET_Y);
      /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, ctx->viewport.PA_VIEWPORT_OFFSET_Z);
   }
   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);

      /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, rasterizer->PA_LINE_WIDTH);
      /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, rasterizer->PA_POINT_SIZE);
      /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, rasterizer->PA_SYSTEM_MODE);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, ctx->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT);
   }
   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_SHADER))) {
      uint32_t val = etna_rasterizer_state(ctx->rasterizer)->PA_CONFIG;
      /*00A34*/ EMIT_STATE(PA_CONFIG, val & ctx->shader_state.PA_CONFIG);
   }
   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
      /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);
      /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      for (int x = 0; x < 10; ++x) {
         /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
                         ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
      /* this is a bit of a mess: rasterizer.scissor determines whether to use
       * only the framebuffer scissor, or specific scissor state, and the
       * viewport clips too, so the logic spans four CSOs */
      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);

      uint32_t scissor_left =
         MAX2(ctx->framebuffer.SE_SCISSOR_LEFT, ctx->viewport.SE_SCISSOR_LEFT);
      uint32_t scissor_top =
         MAX2(ctx->framebuffer.SE_SCISSOR_TOP, ctx->viewport.SE_SCISSOR_TOP);
      uint32_t scissor_right =
         MIN2(ctx->framebuffer.SE_SCISSOR_RIGHT, ctx->viewport.SE_SCISSOR_RIGHT);
      uint32_t scissor_bottom =
         MIN2(ctx->framebuffer.SE_SCISSOR_BOTTOM, ctx->viewport.SE_SCISSOR_BOTTOM);

      if (rasterizer->scissor) {
         scissor_left = MAX2(ctx->scissor.SE_SCISSOR_LEFT, scissor_left);
         scissor_top = MAX2(ctx->scissor.SE_SCISSOR_TOP, scissor_top);
         scissor_right = MIN2(ctx->scissor.SE_SCISSOR_RIGHT, scissor_right);
         scissor_bottom = MIN2(ctx->scissor.SE_SCISSOR_BOTTOM, scissor_bottom);
      }

      /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, scissor_left);
      /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, scissor_top);
      /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, scissor_right);
      /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, scissor_bottom);
   }
   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);

      /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, rasterizer->SE_DEPTH_SCALE);
      /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, rasterizer->SE_DEPTH_BIAS);
      /*00C18*/ EMIT_STATE(SE_CONFIG, rasterizer->SE_CONFIG);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
                         ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);

      uint32_t clip_right =
         MIN2(ctx->framebuffer.SE_CLIP_RIGHT, ctx->viewport.SE_CLIP_RIGHT);
      uint32_t clip_bottom =
         MIN2(ctx->framebuffer.SE_CLIP_BOTTOM, ctx->viewport.SE_CLIP_BOTTOM);

      if (rasterizer->scissor) {
         clip_right = MIN2(ctx->scissor.SE_CLIP_RIGHT, clip_right);
         clip_bottom = MIN2(ctx->scissor.SE_CLIP_BOTTOM, clip_bottom);
      }

      /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT, clip_right);
      /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM, clip_bottom);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
   }
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
      /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
      for (int x = 0; x < 4; ++x) {
         /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
      }
      for (int x = 0; x < 16; ++x) {
         /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
      /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
      /*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);
      /*01008*/ EMIT_STATE(PS_INPUT_COUNT,
                           ctx->framebuffer.msaa_mode
                              ? ctx->shader_state.PS_INPUT_COUNT_MSAA
                              : ctx->shader_state.PS_INPUT_COUNT);
      /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL,
                           ctx->framebuffer.msaa_mode
                              ? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA
                              : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
      /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL);
      /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
   }
   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
      uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG;
      /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, val | ctx->framebuffer.PE_DEPTH_CONFIG);
   }
   if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
      /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, ctx->viewport.PE_DEPTH_NEAR);
      /*01408*/ EMIT_STATE(PE_DEPTH_FAR, ctx->viewport.PE_DEPTH_FAR);
   }
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
      /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, ctx->framebuffer.PE_DEPTH_NORMALIZE);

      if (ctx->specs.pixel_pipes == 1) {
         /*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR, &ctx->framebuffer.PE_DEPTH_ADDR);
      }

      /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, ctx->framebuffer.PE_DEPTH_STRIDE);
   }
   if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
      uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_OP;
      /*01418*/ EMIT_STATE(PE_STENCIL_OP, val);
   }
   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_STENCIL_REF))) {
      uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG;
      /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, val | ctx->stencil_ref.PE_STENCIL_CONFIG);
   }
   if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
      uint32_t val = etna_zsa_state(ctx->zsa)->PE_ALPHA_OP;
      /*01420*/ EMIT_STATE(PE_ALPHA_OP, val);
   }
   if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR))) {
      /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, ctx->blend_color.PE_ALPHA_BLEND_COLOR);
   }
   if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
      uint32_t val = etna_blend_state(ctx->blend)->PE_ALPHA_CONFIG;
      /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, val);
   }
   if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
      uint32_t val;
      /* Use the components and overwrite bits in framebuffer.PE_COLOR_FORMAT
       * as a mask to enable the bits from blend PE_COLOR_FORMAT */
      val = ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK |
              VIVS_PE_COLOR_FORMAT_OVERWRITE);
      val |= etna_blend_state(ctx->blend)->PE_COLOR_FORMAT;
      val &= ctx->framebuffer.PE_COLOR_FORMAT;
      /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, val);
   }
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
      if (ctx->specs.pixel_pipes == 1) {
         /*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR, &ctx->framebuffer.PE_COLOR_ADDR);
         /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
         /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
      } else if (ctx->specs.pixel_pipes == 2) {
         /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
         /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
         /*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[0]);
         /*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[1]);
         /*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[0]);
         /*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[1]);
      } else {
         abort();
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_STENCIL_REF))) {
      /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, ctx->stencil_ref.PE_STENCIL_CONFIG_EXT);
   }
   if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
      struct etna_blend_state *blend = etna_blend_state(ctx->blend);
      /*014A4*/ EMIT_STATE(PE_LOGIC_OP, blend->PE_LOGIC_OP | ctx->framebuffer.PE_LOGIC_OP);
   }
   if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
      struct etna_blend_state *blend = etna_blend_state(ctx->blend);
      for (int x = 0; x < 2; ++x) {
         /*014A8*/ EMIT_STATE(PE_DITHER(x), blend->PE_DITHER[x]);
      }
   }
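   /* TS (tile status) state: the fast-clear metadata buffers that accompany
    * the color and depth surfaces. */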
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_TS))) {
      /*01654*/ EMIT_STATE(TS_MEM_CONFIG, ctx->framebuffer.TS_MEM_CONFIG);
      /*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE, &ctx->framebuffer.TS_COLOR_STATUS_BASE);
      /*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE, &ctx->framebuffer.TS_COLOR_SURFACE_BASE);
      /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, ctx->framebuffer.TS_COLOR_CLEAR_VALUE);
      /*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE, &ctx->framebuffer.TS_DEPTH_STATUS_BASE);
      /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);
      /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) {
      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
         uint32_t val = 0; /* 0 == sampler inactive */

         /* set active samplers to their configuration value (determined by both
          * the sampler state and sampler view) */
         if ((1 << x) & active_samplers) {
            struct etna_sampler_state *ss = etna_sampler_state(ctx->sampler[x]);
            struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);

            val = (ss->TE_SAMPLER_CONFIG0 & sv->TE_SAMPLER_CONFIG0_MASK) |
                  sv->TE_SAMPLER_CONFIG0;
         }

         /*02000*/ EMIT_STATE(TE_SAMPLER_CONFIG0(x), val);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) {
      struct etna_sampler_view *sv;

      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
         if ((1 << x) & active_samplers) {
            sv = etna_sampler_view(ctx->sampler_view[x]);
            /*02040*/ EMIT_STATE(TE_SAMPLER_SIZE(x), sv->TE_SAMPLER_SIZE);
         }
      }
      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
         if ((1 << x) & active_samplers) {
            sv = etna_sampler_view(ctx->sampler_view[x]);
            /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), sv->TE_SAMPLER_LOG_SIZE);
         }
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) {
      struct etna_sampler_state *ss;
      struct etna_sampler_view *sv;

      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
         if ((1 << x) & active_samplers) {
            ss = etna_sampler_state(ctx->sampler[x]);
            sv = etna_sampler_view(ctx->sampler_view[x]);

            /* min and max lod are determined by both the sampler and the view */
            /*020C0*/ EMIT_STATE(TE_SAMPLER_LOD_CONFIG(x),
                                 ss->TE_SAMPLER_LOD_CONFIG |
                                 VIVS_TE_SAMPLER_LOD_CONFIG_MAX(MIN2(ss->max_lod, sv->max_lod)) |
                                 VIVS_TE_SAMPLER_LOD_CONFIG_MIN(MAX2(ss->min_lod, sv->min_lod)));
         }
      }
      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
         if ((1 << x) & active_samplers) {
            ss = etna_sampler_state(ctx->sampler[x]);
            sv = etna_sampler_view(ctx->sampler_view[x]);

            /*021C0*/ EMIT_STATE(TE_SAMPLER_CONFIG1(x), ss->TE_SAMPLER_CONFIG1 |
                                                        sv->TE_SAMPLER_CONFIG1);
         }
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) {
      for (int y = 0; y < VIVS_TE_SAMPLER_LOD_ADDR__LEN; ++y) {
         for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
            if ((1 << x) & active_samplers) {
               struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
               /*02400*/ EMIT_STATE_RELOC(TE_SAMPLER_LOD_ADDR(x, y), &sv->TE_SAMPLER_LOD_ADDR[y]);
            }
         }
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);
      /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
      for (int x = 0; x < 2; ++x) {
         /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
      }
   }
   etna_coalesce_end(stream, &coalesce);
   /* end only EMIT_STATE */

   /* Insert a FE/PE stall as changing the shader instructions (and maybe
    * the uniforms) can corrupt the previous in-progress draw operation.
    * Observed with amoeba on GC2000 during the right-to-left rendering
    * of PI, and can cause GPU hangs immediately after.
    * I surmise that this is because the "new" locations at 0xc000 are not
    * properly protected against updates as other states seem to be. Hence,
    * we detect the "new" vertex shader instruction offset to apply this. */
   if (ctx->dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF) && ctx->specs.vs_offset > 0x4000)
      etna_stall(ctx->stream, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);

   /* We need to update the uniform cache only if one of the following bits is
    * set in ctx->dirty:
    * - ETNA_DIRTY_SHADER
    * - ETNA_DIRTY_CONSTBUF
    * - uniforms_dirty_bits
    *
    * In the case of ETNA_DIRTY_SHADER we need to load all uniforms from the
    * cache. In all other cases we can load only the changed uniforms.
    */
   static const uint32_t uniform_dirty_bits =
      ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF;

   if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
      etna_uniforms_write(
         ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX],
         ctx->shader_state.VS_UNIFORMS, &ctx->shader_state.vs_uniforms_size);

   if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
      etna_uniforms_write(
         ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT],
         ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size);

   /**** Large dynamically-sized state ****/
   if (dirty & (ETNA_DIRTY_SHADER)) {
      /* Special case: a new shader was loaded; simply re-load all uniforms and
       * shader code at once */
      if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
         assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers);
         /* Set icache (VS) */
         etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
                        VIVS_VS_ICACHE_CONTROL_ENABLE |
                        VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
         assert(ctx->shader_state.VS_INST_ADDR.bo);
         etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);

         /* Set icache (PS) */
         etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16);
         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
                        VIVS_VS_ICACHE_CONTROL_ENABLE |
                        VIVS_VS_ICACHE_CONTROL_FLUSH_PS);
         assert(ctx->shader_state.PS_INST_ADDR.bo);
         etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
      } else {
         /* Upload shader directly, first flushing and disabling icache if
          * supported on this hw */
         if (ctx->specs.has_icache) {
            etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
                           VIVS_VS_ICACHE_CONTROL_FLUSH_PS |
                           VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
         }
         if (ctx->specs.has_shader_range_registers) {
            etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
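            /* The 0x100 bias presumably reflects that PS instruction memory
             * starts at word offset 0x100 in the unified instruction memory
             * on these cores (an assumption; not confirmed in the headers). */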
            etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
                                                  0x100);
         }
         etna_set_state_multi(stream, ctx->specs.vs_offset,
                              ctx->shader_state.vs_inst_mem_size,
                              ctx->shader_state.VS_INST_MEM);
         etna_set_state_multi(stream, ctx->specs.ps_offset,
                              ctx->shader_state.ps_inst_mem_size,
                              ctx->shader_state.PS_INST_MEM);
      }

      if (ctx->specs.has_unified_uniforms) {
         etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
         etna_set_state(stream, VIVS_PS_UNIFORM_BASE, ctx->specs.max_vs_uniforms);
      }
      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
      etna_set_state_multi(stream, ctx->specs.vs_uniforms_offset,
                           ctx->shader_state.vs_uniforms_size,
                           ctx->shader_state.VS_UNIFORMS);
      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
      etna_set_state_multi(stream, ctx->specs.ps_uniforms_offset,
                           ctx->shader_state.ps_uniforms_size,
                           ctx->shader_state.PS_UNIFORMS);

      /* Copy uniforms to gpu3d, so that incremental updates to uniforms are
       * possible as long as the same shader remains bound */
      ctx->gpu3d.vs_uniforms_size = ctx->shader_state.vs_uniforms_size;
      ctx->gpu3d.ps_uniforms_size = ctx->shader_state.ps_uniforms_size;
      memcpy(ctx->gpu3d.VS_UNIFORMS, ctx->shader_state.VS_UNIFORMS,
             ctx->shader_state.vs_uniforms_size * 4);
      memcpy(ctx->gpu3d.PS_UNIFORMS, ctx->shader_state.PS_UNIFORMS,
             ctx->shader_state.ps_uniforms_size * 4);
   } else {
      /* ideally this cache would only be flushed if there are VS uniform changes */
      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
      etna_coalesce_start(stream, &coalesce);
      for (int x = 0; x < ctx->shader.vs->uniforms.const_count; ++x) {
         if (ctx->gpu3d.VS_UNIFORMS[x] != ctx->shader_state.VS_UNIFORMS[x]) {
            etna_coalesce_emit(stream, &coalesce, ctx->specs.vs_uniforms_offset + x * 4, ctx->shader_state.VS_UNIFORMS[x]);
            ctx->gpu3d.VS_UNIFORMS[x] = ctx->shader_state.VS_UNIFORMS[x];
         }
      }
      etna_coalesce_end(stream, &coalesce);

      /* ideally this cache would only be flushed if there are PS uniform changes */
      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
      etna_coalesce_start(stream, &coalesce);
      for (int x = 0; x < ctx->shader.fs->uniforms.const_count; ++x) {
         if (ctx->gpu3d.PS_UNIFORMS[x] != ctx->shader_state.PS_UNIFORMS[x]) {
            etna_coalesce_emit(stream, &coalesce, ctx->specs.ps_uniforms_offset + x * 4, ctx->shader_state.PS_UNIFORMS[x]);
            ctx->gpu3d.PS_UNIFORMS[x] = ctx->shader_state.PS_UNIFORMS[x];
         }
      }
      etna_coalesce_end(stream, &coalesce);
   }
   /**** End of state update ****/
#undef EMIT_STATE
#undef EMIT_STATE_FIXP
#undef EMIT_STATE_RELOC
   ctx->dirty = 0;
}