etnaviv: gallium driver for Vivante GPUs
[mesa.git] / src / gallium / drivers / etnaviv / etnaviv_emit.c
1 /*
2 * Copyright (c) 2014-2015 Etnaviv Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Wladimir J. van der Laan <laanwj@gmail.com>
25 */
26
27 #include "etnaviv_emit.h"
28
29 #include "etnaviv_blend.h"
30 #include "etnaviv_compiler.h"
31 #include "etnaviv_context.h"
32 #include "etnaviv_rasterizer.h"
33 #include "etnaviv_resource.h"
34 #include "etnaviv_rs.h"
35 #include "etnaviv_screen.h"
36 #include "etnaviv_shader.h"
37 #include "etnaviv_texture.h"
38 #include "etnaviv_translate.h"
39 #include "etnaviv_uniforms.h"
40 #include "etnaviv_util.h"
41 #include "etnaviv_zsa.h"
42 #include "hw/common.xml.h"
43 #include "hw/state.xml.h"
44 #include "util/u_math.h"
45
/* Bookkeeping used while merging writes to consecutive state registers into
 * a single LOAD_STATE command. */
46 struct etna_coalesce {
47 uint32_t start; /* stream offset recorded right after the current LOAD_STATE header was emitted */
48 uint32_t last_reg; /* address of the most recently written register; 0 means no run is open yet */
49 uint32_t last_fixp; /* fixp flag that was passed for the most recently written register */
50 };
51
52 /* Queue a STALL command (queues 2 words) */
53 static inline void
54 CMD_STALL(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
55 {
56 etna_cmd_stream_emit(stream, VIV_FE_STALL_HEADER_OP_STALL);
57 etna_cmd_stream_emit(stream, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
58 }
59
60 void
61 etna_stall(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
62 {
63 etna_cmd_stream_reserve(stream, 4);
64
65 etna_emit_load_state(stream, VIVS_GL_SEMAPHORE_TOKEN >> 2, 1, 0);
66 etna_cmd_stream_emit(stream, VIVS_GL_SEMAPHORE_TOKEN_FROM(from) | VIVS_GL_SEMAPHORE_TOKEN_TO(to));
67
68 if (from == SYNC_RECIPIENT_FE) {
69 /* if the frontend is to be stalled, queue a STALL frontend command */
70 CMD_STALL(stream, from, to);
71 } else {
72 /* otherwise, load the STALL token state */
73 etna_emit_load_state(stream, VIVS_GL_STALL_TOKEN >> 2, 1, 0);
74 etna_cmd_stream_emit(stream, VIVS_GL_STALL_TOKEN_FROM(from) | VIVS_GL_STALL_TOKEN_TO(to));
75 }
76 }
77
78 static void
79 etna_coalesce_start(struct etna_cmd_stream *stream,
80 struct etna_coalesce *coalesce)
81 {
82 coalesce->start = etna_cmd_stream_offset(stream);
83 coalesce->last_reg = 0;
84 coalesce->last_fixp = 0;
85 }
86
87 static void
88 etna_coalesce_end(struct etna_cmd_stream *stream,
89 struct etna_coalesce *coalesce)
90 {
91 uint32_t end = etna_cmd_stream_offset(stream);
92 uint32_t size = end - coalesce->start;
93
94 if (size) {
95 uint32_t offset = coalesce->start - 1;
96 uint32_t value = etna_cmd_stream_get(stream, offset);
97
98 value |= VIV_FE_LOAD_STATE_HEADER_COUNT(size);
99 etna_cmd_stream_set(stream, offset, value);
100 }
101
102 /* append needed padding */
103 if (end % 2 == 1)
104 etna_cmd_stream_emit(stream, 0xdeadbeef);
105 }
106
107 static void
108 check_coalsence(struct etna_cmd_stream *stream, struct etna_coalesce *coalesce,
109 uint32_t reg, uint32_t fixp)
110 {
111 if (coalesce->last_reg != 0) {
112 if (((coalesce->last_reg + 4) != reg) || (coalesce->last_fixp != fixp)) {
113 etna_coalesce_end(stream, coalesce);
114 etna_emit_load_state(stream, reg >> 2, 0, fixp);
115 coalesce->start = etna_cmd_stream_offset(stream);
116 }
117 } else {
118 etna_emit_load_state(stream, reg >> 2, 0, fixp);
119 coalesce->start = etna_cmd_stream_offset(stream);
120 }
121
122 coalesce->last_reg = reg;
123 coalesce->last_fixp = fixp;
124 }
125
/* Write a plain (non fixed-point) value to state register `reg`, merging it
 * into the current LOAD_STATE run when possible. */
static inline void
etna_coalsence_emit(struct etna_cmd_stream *stream,
                    struct etna_coalesce *coalesce, uint32_t reg,
                    uint32_t value)
{
   const uint32_t fixp = 0;

   check_coalsence(stream, coalesce, reg, fixp);
   etna_cmd_stream_emit(stream, value);
}
134
/* Write a value to state register `reg` with the fixp flag set on the
 * LOAD_STATE header, merging it into the current run when possible. */
static inline void
etna_coalsence_emit_fixp(struct etna_cmd_stream *stream,
                         struct etna_coalesce *coalesce, uint32_t reg,
                         uint32_t value)
{
   const uint32_t fixp = 1;

   check_coalsence(stream, coalesce, reg, fixp);
   etna_cmd_stream_emit(stream, value);
}
143
144 static inline void
145 etna_coalsence_emit_reloc(struct etna_cmd_stream *stream,
146 struct etna_coalesce *coalesce, uint32_t reg,
147 const struct etna_reloc *r)
148 {
149 if (r->bo) {
150 check_coalsence(stream, coalesce, reg, 0);
151 etna_cmd_stream_reloc(stream, r);
152 }
153 }
154
/* Shorthand wrappers around the etna_coalsence_emit*() helpers. They expect
 * locals named `stream` and `coalesce` to be in scope at the point of use, and
 * paste `state_name` onto the VIVS_ prefix to form the register address. */

/* Emit a plain value to VIVS_<state_name>. */
155 #define EMIT_STATE(state_name, src_value) \
156 etna_coalsence_emit(stream, &coalesce, VIVS_##state_name, src_value)
157
/* Emit a value to VIVS_<state_name> with the fixp flag set. */
158 #define EMIT_STATE_FIXP(state_name, src_value) \
159 etna_coalsence_emit_fixp(stream, &coalesce, VIVS_##state_name, src_value)
160
/* Emit a relocation (pointer to struct etna_reloc) to VIVS_<state_name>. */
161 #define EMIT_STATE_RELOC(state_name, src_value) \
162 etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)
163
164 /* submit RS state, without any processing and no dependence on context
165 * except TS if this is a source-to-destination blit. */
166 void
167 etna_submit_rs_state(struct etna_context *ctx,
168 const struct compiled_rs_state *cs)
169 {
170 struct etna_screen *screen = etna_screen(ctx->base.screen);
171 struct etna_cmd_stream *stream = ctx->stream;
172 struct etna_coalesce coalesce;
173
174 if (screen->specs.pixel_pipes == 1) {
175 etna_cmd_stream_reserve(stream, 22);
176 etna_coalesce_start(stream, &coalesce);
177 /* 0/1 */ EMIT_STATE(RS_CONFIG, cs->RS_CONFIG);
178 /* 2 */ EMIT_STATE_RELOC(RS_SOURCE_ADDR, &cs->source[0]);
179 /* 3 */ EMIT_STATE(RS_SOURCE_STRIDE, cs->RS_SOURCE_STRIDE);
180 /* 4 */ EMIT_STATE_RELOC(RS_DEST_ADDR, &cs->dest[0]);
181 /* 5 */ EMIT_STATE(RS_DEST_STRIDE, cs->RS_DEST_STRIDE);
182 /* 6/7 */ EMIT_STATE(RS_WINDOW_SIZE, cs->RS_WINDOW_SIZE);
183 /* 8/9 */ EMIT_STATE(RS_DITHER(0), cs->RS_DITHER[0]);
184 /*10 */ EMIT_STATE(RS_DITHER(1), cs->RS_DITHER[1]);
185 /*11 - pad */
186 /*12/13*/ EMIT_STATE(RS_CLEAR_CONTROL, cs->RS_CLEAR_CONTROL);
187 /*14 */ EMIT_STATE(RS_FILL_VALUE(0), cs->RS_FILL_VALUE[0]);
188 /*15 */ EMIT_STATE(RS_FILL_VALUE(1), cs->RS_FILL_VALUE[1]);
189 /*16 */ EMIT_STATE(RS_FILL_VALUE(2), cs->RS_FILL_VALUE[2]);
190 /*17 */ EMIT_STATE(RS_FILL_VALUE(3), cs->RS_FILL_VALUE[3]);
191 /*18/19*/ EMIT_STATE(RS_EXTRA_CONFIG, cs->RS_EXTRA_CONFIG);
192 /*20/21*/ EMIT_STATE(RS_KICKER, 0xbeebbeeb);
193 etna_coalesce_end(stream, &coalesce);
194 } else if (screen->specs.pixel_pipes == 2) {
195 etna_cmd_stream_reserve(stream, 34); /* worst case - both pipes multi=1 */
196 etna_coalesce_start(stream, &coalesce);
197 /* 0/1 */ EMIT_STATE(RS_CONFIG, cs->RS_CONFIG);
198 /* 2/3 */ EMIT_STATE(RS_SOURCE_STRIDE, cs->RS_SOURCE_STRIDE);
199 /* 4/5 */ EMIT_STATE(RS_DEST_STRIDE, cs->RS_DEST_STRIDE);
200 /* 6/7 */ EMIT_STATE_RELOC(RS_PIPE_SOURCE_ADDR(0), &cs->source[0]);
201 if (cs->RS_SOURCE_STRIDE & VIVS_RS_SOURCE_STRIDE_MULTI) {
202 /*8 */ EMIT_STATE_RELOC(RS_PIPE_SOURCE_ADDR(1), &cs->source[1]);
203 /*9 - pad */
204 }
205 /*10/11*/ EMIT_STATE_RELOC(RS_PIPE_DEST_ADDR(0), &cs->dest[0]);
206 if (cs->RS_DEST_STRIDE & VIVS_RS_DEST_STRIDE_MULTI) {
207 /*12*/ EMIT_STATE_RELOC(RS_PIPE_DEST_ADDR(1), &cs->dest[1]);
208 /*13 - pad */
209 }
210 /*14/15*/ EMIT_STATE(RS_PIPE_OFFSET(0), cs->RS_PIPE_OFFSET[0]);
211 /*16 */ EMIT_STATE(RS_PIPE_OFFSET(1), cs->RS_PIPE_OFFSET[1]);
212 /*17 - pad */
213 /*18/19*/ EMIT_STATE(RS_WINDOW_SIZE, cs->RS_WINDOW_SIZE);
214 /*20/21*/ EMIT_STATE(RS_DITHER(0), cs->RS_DITHER[0]);
215 /*22 */ EMIT_STATE(RS_DITHER(1), cs->RS_DITHER[1]);
216 /*23 - pad */
217 /*24/25*/ EMIT_STATE(RS_CLEAR_CONTROL, cs->RS_CLEAR_CONTROL);
218 /*26 */ EMIT_STATE(RS_FILL_VALUE(0), cs->RS_FILL_VALUE[0]);
219 /*27 */ EMIT_STATE(RS_FILL_VALUE(1), cs->RS_FILL_VALUE[1]);
220 /*28 */ EMIT_STATE(RS_FILL_VALUE(2), cs->RS_FILL_VALUE[2]);
221 /*29 */ EMIT_STATE(RS_FILL_VALUE(3), cs->RS_FILL_VALUE[3]);
222 /*30/31*/ EMIT_STATE(RS_EXTRA_CONFIG, cs->RS_EXTRA_CONFIG);
223 /*32/33*/ EMIT_STATE(RS_KICKER, 0xbeebbeeb);
224 etna_coalesce_end(stream, &coalesce);
225 } else {
226 abort();
227 }
228 }
229
230 /* Create bit field that specifies which samplers are active and thus need to be
231 * programmed
232 * 32 bits is enough for 32 samplers. As far as I know this is the upper bound
233 * supported on any Vivante hw
234 * up to GC4000.
235 */
236 static uint32_t
237 active_samplers_bits(struct etna_context *ctx)
238 {
239 return ctx->active_sampler_views & ctx->active_samplers;
240 }
241
242 #define ETNA_3D_CONTEXT_SIZE (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */
243
244 static unsigned
245 required_stream_size(struct etna_context *ctx)
246 {
247 unsigned size = ETNA_3D_CONTEXT_SIZE;
248
249 /* stall + flush */
250 size += 2 + 4;
251
252 /* vertex elements */
253 size += ctx->vertex_elements->num_elements + 1;
254
255 /* uniforms - worst case (2 words per uniform load) */
256 size += ctx->vs->uniforms.const_count * 2;
257 size += ctx->fs->uniforms.const_count * 2;
258
259 /* shader */
260 size += ctx->shader_state.vs_inst_mem_size + 1;
261 size += ctx->shader_state.ps_inst_mem_size + 1;
262
263 /* DRAW_INDEXED_PRIMITIVES command */
264 size += 6;
265
266 /* reserve for alignment etc. */
267 size += 64;
268
269 return size;
270 }
271
272 /* Weave state before draw operation. This function merges all the compiled
273 * state blocks under the context into one device register state. Parts of
274 * this state that are changed since last call (dirty) will be uploaded as
275 * state changes in the command buffer. */
276 void
277 etna_emit_state(struct etna_context *ctx)
278 {
279 struct etna_cmd_stream *stream = ctx->stream;
280 uint32_t active_samplers = active_samplers_bits(ctx);
281
282 /* Pre-reserve the command buffer space which we are likely to need.
283 * This must cover all the state emitted below, and the following
284 * draw command. */
285 etna_cmd_stream_reserve(stream, required_stream_size(ctx));
286
287 uint32_t dirty = ctx->dirty;
288
289 /* Pre-processing: see what caches we need to flush before making state changes. */
290 uint32_t to_flush = 0;
291 if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
292 /* Need flush COLOR when changing PE.COLOR_FORMAT.OVERWRITE. */
293 #if 0
294 /* TODO*/
295 if ((ctx->gpu3d.PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE) !=
296 (etna_blend_state(ctx->blend)->PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE))
297 #endif
298 to_flush |= VIVS_GL_FLUSH_CACHE_COLOR;
299 }
300 if (unlikely(dirty & (ETNA_DIRTY_TEXTURE_CACHES)))
301 to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE;
302 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) /* Framebuffer config changed? */
303 to_flush |= VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
304 if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL))
305 to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
306
307 if (to_flush) {
308 etna_set_state(stream, VIVS_GL_FLUSH_CACHE, to_flush);
309 etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
310 }
311
312 /* If MULTI_SAMPLE_CONFIG.MSAA_SAMPLES changed, clobber affected shader
313 * state to make sure it is always rewritten. */
314 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
315 if ((ctx->gpu3d.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK) !=
316 (ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK)) {
317 /* XXX what does the GPU set these states to on MSAA samples change?
318 * Does it do the right thing?
319 * (increase/decrease as necessary) or something else? Just set some
320 * invalid value until we know for
321 * sure. */
322 ctx->gpu3d.PS_INPUT_COUNT = 0xffffffff;
323 ctx->gpu3d.PS_TEMP_REGISTER_CONTROL = 0xffffffff;
324 }
325 }
326
327 /* Update vertex elements. This is different from any of the other states, in that
328 * a) the number of vertex elements written matters: so write only active ones
329 * b) the vertex element states must all be written: do not skip entries that stay the same */
330 if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
331 /* Special case: vertex elements must always be sent in full if changed */
332 /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
333 ctx->vertex_elements->num_elements,
334 ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
335 }
336
337 /* The following code is originally generated by gen_merge_state.py, to
338 * emit state in increasing order of address (this makes it possible to merge
339 * consecutive register updates into one SET_STATE command)
340 *
341 * There have been some manual changes, where the weaving operation is not
342 * simply bitwise or:
343 * - scissor fixp
344 * - num vertex elements
345 * - scissor handling
346 * - num samplers
347 * - texture lod
348 * - ETNA_DIRTY_TS
349 * - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not
350 * change anyway
351 * - PS / framebuffer interaction for MSAA
352 * - move update of GL_MULTI_SAMPLE_CONFIG first
353 * - add unlikely()/likely()
354 */
355 struct etna_coalesce coalesce;
356
357 etna_coalesce_start(stream, &coalesce);
358
359 /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here
360 * directly
361 * or indirectly */
362 /* multi sample config is set first, and outside of the normal sorting
363 * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
364 * possibly PS.TEMP_REGISTER_CONTROL).
365 */
366 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SAMPLE_MASK))) {
367 uint32_t val = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx->sample_mask);
368 val |= ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG;
369
370 /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, val);
371 }
372 if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER)) &&
373 ctx->index_buffer.ib.buffer) {
374 /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
375 /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL);
376 }
377 if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
378 /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
379 /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
380 }
381 if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
382 /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);
383 }
384 if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
385 for (int x = 1; x < ctx->vertex_buffer.count; ++x) {
386 /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
387 }
388 for (int x = 1; x < ctx->vertex_buffer.count; ++x) {
389 if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
390 /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
391 }
392 }
393 }
394 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
395 /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
396 }
397 if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {
398 bool point_size_per_vertex =
399 etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex;
400
401 /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT,
402 point_size_per_vertex
403 ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
404 : ctx->shader_state.VS_OUTPUT_COUNT);
405 }
406 if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
407 /*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);
408 /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL);
409 }
410 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
411 for (int x = 0; x < 4; ++x) {
412 /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
413 }
414 }
415 if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
416 for (int x = 0; x < 4; ++x) {
417 /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
418 }
419 }
420 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
421 /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
422 /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
423 if (ctx->specs.has_shader_range_registers) {
424 /*0085C*/ EMIT_STATE(VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
425 }
426 }
427 if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
428 /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
429 /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, ctx->viewport.PA_VIEWPORT_SCALE_Y);
430 /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, ctx->viewport.PA_VIEWPORT_SCALE_Z);
431 /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, ctx->viewport.PA_VIEWPORT_OFFSET_X);
432 /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, ctx->viewport.PA_VIEWPORT_OFFSET_Y);
433 /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, ctx->viewport.PA_VIEWPORT_OFFSET_Z);
434 }
435 if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
436 struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
437
438 /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, rasterizer->PA_LINE_WIDTH);
439 /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, rasterizer->PA_POINT_SIZE);
440 /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, rasterizer->PA_SYSTEM_MODE);
441 }
442 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
443 /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, ctx->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT);
444 }
445 if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_SHADER))) {
446 uint32_t val = etna_rasterizer_state(ctx->rasterizer)->PA_CONFIG;
447 /*00A34*/ EMIT_STATE(PA_CONFIG, val & ctx->shader_state.PA_CONFIG);
448 }
449 if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
450 struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
451 /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);
452 /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);
453 }
454 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
455 for (int x = 0; x < 10; ++x) {
456 /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
457 }
458 }
459 if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
460 ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
461 /* this is a bit of a mess: rasterizer.scissor determines whether to use
462 * only the framebuffer scissor, or specific scissor state, and the
463 * viewport clips too so the logic spans four CSOs */
464 struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
465
466 uint32_t scissor_left =
467 MAX2(ctx->framebuffer.SE_SCISSOR_LEFT, ctx->viewport.SE_SCISSOR_LEFT);
468 uint32_t scissor_top =
469 MAX2(ctx->framebuffer.SE_SCISSOR_TOP, ctx->viewport.SE_SCISSOR_TOP);
470 uint32_t scissor_right =
471 MIN2(ctx->framebuffer.SE_SCISSOR_RIGHT, ctx->viewport.SE_SCISSOR_RIGHT);
472 uint32_t scissor_bottom =
473 MIN2(ctx->framebuffer.SE_SCISSOR_BOTTOM, ctx->viewport.SE_SCISSOR_BOTTOM);
474
475 if (rasterizer->scissor) {
476 scissor_left = MAX2(ctx->scissor.SE_SCISSOR_LEFT, scissor_left);
477 scissor_top = MAX2(ctx->scissor.SE_SCISSOR_TOP, scissor_top);
478 scissor_right = MIN2(ctx->scissor.SE_SCISSOR_RIGHT, scissor_right);
479 scissor_bottom = MIN2(ctx->scissor.SE_SCISSOR_BOTTOM, scissor_bottom);
480 }
481
482 /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, scissor_left);
483 /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, scissor_top);
484 /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, scissor_right);
485 /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, scissor_bottom);
486 }
487 if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
488 struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
489
490 /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, rasterizer->SE_DEPTH_SCALE);
491 /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, rasterizer->SE_DEPTH_BIAS);
492 /*00C18*/ EMIT_STATE(SE_CONFIG, rasterizer->SE_CONFIG);
493 }
494 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
495 /*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
496 }
497 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
498 /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
499 for (int x = 0; x < 4; ++x) {
500 /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
501 }
502 for (int x = 0; x < 16; ++x) {
503 /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
504 }
505 }
506 if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
507 /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
508 /*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);
509 /*01008*/ EMIT_STATE(PS_INPUT_COUNT,
510 ctx->framebuffer.msaa_mode
511 ? ctx->shader_state.PS_INPUT_COUNT_MSAA
512 : ctx->shader_state.PS_INPUT_COUNT);
513 /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL,
514 ctx->framebuffer.msaa_mode
515 ? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA
516 : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
517 /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL);
518 /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
519 if (ctx->specs.has_shader_range_registers) {
520 /*0101C*/ EMIT_STATE(PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
521 0x100);
522 }
523 }
524 if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
525 uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG;
526 /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, val | ctx->framebuffer.PE_DEPTH_CONFIG);
527 }
528 if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
529 /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, ctx->viewport.PE_DEPTH_NEAR);
530 /*01408*/ EMIT_STATE(PE_DEPTH_FAR, ctx->viewport.PE_DEPTH_FAR);
531 }
532 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
533 /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, ctx->framebuffer.PE_DEPTH_NORMALIZE);
534
535 if (ctx->specs.pixel_pipes == 1) {
536 /*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR, &ctx->framebuffer.PE_DEPTH_ADDR);
537 }
538
539 /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, ctx->framebuffer.PE_DEPTH_STRIDE);
540 }
541 if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
542 uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_OP;
543 /*01418*/ EMIT_STATE(PE_STENCIL_OP, val);
544 }
545 if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_STENCIL_REF))) {
546 uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG;
547 /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, val | ctx->stencil_ref.PE_STENCIL_CONFIG);
548 }
549 if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
550 uint32_t val = etna_zsa_state(ctx->zsa)->PE_ALPHA_OP;
551 /*01420*/ EMIT_STATE(PE_ALPHA_OP, val);
552 }
553 if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR))) {
554 /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, ctx->blend_color.PE_ALPHA_BLEND_COLOR);
555 }
556 if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
557 uint32_t val = etna_blend_state(ctx->blend)->PE_ALPHA_CONFIG;
558 /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, val);
559 }
560 if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
561 uint32_t val;
562 /* Use the components and overwrite bits in framebuffer.PE_COLOR_FORMAT
563 * as a mask to enable the bits from blend PE_COLOR_FORMAT */
564 val = ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK |
565 VIVS_PE_COLOR_FORMAT_OVERWRITE);
566 val |= etna_blend_state(ctx->blend)->PE_COLOR_FORMAT;
567 val &= ctx->framebuffer.PE_COLOR_FORMAT;
568 /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, val);
569 }
570 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
571 if (ctx->specs.pixel_pipes == 1) {
572 /*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR, &ctx->framebuffer.PE_COLOR_ADDR);
573 /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
574 /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
575 } else if (ctx->specs.pixel_pipes == 2) {
576 /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
577 /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
578 /*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[0]);
579 /*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[1]);
580 /*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[0]);
581 /*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[1]);
582 } else {
583 abort();
584 }
585 }
586 if (unlikely(dirty & (ETNA_DIRTY_STENCIL_REF))) {
587 /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, ctx->stencil_ref.PE_STENCIL_CONFIG_EXT);
588 }
589 if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
590 struct etna_blend_state *blend = etna_blend_state(ctx->blend);
591
592 /*014A4*/ EMIT_STATE(PE_LOGIC_OP, blend->PE_LOGIC_OP);
593 for (int x = 0; x < 2; ++x) {
594 /*014A8*/ EMIT_STATE(PE_DITHER(x), blend->PE_DITHER[x]);
595 }
596 }
597 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_TS))) {
598 /*01654*/ EMIT_STATE(TS_MEM_CONFIG, ctx->framebuffer.TS_MEM_CONFIG);
599 /*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE, &ctx->framebuffer.TS_COLOR_STATUS_BASE);
600 /*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE, &ctx->framebuffer.TS_COLOR_SURFACE_BASE);
601 /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, ctx->framebuffer.TS_COLOR_CLEAR_VALUE);
602 /*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE, &ctx->framebuffer.TS_DEPTH_STATUS_BASE);
603 /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);
604 /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);
605 }
606 if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) {
607 for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
608 uint32_t val = 0; /* 0 == sampler inactive */
609
610 /* set active samplers to their configuration value (determined by both
611 * the sampler state and sampler view) */
612 if ((1 << x) & active_samplers) {
613 struct etna_sampler_state *ss = etna_sampler_state(ctx->sampler[x]);
614 struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
615
616 val = (ss->TE_SAMPLER_CONFIG0 & sv->TE_SAMPLER_CONFIG0_MASK) |
617 sv->TE_SAMPLER_CONFIG0;
618 }
619
620 /*02000*/ EMIT_STATE(TE_SAMPLER_CONFIG0(x), val);
621 }
622 }
623 if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) {
624 struct etna_sampler_view *sv;
625
626 for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
627 if ((1 << x) & active_samplers) {
628 sv = etna_sampler_view(ctx->sampler_view[x]);
629 /*02040*/ EMIT_STATE(TE_SAMPLER_SIZE(x), sv->TE_SAMPLER_SIZE);
630 }
631 }
632 for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
633 if ((1 << x) & active_samplers) {
634 sv = etna_sampler_view(ctx->sampler_view[x]);
635 /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), sv->TE_SAMPLER_LOG_SIZE);
636 }
637 }
638 }
639 if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) {
640 struct etna_sampler_state *ss;
641 struct etna_sampler_view *sv;
642
643 for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
644 if ((1 << x) & active_samplers) {
645 ss = etna_sampler_state(ctx->sampler[x]);
646 sv = etna_sampler_view(ctx->sampler_view[x]);
647
648 /* min and max lod is determined both by the sampler and the view */
649 /*020C0*/ EMIT_STATE(TE_SAMPLER_LOD_CONFIG(x),
650 ss->TE_SAMPLER_LOD_CONFIG |
651 VIVS_TE_SAMPLER_LOD_CONFIG_MAX(MIN2(ss->max_lod, sv->max_lod)) |
652 VIVS_TE_SAMPLER_LOD_CONFIG_MIN(MAX2(ss->min_lod, sv->min_lod)));
653 }
654 }
655 for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
656 if ((1 << x) & active_samplers) {
657 ss = etna_sampler_state(ctx->sampler[x]);
658 sv = etna_sampler_view(ctx->sampler_view[x]);
659
660 /*021C0*/ EMIT_STATE(TE_SAMPLER_CONFIG1(x), ss->TE_SAMPLER_CONFIG1 |
661 sv->TE_SAMPLER_CONFIG1);
662 }
663 }
664 }
665 if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) {
666 for (int y = 0; y < VIVS_TE_SAMPLER_LOD_ADDR__LEN; ++y) {
667 for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
668 if ((1 << x) & active_samplers) {
669 struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
670 /*02400*/ EMIT_STATE_RELOC(TE_SAMPLER_LOD_ADDR(x, y),&sv->TE_SAMPLER_LOD_ADDR[y]);
671 }
672 }
673 }
674 }
675 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
676 /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);
677 /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
678 for (int x = 0; x < 2; ++x) {
679 /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
680 }
681 }
682 etna_coalesce_end(stream, &coalesce);
683 /* end only EMIT_STATE */
684
685 /* Insert a FE/PE stall as changing the shader instructions (and maybe
686 * the uniforms) can corrupt the previous in-progress draw operation.
687 * Observed with amoeba on GC2000 during the right-to-left rendering
688 * of PI, and can cause GPU hangs immediately after.
689 * I summise that this is because the "new" locations at 0xc000 are not
690 * properly protected against updates as other states seem to be. Hence,
691 * we detect the "new" vertex shader instruction offset to apply this. */
692 if (ctx->dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF) && ctx->specs.vs_offset > 0x4000)
693 etna_stall(ctx->stream, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
694
695 /* We need to update the uniform cache only if one of the following bits are
696 * set in ctx->dirty:
697 * - ETNA_DIRTY_SHADER
698 * - ETNA_DIRTY_CONSTBUF
699 * - uniforms_dirty_bits
700 *
701 * In case of ETNA_DIRTY_SHADER we need load all uniforms from the cache. In
702 * all
703 * other cases we can load on the changed uniforms.
704 */
705 static const uint32_t uniform_dirty_bits =
706 ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF;
707
708 if (dirty & (uniform_dirty_bits | ctx->fs->uniforms_dirty_bits))
709 etna_uniforms_write(
710 ctx, ctx->vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX],
711 ctx->shader_state.VS_UNIFORMS, &ctx->shader_state.vs_uniforms_size);
712
713 if (dirty & (uniform_dirty_bits | ctx->vs->uniforms_dirty_bits))
714 etna_uniforms_write(
715 ctx, ctx->fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT],
716 ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size);
717
718 /**** Large dynamically-sized state ****/
719 if (dirty & (ETNA_DIRTY_SHADER)) {
720 /* Special case: a new shader was loaded; simply re-load all uniforms and
721 * shader code at once */
722 /*04000 or 0C000*/
723 etna_set_state_multi(stream, ctx->specs.vs_offset,
724 ctx->shader_state.vs_inst_mem_size,
725 ctx->shader_state.VS_INST_MEM);
726 /*06000 or 0D000*/
727 etna_set_state_multi(stream, ctx->specs.ps_offset,
728 ctx->shader_state.ps_inst_mem_size,
729 ctx->shader_state.PS_INST_MEM);
730 /*05000*/ etna_set_state_multi(stream, VIVS_VS_UNIFORMS(0),
731 ctx->shader_state.vs_uniforms_size,
732 ctx->shader_state.VS_UNIFORMS);
733 /*07000*/ etna_set_state_multi(stream, VIVS_PS_UNIFORMS(0),
734 ctx->shader_state.ps_uniforms_size,
735 ctx->shader_state.PS_UNIFORMS);
736
737 /* Copy uniforms to gpu3d, so that incremental updates to uniforms are
738 * possible as long as the
739 * same shader remains bound */
740 ctx->gpu3d.vs_uniforms_size = ctx->shader_state.vs_uniforms_size;
741 ctx->gpu3d.ps_uniforms_size = ctx->shader_state.ps_uniforms_size;
742 memcpy(ctx->gpu3d.VS_UNIFORMS, ctx->shader_state.VS_UNIFORMS,
743 ctx->shader_state.vs_uniforms_size * 4);
744 memcpy(ctx->gpu3d.PS_UNIFORMS, ctx->shader_state.PS_UNIFORMS,
745 ctx->shader_state.ps_uniforms_size * 4);
746 } else {
747 etna_coalesce_start(stream, &coalesce);
748 for (int x = 0; x < ctx->vs->uniforms.const_count; ++x) {
749 if (ctx->gpu3d.VS_UNIFORMS[x] != ctx->shader_state.VS_UNIFORMS[x]) {
750 /*05000*/ EMIT_STATE(VS_UNIFORMS(x), ctx->shader_state.VS_UNIFORMS[x]);
751 ctx->gpu3d.VS_UNIFORMS[x] = ctx->shader_state.VS_UNIFORMS[x];
752 }
753 }
754 etna_coalesce_end(stream, &coalesce);
755
756 etna_coalesce_start(stream, &coalesce);
757 for (int x = 0; x < ctx->fs->uniforms.const_count; ++x) {
758 if (ctx->gpu3d.PS_UNIFORMS[x] != ctx->shader_state.PS_UNIFORMS[x]) {
759 /*07000*/ EMIT_STATE(PS_UNIFORMS(x), ctx->shader_state.PS_UNIFORMS[x]);
760 ctx->gpu3d.PS_UNIFORMS[x] = ctx->shader_state.PS_UNIFORMS[x];
761 }
762 }
763 etna_coalesce_end(stream, &coalesce);
764 }
765 /**** End of state update ****/
766 #undef EMIT_STATE
767 #undef EMIT_STATE_FIXP
768 #undef EMIT_STATE_RELOC
769 ctx->dirty = 0;
770 }