etnaviv: update Android build files
[mesa.git] / src / gallium / drivers / etnaviv / etnaviv_emit.c
1 /*
2 * Copyright (c) 2014-2015 Etnaviv Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Wladimir J. van der Laan <laanwj@gmail.com>
25 */
26
27 #include "etnaviv_emit.h"
28
29 #include "etnaviv_blend.h"
30 #include "etnaviv_compiler.h"
31 #include "etnaviv_context.h"
32 #include "etnaviv_rasterizer.h"
33 #include "etnaviv_resource.h"
34 #include "etnaviv_rs.h"
35 #include "etnaviv_screen.h"
36 #include "etnaviv_shader.h"
37 #include "etnaviv_texture.h"
38 #include "etnaviv_translate.h"
39 #include "etnaviv_uniforms.h"
40 #include "etnaviv_util.h"
41 #include "etnaviv_zsa.h"
42 #include "hw/common.xml.h"
43 #include "hw/state.xml.h"
44 #include "hw/state_blt.xml.h"
45 #include "util/u_math.h"
46
47 /* Queue a STALL command (queues 2 words) */
48 static inline void
49 CMD_STALL(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
50 {
51 etna_cmd_stream_emit(stream, VIV_FE_STALL_HEADER_OP_STALL);
52 etna_cmd_stream_emit(stream, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
53 }
54
55 void
56 etna_stall(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
57 {
58 bool blt = (from == SYNC_RECIPIENT_BLT) || (to == SYNC_RECIPIENT_BLT);
59 etna_cmd_stream_reserve(stream, blt ? 8 : 4);
60
61 if (blt) {
62 etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
63 etna_cmd_stream_emit(stream, 1);
64 }
65
66 /* TODO: set bit 28/29 of token after BLT COPY_BUFFER */
67 etna_emit_load_state(stream, VIVS_GL_SEMAPHORE_TOKEN >> 2, 1, 0);
68 etna_cmd_stream_emit(stream, VIVS_GL_SEMAPHORE_TOKEN_FROM(from) | VIVS_GL_SEMAPHORE_TOKEN_TO(to));
69
70 if (from == SYNC_RECIPIENT_FE) {
71 /* if the frontend is to be stalled, queue a STALL frontend command */
72 CMD_STALL(stream, from, to);
73 } else {
74 /* otherwise, load the STALL token state */
75 etna_emit_load_state(stream, VIVS_GL_STALL_TOKEN >> 2, 1, 0);
76 etna_cmd_stream_emit(stream, VIVS_GL_STALL_TOKEN_FROM(from) | VIVS_GL_STALL_TOKEN_TO(to));
77 }
78
79 if (blt) {
80 etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
81 etna_cmd_stream_emit(stream, 0);
82 }
83 }
84
/* Shorthands for queueing a register write through the etna_coalsence_emit*
 * helpers. Each one prefixes state_name with VIVS_ and relies on local
 * variables named `stream` and `coalesce` being in scope at the point of use.
 * All three are #undef'd at the end of etna_emit_state(). */
85 #define EMIT_STATE(state_name, src_value) \
86 etna_coalsence_emit(stream, &coalesce, VIVS_##state_name, src_value)
87
/* Same as EMIT_STATE, but routed through etna_coalsence_emit_fixp(). */
88 #define EMIT_STATE_FIXP(state_name, src_value) \
89 etna_coalsence_emit_fixp(stream, &coalesce, VIVS_##state_name, src_value)
90
/* Same as EMIT_STATE, but routed through etna_coalsence_emit_reloc();
 * every use in this file passes the address of a relocation field as src_value. */
91 #define EMIT_STATE_RELOC(state_name, src_value) \
92 etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)
93
/* Fixed base term of the reservation computed by required_stream_size(). */
94 #define ETNA_3D_CONTEXT_SIZE (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */
95
96 static unsigned
97 required_stream_size(struct etna_context *ctx)
98 {
99 unsigned size = ETNA_3D_CONTEXT_SIZE;
100
101 /* stall + flush */
102 size += 2 + 4;
103
104 /* vertex elements */
105 size += ctx->vertex_elements->num_elements + 1;
106
107 /* uniforms - worst case (2 words per uniform load) */
108 size += ctx->shader.vs->uniforms.imm_count * 2;
109 size += ctx->shader.fs->uniforms.imm_count * 2;
110
111 /* shader */
112 size += ctx->shader_state.vs_inst_mem_size + 1;
113 size += ctx->shader_state.ps_inst_mem_size + 1;
114
115 /* DRAW_INDEXED_PRIMITIVES command */
116 size += 6;
117
118 /* reserve for alignment etc. */
119 size += 64;
120
121 return size;
122 }
123
124 /* Emit state that only exists on HALTI5+ */
125 static void
126 emit_halti5_only_state(struct etna_context *ctx, int vs_output_count)
127 {
128 struct etna_cmd_stream *stream = ctx->stream;
129 uint32_t dirty = ctx->dirty;
130 struct etna_coalesce coalesce;
131
132 etna_coalesce_start(stream, &coalesce);
133 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
134 /* Magic states (load balancing, inter-unit sync, buffers) */
135 /*007C4*/ EMIT_STATE(FE_HALTI5_ID_CONFIG, ctx->shader_state.FE_HALTI5_ID_CONFIG);
136 /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT, vs_output_count | ((vs_output_count * 0x10) << 8));
137 /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0, 0x0001000e | ((0x110/vs_output_count) << 20));
138 for (int x = 0; x < 4; ++x) {
139 /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
140 }
141 }
142 if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
143 for (int x = 0; x < 4; ++x) {
144 /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x), ctx->shader_state.VS_INPUT[x]);
145 }
146 }
147 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
148 /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
149 /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT, vs_output_count);
150 /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
151 /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS, ctx->shader_state.GL_HALTI5_SH_SPECIALS);
152 }
153 etna_coalesce_end(stream, &coalesce);
154 }
155
156 /* Emit state that no longer exists on HALTI5 */
157 static void
158 emit_pre_halti5_state(struct etna_context *ctx)
159 {
160 struct etna_cmd_stream *stream = ctx->stream;
161 uint32_t dirty = ctx->dirty;
162 struct etna_coalesce coalesce;
163
164 etna_coalesce_start(stream, &coalesce);
165 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
166 /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
167 }
168 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
169 for (int x = 0; x < 4; ++x) {
170 /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
171 }
172 }
173 if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
174 for (int x = 0; x < 4; ++x) {
175 /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
176 }
177 }
178 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
179 /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
180 }
181 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
182 for (int x = 0; x < 10; ++x) {
183 /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
184 }
185 }
186 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
187 /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
188 for (int x = 0; x < 4; ++x) {
189 /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
190 }
191 for (int x = 0; x < 16; ++x) {
192 /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
193 }
194 }
195 if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
196 /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
197 }
198 if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
199 /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
200 }
201 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
202 /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
203 for (int x = 0; x < 2; ++x) {
204 /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
205 }
206 }
207 etna_coalesce_end(stream, &coalesce);
208 }
209
210 /* Weave state before draw operation. This function merges all the compiled
211 * state blocks under the context into one device register state. Parts of
212 * this state that are changed since last call (dirty) will be uploaded as
213 * state changes in the command buffer. */
/* Overview of the sequence below:
 *  - command stream space is reserved once, up front, using
 *    required_stream_size(ctx);
 *  - cache flushes selected from the dirty bits are emitted before any state;
 *  - vertex element state is written with etna_set_state_multi();
 *  - the generic register block is emitted through one etna_coalesce
 *    start/end pair, followed by the HALTI5 or pre-HALTI5 specific block and
 *    ctx->emit_texture_state();
 *  - shader code and uniforms are emitted last;
 *  - ctx->dirty and ctx->dirty_sampler_views are reset to 0 before returning.
 *
 * ctx: context whose state is emitted into ctx->stream. */
214 void
215 etna_emit_state(struct etna_context *ctx)
216 {
217 struct etna_cmd_stream *stream = ctx->stream;
/* ccw is used below as the index into the stencil state arrays
 * (PE_STENCIL_OP[ccw], PE_STENCIL_CONFIG[ccw], ...). */
218 unsigned ccw = ctx->rasterizer->front_ccw;
219
220
221 /* Pre-reserve the command buffer space which we are likely to need.
222 * This must cover all the state emitted below, and the following
223 * draw command. */
224 etna_cmd_stream_reserve(stream, required_stream_size(ctx));
225
/* Local snapshot of the dirty mask; almost all tests below use this copy. */
226 uint32_t dirty = ctx->dirty;
227
228 /* Pre-processing: see what caches we need to flush before making state changes. */
229 uint32_t to_flush = 0;
230 if (unlikely(dirty & (ETNA_DIRTY_BLEND)))
231 to_flush |= VIVS_GL_FLUSH_CACHE_COLOR;
232 if (unlikely(dirty & (ETNA_DIRTY_TEXTURE_CACHES)))
233 to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE;
234 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) /* Framebuffer config changed? */
235 to_flush |= VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
236 if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL))
237 to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
238
/* Any requested flush is followed by an RA -> PE stall. */
239 if (to_flush) {
240 etna_set_state(stream, VIVS_GL_FLUSH_CACHE, to_flush);
241 etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
242 }
243
244 /* Flush TS cache before changing TS configuration. */
245 if (unlikely(dirty & ETNA_DIRTY_TS)) {
246 etna_set_state(stream, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH);
247 }
248
249 /* Update vertex elements. This is different from any of the other states, in that
250 * a) the number of vertex elements written matters: so write only active ones
251 * b) the vertex element states must all be written: do not skip entries that stay the same */
252 if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
253 if (ctx->specs.halti >= 5) {
254 /*17800*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
255 ctx->vertex_elements->num_elements,
256 ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG0);
257 /*17A00*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_SCALE(0),
258 ctx->vertex_elements->num_elements,
259 ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
260 /*17A80*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),
261 ctx->vertex_elements->num_elements,
262 ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG1);
263 } else {
264 /* Special case: vertex elements must always be sent in full if changed */
265 /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
266 ctx->vertex_elements->num_elements,
267 ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
268 if (ctx->specs.halti >= 2) {
269 /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
270 ctx->vertex_elements->num_elements,
271 ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
272 }
273 }
274 }
/* Select the VS output count variant according to the rasterizer's
 * point_size_per_vertex setting. */
275 unsigned vs_output_count = etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex
276 ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
277 : ctx->shader_state.VS_OUTPUT_COUNT;
278
279 /* The following code is originally generated by gen_merge_state.py, to
280 * emit state in increasing order of address (this makes it possible to merge
281 * consecutive register updates into one SET_STATE command)
282 *
283 * There have been some manual changes, where the weaving operation is not
284 * simply bitwise or:
285 * - scissor fixp
286 * - num vertex elements
287 * - scissor handling
288 * - num samplers
289 * - texture lod
290 * - ETNA_DIRTY_TS
291 * - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not
292 * change anyway
293 * - PS / framebuffer interaction for MSAA
294 * - move update of GL_MULTI_SAMPLE_CONFIG first
295 * - add unlikely()/likely()
296 */
297 struct etna_coalesce coalesce;
298
299 etna_coalesce_start(stream, &coalesce);
300
301 /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here
302 * directly
303 * or indirectly */
304 /* multi sample config is set first, and outside of the normal sorting
305 * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
306 * possibly PS.TEMP_REGISTER_CONTROL).
307 */
308 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SAMPLE_MASK))) {
309 uint32_t val = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx->sample_mask);
310 val |= ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG;
311
312 /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, val);
313 }
314 if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
315 /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
316 /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL);
317 }
318 if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
319 /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);
320 }
321 if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
322 if (ctx->specs.halti >= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */
323 for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
324 /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
325 }
/* The stream control word is only written for streams whose base address has a bo. */
326 for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
327 if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
328 /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
329 }
330 }
331 } else if(ctx->specs.stream_count > 1) { /* hw w/ multiple vertex streams */
332 for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
333 /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
334 }
335 for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
336 if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
337 /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
338 }
339 }
340 } else { /* hw w/ single vertex stream */
341 /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
342 /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
343 }
344 }
345 /* gallium has instance divisor as part of elements state */
346 if ((dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) && ctx->specs.halti >= 2) {
347 for (int x = 0; x < ctx->vertex_elements->num_buffers; ++x) {
348 /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_VERTEX_DIVISOR(x), ctx->vertex_elements->NFE_VERTEX_STREAMS_VERTEX_DIVISOR[x]);
349 }
350 }
351
352 if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {
353
354 /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, vs_output_count);
355 }
356 if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
357 /*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);
358 /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL);
359 }
360 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
361 /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
362 }
363 if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
364 /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
365 /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, ctx->viewport.PA_VIEWPORT_SCALE_Y);
366 /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, ctx->viewport.PA_VIEWPORT_SCALE_Z);
367 /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, ctx->viewport.PA_VIEWPORT_OFFSET_X);
368 /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, ctx->viewport.PA_VIEWPORT_OFFSET_Y);
369 /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, ctx->viewport.PA_VIEWPORT_OFFSET_Z);
370 }
371 if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
372 struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
373
374 /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, rasterizer->PA_LINE_WIDTH);
375 /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, rasterizer->PA_POINT_SIZE);
376 /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, rasterizer->PA_SYSTEM_MODE);
377 }
378 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
379 /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, ctx->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT);
380 }
381 if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_SHADER))) {
382 uint32_t val = etna_rasterizer_state(ctx->rasterizer)->PA_CONFIG;
/* The rasterizer value is ANDed (not ORed) with the shader's PA_CONFIG. */
383 /*00A34*/ EMIT_STATE(PA_CONFIG, val & ctx->shader_state.PA_CONFIG);
384 }
385 if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
386 struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
/* Both wide line width registers receive the same PA_LINE_WIDTH value. */
387 /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);
388 /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);
389 }
390 if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
391 ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
392 /* this is a bit of a mess: rasterizer.scissor determines whether to use
393 * only the framebuffer scissor, or specific scissor state, and the
394 * viewport clips too so the logic spans four CSOs */
395 struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
396
/* Start from the intersection of the framebuffer and viewport rectangles:
 * MAX2 of the left/top edges, MIN2 of the right/bottom edges. */
397 uint32_t scissor_left =
398 MAX2(ctx->framebuffer.SE_SCISSOR_LEFT, ctx->viewport.SE_SCISSOR_LEFT);
399 uint32_t scissor_top =
400 MAX2(ctx->framebuffer.SE_SCISSOR_TOP, ctx->viewport.SE_SCISSOR_TOP);
401 uint32_t scissor_right =
402 MIN2(ctx->framebuffer.SE_SCISSOR_RIGHT, ctx->viewport.SE_SCISSOR_RIGHT);
403 uint32_t scissor_bottom =
404 MIN2(ctx->framebuffer.SE_SCISSOR_BOTTOM, ctx->viewport.SE_SCISSOR_BOTTOM);
405
/* Narrow further by the explicit scissor state when the rasterizer enables it. */
406 if (rasterizer->scissor) {
407 scissor_left = MAX2(ctx->scissor.SE_SCISSOR_LEFT, scissor_left);
408 scissor_top = MAX2(ctx->scissor.SE_SCISSOR_TOP, scissor_top);
409 scissor_right = MIN2(ctx->scissor.SE_SCISSOR_RIGHT, scissor_right);
410 scissor_bottom = MIN2(ctx->scissor.SE_SCISSOR_BOTTOM, scissor_bottom);
411 }
412
413 /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, scissor_left);
414 /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, scissor_top);
415 /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, scissor_right);
416 /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, scissor_bottom);
417 }
418 if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
419 struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
420
421 /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, rasterizer->SE_DEPTH_SCALE);
422 /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, rasterizer->SE_DEPTH_BIAS);
423 /*00C18*/ EMIT_STATE(SE_CONFIG, rasterizer->SE_CONFIG);
424 }
425 if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
426 ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
427 struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
428
/* Same framebuffer/viewport/scissor combination as above, applied to the
 * right and bottom clip values. */
429 uint32_t clip_right =
430 MIN2(ctx->framebuffer.SE_CLIP_RIGHT, ctx->viewport.SE_CLIP_RIGHT);
431 uint32_t clip_bottom =
432 MIN2(ctx->framebuffer.SE_CLIP_BOTTOM, ctx->viewport.SE_CLIP_BOTTOM);
433
434 if (rasterizer->scissor) {
435 clip_right = MIN2(ctx->scissor.SE_CLIP_RIGHT, clip_right);
436 clip_bottom = MIN2(ctx->scissor.SE_CLIP_BOTTOM, clip_bottom);
437 }
438
439 /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT, clip_right);
440 /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM, clip_bottom);
441 }
442 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
443 /*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
444 }
445 if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
446 /*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);
/* The _MSAA variants of the next two values are used when framebuffer.msaa_mode is non-zero. */
447 /*01008*/ EMIT_STATE(PS_INPUT_COUNT,
448 ctx->framebuffer.msaa_mode
449 ? ctx->shader_state.PS_INPUT_COUNT_MSAA
450 : ctx->shader_state.PS_INPUT_COUNT);
451 /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL,
452 ctx->framebuffer.msaa_mode
453 ? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA
454 : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
455 /*01010*/ EMIT_STATE(PS_CONTROL, ctx->framebuffer.PS_CONTROL);
456 /*01030*/ EMIT_STATE(PS_CONTROL_EXT, ctx->framebuffer.PS_CONTROL_EXT);
457 }
458 if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SHADER))) {
/* ZSA and framebuffer values are ORed together, then masked by the shader's value. */
459 /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, (etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG |
460 ctx->framebuffer.PE_DEPTH_CONFIG) &
461 ctx->shader_state.PE_DEPTH_CONFIG);
462 }
463 if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
464 /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, ctx->viewport.PE_DEPTH_NEAR);
465 /*01408*/ EMIT_STATE(PE_DEPTH_FAR, ctx->viewport.PE_DEPTH_FAR);
466 }
467 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
468 /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, ctx->framebuffer.PE_DEPTH_NORMALIZE);
469
/* PE_DEPTH_ADDR is only written here for single-pipe hardware; with two
 * pixel pipes the PE_PIPE_DEPTH_ADDR registers are written further down. */
470 if (ctx->specs.pixel_pipes == 1) {
471 /*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR, &ctx->framebuffer.PE_DEPTH_ADDR);
472 }
473
474 /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, ctx->framebuffer.PE_DEPTH_STRIDE);
475 }
476
477 if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_RASTERIZER))) {
478 uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_OP[ccw];
479 /*01418*/ EMIT_STATE(PE_STENCIL_OP, val);
480 }
481 if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_STENCIL_REF | ETNA_DIRTY_RASTERIZER))) {
482 uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG[ccw];
483 /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, val | ctx->stencil_ref.PE_STENCIL_CONFIG[ccw]);
484 }
485 if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
486 uint32_t val = etna_zsa_state(ctx->zsa)->PE_ALPHA_OP;
487 /*01420*/ EMIT_STATE(PE_ALPHA_OP, val);
488 }
489 if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR))) {
490 /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, ctx->blend_color.PE_ALPHA_BLEND_COLOR);
491 }
492 if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
493 uint32_t val = etna_blend_state(ctx->blend)->PE_ALPHA_CONFIG;
494 /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, val);
495 }
496 if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
497 uint32_t val;
498 /* Use the components and overwrite bits in framebuffer.PE_COLOR_FORMAT
499 * as a mask to enable the bits from blend PE_COLOR_FORMAT */
500 val = ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK |
501 VIVS_PE_COLOR_FORMAT_OVERWRITE);
502 val |= etna_blend_state(ctx->blend)->PE_COLOR_FORMAT;
503 val &= ctx->framebuffer.PE_COLOR_FORMAT;
504 /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, val);
505 }
506 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
507 if (ctx->specs.pixel_pipes == 1) {
508 /*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR, &ctx->framebuffer.PE_COLOR_ADDR);
509 /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
510 /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
511 } else if (ctx->specs.pixel_pipes == 2) {
512 /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
513 /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
514 /*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[0]);
515 /*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[1]);
516 /*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[0]);
517 /*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[1]);
518 } else {
/* Only 1 or 2 pixel pipes are handled; any other value aborts the process. */
519 abort();
520 }
521 }
522 if (unlikely(dirty & (ETNA_DIRTY_STENCIL_REF | ETNA_DIRTY_RASTERIZER))) {
523 /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, ctx->stencil_ref.PE_STENCIL_CONFIG_EXT[ccw]);
524 }
525 if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
526 struct etna_blend_state *blend = etna_blend_state(ctx->blend);
527 /*014A4*/ EMIT_STATE(PE_LOGIC_OP, blend->PE_LOGIC_OP | ctx->framebuffer.PE_LOGIC_OP);
528 }
529 if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
530 struct etna_blend_state *blend = etna_blend_state(ctx->blend);
531 for (int x = 0; x < 2; ++x) {
532 /*014A8*/ EMIT_STATE(PE_DITHER(x), blend->PE_DITHER[x]);
533 }
534 }
535 if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR))) {
536 /*014B0*/ EMIT_STATE(PE_ALPHA_COLOR_EXT0, ctx->blend_color.PE_ALPHA_COLOR_EXT0);
537 /*014B4*/ EMIT_STATE(PE_ALPHA_COLOR_EXT1, ctx->blend_color.PE_ALPHA_COLOR_EXT1);
538 }
539 if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_RASTERIZER))) {
540 /*014B8*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT2, etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG_EXT2[ccw]);
541 }
/* NOTE(review): unbraced if -- the EMIT_STATE on the following line is its
 * only body; PE_MEM_CONFIG is written for halti >= 3 only. */
542 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER)) && ctx->specs.halti >= 3)
543 /*014BC*/ EMIT_STATE(PE_MEM_CONFIG, ctx->framebuffer.PE_MEM_CONFIG);
544 if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_TS))) {
545 /*01654*/ EMIT_STATE(TS_MEM_CONFIG, ctx->framebuffer.TS_MEM_CONFIG);
546 /*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE, &ctx->framebuffer.TS_COLOR_STATUS_BASE);
547 /*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE, &ctx->framebuffer.TS_COLOR_SURFACE_BASE);
548 /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, ctx->framebuffer.TS_COLOR_CLEAR_VALUE);
549 /*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE, &ctx->framebuffer.TS_DEPTH_STATUS_BASE);
550 /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);
551 /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);
552 /*016BC*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE_EXT, ctx->framebuffer.TS_COLOR_CLEAR_VALUE_EXT);
553 }
554 if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
555 /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);
556 }
557 etna_coalesce_end(stream, &coalesce);
558 /* end only EMIT_STATE */
559
560 /* Emit strongly architecture-specific state */
561 if (ctx->specs.halti >= 5)
562 emit_halti5_only_state(ctx, vs_output_count);
563 else
564 emit_pre_halti5_state(ctx);
565
/* Texture state emission is delegated to the context's emit_texture_state callback. */
566 ctx->emit_texture_state(ctx);
567
568 /* Insert a FE/PE stall as changing the shader instructions (and maybe
569 * the uniforms) can corrupt the previous in-progress draw operation.
570 * Observed with amoeba on GC2000 during the right-to-left rendering
571 * of PI, and can cause GPU hangs immediately after.
572 * I surmise that this is because the "new" locations at 0xc000 are not
573 * properly protected against updates as other states seem to be. Hence,
574 * we detect the "new" vertex shader instruction offset to apply this. */
/* NOTE(review): this test reads ctx->dirty rather than the local `dirty`
 * snapshot used everywhere else; the two only agree if nothing called above
 * modifies ctx->dirty -- confirm. */
575 if (ctx->dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF) && ctx->specs.vs_offset > 0x4000)
576 etna_stall(ctx->stream, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
577
578 /* We need to update the uniform cache only if one of the following bits are
579 * set in ctx->dirty:
580 * - ETNA_DIRTY_SHADER
581 * - ETNA_DIRTY_CONSTBUF
582 * - uniforms_dirty_bits
583 *
584 * In case of ETNA_DIRTY_SHADER we need to load all uniforms from the cache. In
585 * all
586 * other cases we can load only the changed uniforms.
587 */
588 static const uint32_t uniform_dirty_bits =
589 ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF;
590
591 /**** Large dynamically-sized state ****/
/* The VS_UNIFORM_CACHE flush writes below are only emitted when halti < 5. */
592 bool do_uniform_flush = ctx->specs.halti < 5;
593 if (dirty & (ETNA_DIRTY_SHADER)) {
594 /* Special case: a new shader was loaded; simply re-load all uniforms and
595 * shader code at once */
596 /* This sequence is special, do not change ordering unless necessary. According to comment
597 snippets in the Vivante kernel driver a process called "steering" goes on while programming
598 shader state. This (as I understand it) means certain unified states are "steered"
599 toward a specific shader unit (VS/PS/...) based on either explicit flags in register
600 00860, or what other state is written before "auto-steering". So this means some
601 state can legitimately be programmed multiple times.
602 */
603
604 if (ctx->specs.halti >= 5) { /* ICACHE (HALTI5) */
605 assert(ctx->shader_state.VS_INST_ADDR.bo && ctx->shader_state.PS_INST_ADDR.bo);
606 /* Set icache (VS) */
607 etna_set_state(stream, VIVS_VS_NEWRANGE_LOW, 0);
608 etna_set_state(stream, VIVS_VS_NEWRANGE_HIGH, ctx->shader_state.vs_inst_mem_size / 4);
609 assert(ctx->shader_state.VS_INST_ADDR.bo);
610 etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
611 etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
612 etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
613 etna_set_state(stream, VIVS_VS_ICACHE_COUNT, ctx->shader_state.vs_inst_mem_size / 4 - 1);
614
615 /* Set icache (PS) */
616 etna_set_state(stream, VIVS_PS_NEWRANGE_LOW, 0);
617 etna_set_state(stream, VIVS_PS_NEWRANGE_HIGH, ctx->shader_state.ps_inst_mem_size / 4);
618 assert(ctx->shader_state.PS_INST_ADDR.bo);
619 etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
620 etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
/* The PS sequence also writes VIVS_VS_ICACHE_CONTROL, exactly as the VS sequence above does. */
621 etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
622 etna_set_state(stream, VIVS_PS_ICACHE_COUNT, ctx->shader_state.ps_inst_mem_size / 4 - 1);
623
624 } else if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
625 /* ICACHE (pre-HALTI5) */
626 assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers);
627 /* Set icache (VS) */
628 etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
629 etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
630 VIVS_VS_ICACHE_CONTROL_ENABLE |
631 VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
632 assert(ctx->shader_state.VS_INST_ADDR.bo);
633 etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
634
635 /* Set icache (PS) */
636 etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16);
637 etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
638 VIVS_VS_ICACHE_CONTROL_ENABLE |
639 VIVS_VS_ICACHE_CONTROL_FLUSH_PS);
640 assert(ctx->shader_state.PS_INST_ADDR.bo);
641 etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
642 } else {
643 /* Upload shader directly, first flushing and disabling icache if
644 * supported on this hw */
645 if (ctx->specs.has_icache) {
646 etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
647 VIVS_VS_ICACHE_CONTROL_FLUSH_PS |
648 VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
649 }
650 if (ctx->specs.has_shader_range_registers) {
651 etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
652 etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
653 0x100);
654 }
/* Write the VS and PS instruction words at the hardware-specific offsets. */
655 etna_set_state_multi(stream, ctx->specs.vs_offset,
656 ctx->shader_state.vs_inst_mem_size,
657 ctx->shader_state.VS_INST_MEM);
658 etna_set_state_multi(stream, ctx->specs.ps_offset,
659 ctx->shader_state.ps_inst_mem_size,
660 ctx->shader_state.PS_INST_MEM);
661 }
662
/* With unified uniforms, the PS uniform base is set to max_vs_uniforms and the VS base to 0. */
663 if (ctx->specs.has_unified_uniforms) {
664 etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
665 etna_set_state(stream, VIVS_PS_UNIFORM_BASE, ctx->specs.max_vs_uniforms);
666 }
667
668 if (do_uniform_flush)
669 etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
670
671 etna_uniforms_write(ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX]);
672
673 if (do_uniform_flush)
674 etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
675
676 etna_uniforms_write(ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
677
678 if (ctx->specs.halti >= 5) {
679 /* HALTI5 needs to be prompted to pre-fetch shaders */
680 etna_set_state(stream, VIVS_VS_ICACHE_PREFETCH, 0x00000000);
681 etna_set_state(stream, VIVS_PS_ICACHE_PREFETCH, 0x00000000);
682 etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
683 }
684 } else {
/* Shader unchanged: uniforms are rewritten per stage only when a relevant dirty bit is set. */
685 /* ideally this cache would only be flushed if there are VS uniform changes */
686 if (do_uniform_flush)
687 etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
688
689 if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
690 etna_uniforms_write(ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX]);
691
692 /* ideally this cache would only be flushed if there are PS uniform changes */
693 if (do_uniform_flush)
694 etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
695
696 if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
697 etna_uniforms_write(ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
698 }
699 /**** End of state update ****/
/* The EMIT_STATE* helper macros are removed here and cannot be used after this point in the file. */
700 #undef EMIT_STATE
701 #undef EMIT_STATE_FIXP
702 #undef EMIT_STATE_RELOC
/* Everything has been emitted: clear the dirty flags. */
703 ctx->dirty = 0;
704 ctx->dirty_sampler_views = 0;
705 }
704 ctx->dirty_sampler_views = 0;
705 }