#include "etnaviv_zsa.h"
#include "hw/common.xml.h"
#include "hw/state.xml.h"
+#include "hw/state_blt.xml.h"
#include "util/u_math.h"
-struct etna_coalesce {
- uint32_t start;
- uint32_t last_reg;
- uint32_t last_fixp;
-};
-
/* Queue a STALL command (queues 2 words) */
static inline void
CMD_STALL(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
void
etna_stall(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
{
- etna_cmd_stream_reserve(stream, 4);
+ bool blt = (from == SYNC_RECIPIENT_BLT) || (to == SYNC_RECIPIENT_BLT);
+ etna_cmd_stream_reserve(stream, blt ? 8 : 4);
+
+ if (blt) {
+ etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
+ etna_cmd_stream_emit(stream, 1);
+ }
+ /* TODO: set bit 28/29 of token after BLT COPY_BUFFER */
etna_emit_load_state(stream, VIVS_GL_SEMAPHORE_TOKEN >> 2, 1, 0);
etna_cmd_stream_emit(stream, VIVS_GL_SEMAPHORE_TOKEN_FROM(from) | VIVS_GL_SEMAPHORE_TOKEN_TO(to));
etna_emit_load_state(stream, VIVS_GL_STALL_TOKEN >> 2, 1, 0);
etna_cmd_stream_emit(stream, VIVS_GL_STALL_TOKEN_FROM(from) | VIVS_GL_STALL_TOKEN_TO(to));
}
-}
-
-static void
-etna_coalesce_start(struct etna_cmd_stream *stream,
- struct etna_coalesce *coalesce)
-{
- coalesce->start = etna_cmd_stream_offset(stream);
- coalesce->last_reg = 0;
- coalesce->last_fixp = 0;
-}
-static void
-etna_coalesce_end(struct etna_cmd_stream *stream,
- struct etna_coalesce *coalesce)
-{
- uint32_t end = etna_cmd_stream_offset(stream);
- uint32_t size = end - coalesce->start;
-
- if (size) {
- uint32_t offset = coalesce->start - 1;
- uint32_t value = etna_cmd_stream_get(stream, offset);
-
- value |= VIV_FE_LOAD_STATE_HEADER_COUNT(size);
- etna_cmd_stream_set(stream, offset, value);
- }
-
- /* append needed padding */
- if (end % 2 == 1)
- etna_cmd_stream_emit(stream, 0xdeadbeef);
-}
-
-static void
-check_coalsence(struct etna_cmd_stream *stream, struct etna_coalesce *coalesce,
- uint32_t reg, uint32_t fixp)
-{
- if (coalesce->last_reg != 0) {
- if (((coalesce->last_reg + 4) != reg) || (coalesce->last_fixp != fixp)) {
- etna_coalesce_end(stream, coalesce);
- etna_emit_load_state(stream, reg >> 2, 0, fixp);
- coalesce->start = etna_cmd_stream_offset(stream);
- }
- } else {
- etna_emit_load_state(stream, reg >> 2, 0, fixp);
- coalesce->start = etna_cmd_stream_offset(stream);
- }
-
- coalesce->last_reg = reg;
- coalesce->last_fixp = fixp;
-}
-
-static inline void
-etna_coalsence_emit(struct etna_cmd_stream *stream,
- struct etna_coalesce *coalesce, uint32_t reg,
- uint32_t value)
-{
- check_coalsence(stream, coalesce, reg, 0);
- etna_cmd_stream_emit(stream, value);
-}
-
-static inline void
-etna_coalsence_emit_fixp(struct etna_cmd_stream *stream,
- struct etna_coalesce *coalesce, uint32_t reg,
- uint32_t value)
-{
- check_coalsence(stream, coalesce, reg, 1);
- etna_cmd_stream_emit(stream, value);
-}
-
-static inline void
-etna_coalsence_emit_reloc(struct etna_cmd_stream *stream,
- struct etna_coalesce *coalesce, uint32_t reg,
- const struct etna_reloc *r)
-{
- if (r->bo) {
- check_coalsence(stream, coalesce, reg, 0);
- etna_cmd_stream_reloc(stream, r);
+ if (blt) {
+ etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
+ etna_cmd_stream_emit(stream, 0);
}
}
#define EMIT_STATE_RELOC(state_name, src_value) \
etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)
-/* submit RS state, without any processing and no dependence on context
- * except TS if this is a source-to-destination blit. */
-void
-etna_submit_rs_state(struct etna_context *ctx,
- const struct compiled_rs_state *cs)
-{
- struct etna_screen *screen = etna_screen(ctx->base.screen);
- struct etna_cmd_stream *stream = ctx->stream;
- struct etna_coalesce coalesce;
-
- if (cs->RS_KICKER_INPLACE && !cs->source_ts_valid)
- /* Inplace resolve is no-op if TS is not configured */
- return;
-
- ctx->stats.rs_operations++;
-
- if (cs->RS_KICKER_INPLACE) {
- etna_cmd_stream_reserve(stream, 6);
- etna_coalesce_start(stream, &coalesce);
- /* 0/1 */ EMIT_STATE(RS_EXTRA_CONFIG, cs->RS_EXTRA_CONFIG);
- /* 2/3 */ EMIT_STATE(RS_SOURCE_STRIDE, cs->RS_SOURCE_STRIDE);
- /* 4/5 */ EMIT_STATE(RS_KICKER_INPLACE, cs->RS_KICKER_INPLACE);
- etna_coalesce_end(stream, &coalesce);
- } else if (screen->specs.pixel_pipes == 1) {
- etna_cmd_stream_reserve(stream, 22);
- etna_coalesce_start(stream, &coalesce);
- /* 0/1 */ EMIT_STATE(RS_CONFIG, cs->RS_CONFIG);
- /* 2 */ EMIT_STATE_RELOC(RS_SOURCE_ADDR, &cs->source[0]);
- /* 3 */ EMIT_STATE(RS_SOURCE_STRIDE, cs->RS_SOURCE_STRIDE);
- /* 4 */ EMIT_STATE_RELOC(RS_DEST_ADDR, &cs->dest[0]);
- /* 5 */ EMIT_STATE(RS_DEST_STRIDE, cs->RS_DEST_STRIDE);
- /* 6/7 */ EMIT_STATE(RS_WINDOW_SIZE, cs->RS_WINDOW_SIZE);
- /* 8/9 */ EMIT_STATE(RS_DITHER(0), cs->RS_DITHER[0]);
- /*10 */ EMIT_STATE(RS_DITHER(1), cs->RS_DITHER[1]);
- /*11 - pad */
- /*12/13*/ EMIT_STATE(RS_CLEAR_CONTROL, cs->RS_CLEAR_CONTROL);
- /*14 */ EMIT_STATE(RS_FILL_VALUE(0), cs->RS_FILL_VALUE[0]);
- /*15 */ EMIT_STATE(RS_FILL_VALUE(1), cs->RS_FILL_VALUE[1]);
- /*16 */ EMIT_STATE(RS_FILL_VALUE(2), cs->RS_FILL_VALUE[2]);
- /*17 */ EMIT_STATE(RS_FILL_VALUE(3), cs->RS_FILL_VALUE[3]);
- /*18/19*/ EMIT_STATE(RS_EXTRA_CONFIG, cs->RS_EXTRA_CONFIG);
- /*20/21*/ EMIT_STATE(RS_KICKER, 0xbeebbeeb);
- etna_coalesce_end(stream, &coalesce);
- } else if (screen->specs.pixel_pipes == 2) {
- etna_cmd_stream_reserve(stream, 34); /* worst case - both pipes multi=1 */
- etna_coalesce_start(stream, &coalesce);
- /* 0/1 */ EMIT_STATE(RS_CONFIG, cs->RS_CONFIG);
- /* 2/3 */ EMIT_STATE(RS_SOURCE_STRIDE, cs->RS_SOURCE_STRIDE);
- /* 4/5 */ EMIT_STATE(RS_DEST_STRIDE, cs->RS_DEST_STRIDE);
- /* 6/7 */ EMIT_STATE_RELOC(RS_PIPE_SOURCE_ADDR(0), &cs->source[0]);
- if (cs->RS_SOURCE_STRIDE & VIVS_RS_SOURCE_STRIDE_MULTI) {
- /*8 */ EMIT_STATE_RELOC(RS_PIPE_SOURCE_ADDR(1), &cs->source[1]);
- /*9 - pad */
- }
- /*10/11*/ EMIT_STATE_RELOC(RS_PIPE_DEST_ADDR(0), &cs->dest[0]);
- if (cs->RS_DEST_STRIDE & VIVS_RS_DEST_STRIDE_MULTI) {
- /*12*/ EMIT_STATE_RELOC(RS_PIPE_DEST_ADDR(1), &cs->dest[1]);
- /*13 - pad */
- }
- /*14/15*/ EMIT_STATE(RS_PIPE_OFFSET(0), cs->RS_PIPE_OFFSET[0]);
- /*16 */ EMIT_STATE(RS_PIPE_OFFSET(1), cs->RS_PIPE_OFFSET[1]);
- /*17 - pad */
- /*18/19*/ EMIT_STATE(RS_WINDOW_SIZE, cs->RS_WINDOW_SIZE);
- /*20/21*/ EMIT_STATE(RS_DITHER(0), cs->RS_DITHER[0]);
- /*22 */ EMIT_STATE(RS_DITHER(1), cs->RS_DITHER[1]);
- /*23 - pad */
- /*24/25*/ EMIT_STATE(RS_CLEAR_CONTROL, cs->RS_CLEAR_CONTROL);
- /*26 */ EMIT_STATE(RS_FILL_VALUE(0), cs->RS_FILL_VALUE[0]);
- /*27 */ EMIT_STATE(RS_FILL_VALUE(1), cs->RS_FILL_VALUE[1]);
- /*28 */ EMIT_STATE(RS_FILL_VALUE(2), cs->RS_FILL_VALUE[2]);
- /*29 */ EMIT_STATE(RS_FILL_VALUE(3), cs->RS_FILL_VALUE[3]);
- /*30/31*/ EMIT_STATE(RS_EXTRA_CONFIG, cs->RS_EXTRA_CONFIG);
- /*32/33*/ EMIT_STATE(RS_KICKER, 0xbeebbeeb);
- etna_coalesce_end(stream, &coalesce);
- } else {
- abort();
- }
-}
-
-/* Create bit field that specifies which samplers are active and thus need to be
- * programmed
- * 32 bits is enough for 32 samplers. As far as I know this is the upper bound
- * supported on any Vivante hw
- * up to GC4000.
- */
-static uint32_t
-active_samplers_bits(struct etna_context *ctx)
-{
- return ctx->active_sampler_views & ctx->active_samplers;
-}
-
#define ETNA_3D_CONTEXT_SIZE (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */
static unsigned
size += ctx->vertex_elements->num_elements + 1;
/* uniforms - worst case (2 words per uniform load) */
- size += ctx->shader.vs->uniforms.const_count * 2;
- size += ctx->shader.fs->uniforms.const_count * 2;
+ size += ctx->shader.vs->uniforms.imm_count * 2;
+ size += ctx->shader.fs->uniforms.imm_count * 2;
/* shader */
size += ctx->shader_state.vs_inst_mem_size + 1;
return size;
}
+/* Emit state that only exists on HALTI5+.
+ *
+ * Writes the HALTI5-specific shader/varying registers into ctx->stream as a
+ * single coalescing run (etna_coalesce_start ... etna_coalesce_end). Each
+ * group of registers is only emitted when the matching bits are set in
+ * ctx->dirty.
+ *
+ * ctx:             context supplying the command stream, the dirty bits and
+ *                  the compiled shader_state values that are written.
+ * vs_output_count: written to PA_VS_OUTPUT_COUNT, packed into
+ *                  VS_HALTI5_OUTPUT_COUNT, and used as a divisor when
+ *                  building the VS_HALTI5_UNK008A0 value.
+ *
+ * The EMIT_STATE macro is not defined in this block; it presumably picks up
+ * the locals `stream` and `coalesce` by name -- confirm against its
+ * definition before renaming either of them.
+ */
+static void
+emit_halti5_only_state(struct etna_context *ctx, int vs_output_count)
+{
+ struct etna_cmd_stream *stream = ctx->stream;
+ uint32_t dirty = ctx->dirty; /* local copy of the context's dirty bits, tested below */
+ struct etna_coalesce coalesce; /* initialised by etna_coalesce_start() */
+
+ etna_coalesce_start(stream, &coalesce);
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ /* Magic states (load balancing, inter-unit sync, buffers) */
+ /*007C4*/ EMIT_STATE(FE_HALTI5_ID_CONFIG, ctx->shader_state.FE_HALTI5_ID_CONFIG);
+ /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT, vs_output_count | ((vs_output_count * 0x10) << 8)); /* count in the low bits, count * 0x10 shifted up by 8 */
+ /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0, 0x0001000e | ((0x110/vs_output_count) << 20)); /* NOTE(review): integer division; vs_output_count == 0 would divide by zero -- confirm callers always pass >= 1 */
+ for (int x = 0; x < 4; ++x) {
+ /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
+ }
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) { /* inputs are re-emitted on vertex element changes as well as shader changes */
+ for (int x = 0; x < 4; ++x) {
+ /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x), ctx->shader_state.VS_INPUT[x]);
+ }
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]);
+ /*00A94*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(1), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);
+ /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT, vs_output_count);
+ /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]); /* same source values as the PA_VARYING_NUM_COMPONENTS writes above */
+ /*01084*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(1), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);
+ /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS, ctx->shader_state.GL_HALTI5_SH_SPECIALS);
+ }
+ etna_coalesce_end(stream, &coalesce);
+}
+
+/* Emit state that no longer exists on HALTI5.
+ *
+ * Writes the legacy (pre-HALTI5) shader, rasterizer-multisample and varying
+ * registers into ctx->stream as a single coalescing run
+ * (etna_coalesce_start ... etna_coalesce_end). Each group is only emitted
+ * when the matching bits are set in ctx->dirty. The address comments on the
+ * EMIT_STATE lines are in ascending order; keep new entries in that order.
+ *
+ * ctx: context supplying the command stream, the dirty bits and the
+ *      shader_state / framebuffer values that are written.
+ *
+ * The EMIT_STATE macro is not defined in this block; it presumably picks up
+ * the locals `stream` and `coalesce` by name -- confirm against its
+ * definition before renaming either of them.
+ */
+static void
+emit_pre_halti5_state(struct etna_context *ctx)
+{
+ struct etna_cmd_stream *stream = ctx->stream;
+ uint32_t dirty = ctx->dirty; /* local copy of the context's dirty bits, tested below */
+ struct etna_coalesce coalesce; /* initialised by etna_coalesce_start() */
+
+ etna_coalesce_start(stream, &coalesce);
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ for (int x = 0; x < 4; ++x) {
+ /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
+ }
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) { /* inputs are re-emitted on vertex element changes as well as shader changes */
+ for (int x = 0; x < 4; ++x) {
+ /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
+ }
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ for (int x = 0; x < 10; ++x) {
+ /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
+ }
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) { /* these values are read from ctx->framebuffer, not shader_state */
+ /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
+ for (int x = 0; x < 4; ++x) {
+ /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
+ }
+ for (int x = 0; x < 16; ++x) {
+ /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
+ }
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) { /* PS program counters are re-emitted on framebuffer changes too */
+ /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
+ /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]); /* element [0] goes here; element [1] goes to GL_VARYING_NUM_COMPONENTS2 below */
+ for (int x = 0; x < 2; ++x) {
+ /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
+ }
+ /*03834*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS2, ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);
+ }
+ etna_coalesce_end(stream, &coalesce);
+}
+
/* Weave state before draw operation. This function merges all the compiled
* state blocks under the context into one device register state. Parts of
* this state that are changed since last call (dirty) will be uploaded as
etna_emit_state(struct etna_context *ctx)
{
struct etna_cmd_stream *stream = ctx->stream;
- uint32_t active_samplers = active_samplers_bits(ctx);
+ struct etna_screen *screen = ctx->screen;
+ unsigned ccw = ctx->rasterizer->front_ccw;
+
/* Pre-reserve the command buffer space which we are likely to need.
* This must cover all the state emitted below, and the following
/* Pre-processing: see what caches we need to flush before making state changes. */
uint32_t to_flush = 0;
- if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
- /* Need flush COLOR when changing PE.COLOR_FORMAT.OVERWRITE. */
-#if 0
- /* TODO*/
- if ((ctx->gpu3d.PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE) !=
- (etna_blend_state(ctx->blend)->PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE))
-#endif
+ if (unlikely(dirty & (ETNA_DIRTY_BLEND)))
to_flush |= VIVS_GL_FLUSH_CACHE_COLOR;
- }
if (unlikely(dirty & (ETNA_DIRTY_TEXTURE_CACHES)))
to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE;
if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) /* Framebuffer config changed? */
etna_set_state(stream, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH);
}
- /* If MULTI_SAMPLE_CONFIG.MSAA_SAMPLES changed, clobber affected shader
- * state to make sure it is always rewritten. */
- if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
- if ((ctx->gpu3d.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK) !=
- (ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK)) {
- /* XXX what does the GPU set these states to on MSAA samples change?
- * Does it do the right thing?
- * (increase/decrease as necessary) or something else? Just set some
- * invalid value until we know for
- * sure. */
- ctx->gpu3d.PS_INPUT_COUNT = 0xffffffff;
- ctx->gpu3d.PS_TEMP_REGISTER_CONTROL = 0xffffffff;
- }
- }
-
/* Update vertex elements. This is different from any of the other states, in that
* a) the number of vertex elements written matters: so write only active ones
* b) the vertex element states must all be written: do not skip entries that stay the same */
if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
- /* Special case: vertex elements must always be sent in full if changed */
- /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
- ctx->vertex_elements->num_elements,
- ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
+ if (screen->specs.halti >= 5) {
+ /*17800*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
+ ctx->vertex_elements->num_elements,
+ ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG0);
+ /*17A00*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_SCALE(0),
+ ctx->vertex_elements->num_elements,
+ ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
+ /*17A80*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),
+ ctx->vertex_elements->num_elements,
+ ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG1);
+ } else {
+ /* Special case: vertex elements must always be sent in full if changed */
+ /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
+ ctx->vertex_elements->num_elements,
+ ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
+ if (screen->specs.halti >= 2) {
+ /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
+ ctx->vertex_elements->num_elements,
+ ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
+ }
+ }
}
+ unsigned vs_output_count = etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex
+ ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
+ : ctx->shader_state.VS_OUTPUT_COUNT;
/* The following code is originally generated by gen_merge_state.py, to
* emit state in increasing order of address (this makes it possible to merge
/*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
/*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL);
}
- if (likely((dirty & (ETNA_DIRTY_VERTEX_BUFFERS) && ctx->specs.stream_count == 1))) {
- /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
- /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
- }
if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
/*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);
}
- if (likely((dirty & (ETNA_DIRTY_VERTEX_BUFFERS)) && ctx->specs.stream_count > 1)) {
- for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
- /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
- }
- for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
- if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
- /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
+ if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
+ if (screen->specs.halti >= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */
+ for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+ /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
+ }
+ for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+ if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
+ /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
+ }
+ }
+ } else if(screen->specs.stream_count > 1) { /* hw w/ multiple vertex streams */
+ for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+ /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
}
+ for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+ if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
+ /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
+ }
+ }
+ } else { /* hw w/ single vertex stream */
+ /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
+ /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
}
}
- if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
- /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
+ /* gallium has instance divisor as part of elements state */
+ if ((dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) && screen->specs.halti >= 2) {
+ for (int x = 0; x < ctx->vertex_elements->num_buffers; ++x) {
+ /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_VERTEX_DIVISOR(x), ctx->vertex_elements->NFE_VERTEX_STREAMS_VERTEX_DIVISOR[x]);
+ }
}
+
if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {
- bool point_size_per_vertex =
- etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex;
- /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT,
- point_size_per_vertex
- ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
- : ctx->shader_state.VS_OUTPUT_COUNT);
+ /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, vs_output_count);
}
if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
/*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);
/*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL);
}
- if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
- for (int x = 0; x < 4; ++x) {
- /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
- }
- }
- if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
- for (int x = 0; x < 4; ++x) {
- /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
- }
- }
if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
/*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
- /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
}
if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
/*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
/*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);
/*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);
}
- if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
- for (int x = 0; x < 10; ++x) {
- /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
- }
- }
- if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
- ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
- /* this is a bit of a mess: rasterizer.scissor determines whether to use
- * only the framebuffer scissor, or specific scissor state, and the
- * viewport clips too so the logic spans four CSOs */
- struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
-
- uint32_t scissor_left =
- MAX2(ctx->framebuffer.SE_SCISSOR_LEFT, ctx->viewport.SE_SCISSOR_LEFT);
- uint32_t scissor_top =
- MAX2(ctx->framebuffer.SE_SCISSOR_TOP, ctx->viewport.SE_SCISSOR_TOP);
- uint32_t scissor_right =
- MIN2(ctx->framebuffer.SE_SCISSOR_RIGHT, ctx->viewport.SE_SCISSOR_RIGHT);
- uint32_t scissor_bottom =
- MIN2(ctx->framebuffer.SE_SCISSOR_BOTTOM, ctx->viewport.SE_SCISSOR_BOTTOM);
-
- if (rasterizer->scissor) {
- scissor_left = MAX2(ctx->scissor.SE_SCISSOR_LEFT, scissor_left);
- scissor_top = MAX2(ctx->scissor.SE_SCISSOR_TOP, scissor_top);
- scissor_right = MIN2(ctx->scissor.SE_SCISSOR_RIGHT, scissor_right);
- scissor_bottom = MIN2(ctx->scissor.SE_SCISSOR_BOTTOM, scissor_bottom);
- }
-
- /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, scissor_left);
- /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, scissor_top);
- /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, scissor_right);
- /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, scissor_bottom);
+ if (unlikely(dirty & (ETNA_DIRTY_SCISSOR_CLIP))) {
+ /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, ctx->clipping.minx << 16);
+ /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, ctx->clipping.miny << 16);
+ /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, (ctx->clipping.maxx << 16) + ETNA_SE_SCISSOR_MARGIN_RIGHT);
+ /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, (ctx->clipping.maxy << 16) + ETNA_SE_SCISSOR_MARGIN_BOTTOM);
}
if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
/*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, rasterizer->SE_DEPTH_BIAS);
/*00C18*/ EMIT_STATE(SE_CONFIG, rasterizer->SE_CONFIG);
}
- if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
- ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
- struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
-
- uint32_t clip_right =
- MIN2(ctx->framebuffer.SE_CLIP_RIGHT, ctx->viewport.SE_CLIP_RIGHT);
- uint32_t clip_bottom =
- MIN2(ctx->framebuffer.SE_CLIP_BOTTOM, ctx->viewport.SE_CLIP_BOTTOM);
-
- if (rasterizer->scissor) {
- clip_right = MIN2(ctx->scissor.SE_CLIP_RIGHT, clip_right);
- clip_bottom = MIN2(ctx->scissor.SE_CLIP_BOTTOM, clip_bottom);
- }
-
- /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT, clip_right);
- /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM, clip_bottom);
+ if (unlikely(dirty & (ETNA_DIRTY_SCISSOR_CLIP))) {
+ /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT, (ctx->clipping.maxx << 16) + ETNA_SE_CLIP_MARGIN_RIGHT);
+ /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM, (ctx->clipping.maxy << 16) + ETNA_SE_CLIP_MARGIN_BOTTOM);
}
if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
/*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
}
- if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
- /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
- for (int x = 0; x < 4; ++x) {
- /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
- }
- for (int x = 0; x < 16; ++x) {
- /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
- }
- }
if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
- /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
/*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);
/*01008*/ EMIT_STATE(PS_INPUT_COUNT,
ctx->framebuffer.msaa_mode
ctx->framebuffer.msaa_mode
? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA
: ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
- /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL);
- /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
+ /*01010*/ EMIT_STATE(PS_CONTROL, ctx->framebuffer.PS_CONTROL);
+ /*01030*/ EMIT_STATE(PS_CONTROL_EXT, ctx->framebuffer.PS_CONTROL_EXT);
}
- if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
- uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG;
- /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, val | ctx->framebuffer.PE_DEPTH_CONFIG);
+ if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SHADER))) {
+ /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, (etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG |
+ ctx->framebuffer.PE_DEPTH_CONFIG) &
+ ctx->shader_state.PE_DEPTH_CONFIG);
}
if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
/*01404*/ EMIT_STATE(PE_DEPTH_NEAR, ctx->viewport.PE_DEPTH_NEAR);
if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
/*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, ctx->framebuffer.PE_DEPTH_NORMALIZE);
- if (ctx->specs.pixel_pipes == 1) {
+ if (screen->specs.pixel_pipes == 1) {
/*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR, &ctx->framebuffer.PE_DEPTH_ADDR);
}
/*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, ctx->framebuffer.PE_DEPTH_STRIDE);
}
- if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
- uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_OP;
+
+ if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_RASTERIZER))) {
+ uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_OP[ccw];
/*01418*/ EMIT_STATE(PE_STENCIL_OP, val);
}
- if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_STENCIL_REF))) {
- uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG;
- /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, val | ctx->stencil_ref.PE_STENCIL_CONFIG);
+ if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_STENCIL_REF | ETNA_DIRTY_RASTERIZER))) {
+ uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG[ccw];
+ /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, val | ctx->stencil_ref.PE_STENCIL_CONFIG[ccw]);
}
if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
uint32_t val = etna_zsa_state(ctx->zsa)->PE_ALPHA_OP;
/*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, val);
}
if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
- if (ctx->specs.pixel_pipes == 1) {
+ if (screen->specs.pixel_pipes == 1) {
/*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR, &ctx->framebuffer.PE_COLOR_ADDR);
/*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
/*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
- } else if (ctx->specs.pixel_pipes == 2) {
+ } else if (screen->specs.pixel_pipes == 2) {
/*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
/*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
/*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[0]);
abort();
}
}
- if (unlikely(dirty & (ETNA_DIRTY_STENCIL_REF))) {
- /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, ctx->stencil_ref.PE_STENCIL_CONFIG_EXT);
+ if (unlikely(dirty & (ETNA_DIRTY_STENCIL_REF | ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_ZSA))) {
+ uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG_EXT;
+ /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, val | ctx->stencil_ref.PE_STENCIL_CONFIG_EXT[ccw]);
}
if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
struct etna_blend_state *blend = etna_blend_state(ctx->blend);
/*014A8*/ EMIT_STATE(PE_DITHER(x), blend->PE_DITHER[x]);
}
}
+ if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR)) &&
+ VIV_FEATURE(screen, chipMinorFeatures1, HALF_FLOAT)) {
+ /*014B0*/ EMIT_STATE(PE_ALPHA_COLOR_EXT0, ctx->blend_color.PE_ALPHA_COLOR_EXT0);
+ /*014B4*/ EMIT_STATE(PE_ALPHA_COLOR_EXT1, ctx->blend_color.PE_ALPHA_COLOR_EXT1);
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_RASTERIZER))) {
+ /*014B8*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT2, etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG_EXT2[ccw]);
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER)) && screen->specs.halti >= 3)
+ /*014BC*/ EMIT_STATE(PE_MEM_CONFIG, ctx->framebuffer.PE_MEM_CONFIG);
if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_TS))) {
/*01654*/ EMIT_STATE(TS_MEM_CONFIG, ctx->framebuffer.TS_MEM_CONFIG);
/*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE, &ctx->framebuffer.TS_COLOR_STATUS_BASE);
/*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE, &ctx->framebuffer.TS_DEPTH_STATUS_BASE);
/*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);
/*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);
- }
- if (unlikely(dirty & ETNA_DIRTY_SAMPLER_VIEWS)) {
- for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
- if ((1 << x) & active_samplers) {
- struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
- /*01720*/ EMIT_STATE(TS_SAMPLER_CONFIG(x), sv->TS_SAMPLER_CONFIG);
- }
- }
- for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
- if ((1 << x) & active_samplers) {
- struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
- /*01740*/ EMIT_STATE_RELOC(TS_SAMPLER_STATUS_BASE(x), &sv->TS_SAMPLER_STATUS_BASE);
- }
- }
- for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
- if ((1 << x) & active_samplers) {
- struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
- /*01760*/ EMIT_STATE(TS_SAMPLER_CLEAR_VALUE(x), sv->TS_SAMPLER_CLEAR_VALUE);
- }
- }
- for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
- if ((1 << x) & active_samplers) {
- struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
- /*01780*/ EMIT_STATE(TS_SAMPLER_CLEAR_VALUE2(x), sv->TS_SAMPLER_CLEAR_VALUE2);
- }
- }
- }
- if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) {
- for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
- uint32_t val = 0; /* 0 == sampler inactive */
-
- /* set active samplers to their configuration value (determined by both
- * the sampler state and sampler view) */
- if ((1 << x) & active_samplers) {
- struct etna_sampler_state *ss = etna_sampler_state(ctx->sampler[x]);
- struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
-
- val = (ss->TE_SAMPLER_CONFIG0 & sv->TE_SAMPLER_CONFIG0_MASK) |
- sv->TE_SAMPLER_CONFIG0;
- }
-
- /*02000*/ EMIT_STATE(TE_SAMPLER_CONFIG0(x), val);
- }
- }
- if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) {
- struct etna_sampler_view *sv;
-
- for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
- if ((1 << x) & active_samplers) {
- sv = etna_sampler_view(ctx->sampler_view[x]);
- /*02040*/ EMIT_STATE(TE_SAMPLER_SIZE(x), sv->TE_SAMPLER_SIZE);
- }
- }
- for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
- if ((1 << x) & active_samplers) {
- sv = etna_sampler_view(ctx->sampler_view[x]);
- /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), sv->TE_SAMPLER_LOG_SIZE);
- }
- }
- }
- if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) {
- struct etna_sampler_state *ss;
- struct etna_sampler_view *sv;
-
- for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
- if ((1 << x) & active_samplers) {
- ss = etna_sampler_state(ctx->sampler[x]);
- sv = etna_sampler_view(ctx->sampler_view[x]);
-
- /* min and max lod is determined both by the sampler and the view */
- /*020C0*/ EMIT_STATE(TE_SAMPLER_LOD_CONFIG(x),
- ss->TE_SAMPLER_LOD_CONFIG |
- VIVS_TE_SAMPLER_LOD_CONFIG_MAX(MIN2(ss->max_lod, sv->max_lod)) |
- VIVS_TE_SAMPLER_LOD_CONFIG_MIN(MAX2(ss->min_lod, sv->min_lod)));
- }
- }
- for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
- if ((1 << x) & active_samplers) {
- ss = etna_sampler_state(ctx->sampler[x]);
- sv = etna_sampler_view(ctx->sampler_view[x]);
-
- /*021C0*/ EMIT_STATE(TE_SAMPLER_CONFIG1(x), ss->TE_SAMPLER_CONFIG1 |
- sv->TE_SAMPLER_CONFIG1);
- }
- }
- }
- if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) {
- for (int y = 0; y < VIVS_TE_SAMPLER_LOD_ADDR__LEN; ++y) {
- for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
- if ((1 << x) & active_samplers) {
- struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
- /*02400*/ EMIT_STATE_RELOC(TE_SAMPLER_LOD_ADDR(x, y),&sv->TE_SAMPLER_LOD_ADDR[y]);
- }
- }
- }
+ /*016BC*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE_EXT, ctx->framebuffer.TS_COLOR_CLEAR_VALUE_EXT);
}
if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
/*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);
- /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
- for (int x = 0; x < 2; ++x) {
- /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
- }
- }
- if (unlikely(ctx->specs.tex_astc && (dirty & (ETNA_DIRTY_SAMPLER_VIEWS)))) {
- for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
- if ((1 << x) & active_samplers) {
- struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
- /*10500*/ EMIT_STATE(NTE_SAMPLER_ASTC0(x), sv->TE_SAMPLER_ASTC0);
- }
- }
}
etna_coalesce_end(stream, &coalesce);
/* end only EMIT_STATE */
- /* Insert a FE/PE stall as changing the shader instructions (and maybe
- * the uniforms) can corrupt the previous in-progress draw operation.
- * Observed with amoeba on GC2000 during the right-to-left rendering
- * of PI, and can cause GPU hangs immediately after.
- * I summise that this is because the "new" locations at 0xc000 are not
- * properly protected against updates as other states seem to be. Hence,
- * we detect the "new" vertex shader instruction offset to apply this. */
- if (ctx->dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF) && ctx->specs.vs_offset > 0x4000)
+ /* Emit strongly architecture-specific state */
+ if (screen->specs.halti >= 5)
+ emit_halti5_only_state(ctx, vs_output_count);
+ else
+ emit_pre_halti5_state(ctx);
+
+ /* Beginning from Halti0 some of the new shader and sampler states are not
+ * self-synchronizing anymore. Thus we need to stall the FE on PE completion
+ * before loading the new states to avoid corrupting the state of the
+ * in-flight draw.
+ */
+ if (screen->specs.halti >= 0 &&
+ (ctx->dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF |
+ ETNA_DIRTY_SAMPLERS | ETNA_DIRTY_SAMPLER_VIEWS)))
etna_stall(ctx->stream, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
+ ctx->emit_texture_state(ctx);
+
/* We need to update the uniform cache only if one of the following bits are
* set in ctx->dirty:
* - ETNA_DIRTY_SHADER
static const uint32_t uniform_dirty_bits =
ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF;
- if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
- etna_uniforms_write(
- ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX],
- ctx->shader_state.VS_UNIFORMS, &ctx->shader_state.vs_uniforms_size);
-
- if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
- etna_uniforms_write(
- ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT],
- ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size);
-
/**** Large dynamically-sized state ****/
+ bool do_uniform_flush = screen->specs.halti < 5;
if (dirty & (ETNA_DIRTY_SHADER)) {
/* Special case: a new shader was loaded; simply re-load all uniforms and
* shader code at once */
- if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
- assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers);
+ /* This sequence is special, do not change ordering unless necessary. According to comment
+ snippets in the Vivante kernel driver a process called "steering" goes on while programming
+ shader state. This (as I understand it) means certain unified states are "steered"
+ toward a specific shader unit (VS/PS/...) based on either explicit flags in register
+ 00860, or what other state is written before "auto-steering". So this means some
+ state can legitimately be programmed multiple times.
+ */
+
+ if (screen->specs.halti >= 5) { /* ICACHE (HALTI5) */
+ assert(ctx->shader_state.VS_INST_ADDR.bo && ctx->shader_state.PS_INST_ADDR.bo);
+ /* Set icache (VS) */
+ etna_set_state(stream, VIVS_VS_NEWRANGE_LOW, 0);
+ etna_set_state(stream, VIVS_VS_NEWRANGE_HIGH, ctx->shader_state.vs_inst_mem_size / 4);
+ assert(ctx->shader_state.VS_INST_ADDR.bo);
+ etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
+ etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
+ etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
+ etna_set_state(stream, VIVS_VS_ICACHE_COUNT, ctx->shader_state.vs_inst_mem_size / 4 - 1);
+
+ /* Set icache (PS) */
+ etna_set_state(stream, VIVS_PS_NEWRANGE_LOW, 0);
+ etna_set_state(stream, VIVS_PS_NEWRANGE_HIGH, ctx->shader_state.ps_inst_mem_size / 4);
+ assert(ctx->shader_state.PS_INST_ADDR.bo);
+ etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
+ etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
+ etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
+ etna_set_state(stream, VIVS_PS_ICACHE_COUNT, ctx->shader_state.ps_inst_mem_size / 4 - 1);
+
+ } else if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
+ /* ICACHE (pre-HALTI5) */
+ assert(screen->specs.has_icache && screen->specs.has_shader_range_registers);
/* Set icache (VS) */
etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
} else {
/* Upload shader directly, first flushing and disabling icache if
* supported on this hw */
- if (ctx->specs.has_icache) {
+ if (screen->specs.has_icache) {
etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
VIVS_VS_ICACHE_CONTROL_FLUSH_PS |
VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
}
- if (ctx->specs.has_shader_range_registers) {
+ if (screen->specs.has_shader_range_registers) {
etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
0x100);
}
- etna_set_state_multi(stream, ctx->specs.vs_offset,
+ etna_set_state_multi(stream, screen->specs.vs_offset,
ctx->shader_state.vs_inst_mem_size,
ctx->shader_state.VS_INST_MEM);
- etna_set_state_multi(stream, ctx->specs.ps_offset,
+ etna_set_state_multi(stream, screen->specs.ps_offset,
ctx->shader_state.ps_inst_mem_size,
ctx->shader_state.PS_INST_MEM);
}
- if (ctx->specs.has_unified_uniforms) {
+ if (screen->specs.has_unified_uniforms) {
etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
- etna_set_state(stream, VIVS_PS_UNIFORM_BASE, ctx->specs.max_vs_uniforms);
+ etna_set_state(stream, VIVS_PS_UNIFORM_BASE, screen->specs.max_vs_uniforms);
+ }
+
+ if (do_uniform_flush)
+ etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
+
+ etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX].cb);
+
+ if (do_uniform_flush)
+ etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
+
+ etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);
+
+ if (screen->specs.halti >= 5) {
+ /* HALTI5 needs to be prompted to pre-fetch shaders */
+ etna_set_state(stream, VIVS_VS_ICACHE_PREFETCH, 0x00000000);
+ etna_set_state(stream, VIVS_PS_ICACHE_PREFETCH, 0x00000000);
+ etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
}
- etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
- etna_set_state_multi(stream, ctx->specs.vs_uniforms_offset,
- ctx->shader_state.vs_uniforms_size,
- ctx->shader_state.VS_UNIFORMS);
- etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
- etna_set_state_multi(stream, ctx->specs.ps_uniforms_offset,
- ctx->shader_state.ps_uniforms_size,
- ctx->shader_state.PS_UNIFORMS);
-
- /* Copy uniforms to gpu3d, so that incremental updates to uniforms are
- * possible as long as the
- * same shader remains bound */
- ctx->gpu3d.vs_uniforms_size = ctx->shader_state.vs_uniforms_size;
- ctx->gpu3d.ps_uniforms_size = ctx->shader_state.ps_uniforms_size;
- memcpy(ctx->gpu3d.VS_UNIFORMS, ctx->shader_state.VS_UNIFORMS,
- ctx->shader_state.vs_uniforms_size * 4);
- memcpy(ctx->gpu3d.PS_UNIFORMS, ctx->shader_state.PS_UNIFORMS,
- ctx->shader_state.ps_uniforms_size * 4);
} else {
/* ideally this cache would only be flushed if there are VS uniform changes */
- etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
- etna_coalesce_start(stream, &coalesce);
- for (int x = 0; x < ctx->shader.vs->uniforms.const_count; ++x) {
- if (ctx->gpu3d.VS_UNIFORMS[x] != ctx->shader_state.VS_UNIFORMS[x]) {
- etna_coalsence_emit(stream, &coalesce, ctx->specs.vs_uniforms_offset + x*4, ctx->shader_state.VS_UNIFORMS[x]);
- ctx->gpu3d.VS_UNIFORMS[x] = ctx->shader_state.VS_UNIFORMS[x];
- }
- }
- etna_coalesce_end(stream, &coalesce);
+ if (do_uniform_flush)
+ etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
+
+ if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
+ etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX].cb);
/* ideally this cache would only be flushed if there are PS uniform changes */
- etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
- etna_coalesce_start(stream, &coalesce);
- for (int x = 0; x < ctx->shader.fs->uniforms.const_count; ++x) {
- if (ctx->gpu3d.PS_UNIFORMS[x] != ctx->shader_state.PS_UNIFORMS[x]) {
- etna_coalsence_emit(stream, &coalesce, ctx->specs.ps_uniforms_offset + x*4, ctx->shader_state.PS_UNIFORMS[x]);
- ctx->gpu3d.PS_UNIFORMS[x] = ctx->shader_state.PS_UNIFORMS[x];
- }
- }
- etna_coalesce_end(stream, &coalesce);
+ if (do_uniform_flush)
+ etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
+
+ if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
+ etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);
}
/**** End of state update ****/
#undef EMIT_STATE
#undef EMIT_STATE_FIXP
#undef EMIT_STATE_RELOC
ctx->dirty = 0;
+ ctx->dirty_sampler_views = 0;
}