panfrost/decode: Hit MRT blend shader enable bits

[mesa.git] / src / gallium / drivers / etnaviv / etnaviv_emit.c
diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c b/src/gallium/drivers/etnaviv/etnaviv_emit.c

index 508c7b1b6aba00c7d2081e8eafb562887aa081d3..ed7b7ee3cb8881f8b41ce5a91cf7b667a1407ac5 100644 (file)
--- a/src/gallium/drivers/etnaviv/etnaviv_emit.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c
@@ -91,18 +91,6 @@ etna_stall(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
  #define EMIT_STATE_RELOC(state_name, src_value) \
     etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)
  
-/* Create bit field that specifies which samplers are active and thus need to be
- * programmed
- * 32 bits is enough for 32 samplers. As far as I know this is the upper bound
- * supported on any Vivante hw
- * up to GC4000.
- */
-static uint32_t
-active_samplers_bits(struct etna_context *ctx)
-{
-   return ctx->active_sampler_views & ctx->active_samplers;
-}
-
  #define ETNA_3D_CONTEXT_SIZE  (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */
  
  static unsigned
@@ -133,6 +121,91 @@ required_stream_size(struct etna_context *ctx)
     return size;
  }
  
+/* Emit state that only exists on HALTI5+: dirty-gated VS/PA/PS/GL register writes batched in one coalesce run; vs_output_count is supplied by the caller */
+static void
+emit_halti5_only_state(struct etna_context *ctx, int vs_output_count)
+{
+   struct etna_cmd_stream *stream = ctx->stream;
+   uint32_t dirty = ctx->dirty;
+   struct etna_coalesce coalesce;
+
+   etna_coalesce_start(stream, &coalesce);
+   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+      /* Magic states (load balancing, inter-unit sync, buffers). NOTE(review): 0x110/vs_output_count assumes a non-zero count - confirm with callers */
+      /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT, vs_output_count | ((vs_output_count * 0x10) << 8));
+      /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0, 0x0001000e | ((0x110/vs_output_count) << 20));
+      for (int x = 0; x < 4; ++x) {
+         /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
+      }
+   }
+   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
+      for (int x = 0; x < 4; ++x) {
+         /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x), ctx->shader_state.VS_INPUT[x]);
+      }
+   }
+   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+      /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
+      /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT, vs_output_count);
+      /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
+      /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS, ctx->shader_state.GL_HALTI5_SH_SPECIALS);
+   }
+   etna_coalesce_end(stream, &coalesce);
+}
+
+/* Emit state that no longer exists on HALTI5: dirty-gated VS/PA/RA/PS/GL register writes batched in one coalesce run */
+static void
+emit_pre_halti5_state(struct etna_context *ctx)
+{
+   struct etna_cmd_stream *stream = ctx->stream;
+   uint32_t dirty = ctx->dirty;
+   struct etna_coalesce coalesce;
+
+   etna_coalesce_start(stream, &coalesce);
+   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+      /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
+   }
+   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+      for (int x = 0; x < 4; ++x) {
+        /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
+      }
+   }
+   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
+      for (int x = 0; x < 4; ++x) {
+        /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
+      }
+   }
+   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+      /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
+   }
+   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+      for (int x = 0; x < 10; ++x) {
+         /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
+      }
+   }
+   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
+      /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
+      for (int x = 0; x < 4; ++x) {
+         /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
+      }
+      for (int x = 0; x < 16; ++x) {
+         /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
+      }
+   }
+   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
+      /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
+   }
+   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
+      /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
+   }
+   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+      /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
+      for (int x = 0; x < 2; ++x) {
+         /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
+      }
+   }
+   etna_coalesce_end(stream, &coalesce);
+}
+
  /* Weave state before draw operation. This function merges all the compiled
   * state blocks under the context into one device register state. Parts of
   * this state that are changed since last call (dirty) will be uploaded as
@@ -141,7 +214,6 @@ void
  etna_emit_state(struct etna_context *ctx)
  {
     struct etna_cmd_stream *stream = ctx->stream;
-   uint32_t active_samplers = active_samplers_bits(ctx);
  
     /* Pre-reserve the command buffer space which we are likely to need.
      * This must cover all the state emitted below, and the following
@@ -178,35 +250,35 @@ etna_emit_state(struct etna_context *ctx)
        etna_set_state(stream, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH);
     }
  
-   /* If MULTI_SAMPLE_CONFIG.MSAA_SAMPLES changed, clobber affected shader
-    * state to make sure it is always rewritten. */
-   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
-      if ((ctx->gpu3d.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK) !=
-          (ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK)) {
-         /* XXX what does the GPU set these states to on MSAA samples change?
-          * Does it do the right thing?
-          * (increase/decrease as necessary) or something else? Just set some
-          * invalid value until we know for
-          * sure. */
-         ctx->gpu3d.PS_INPUT_COUNT = 0xffffffff;
-         ctx->gpu3d.PS_TEMP_REGISTER_CONTROL = 0xffffffff;
-      }
-   }
-
     /* Update vertex elements. This is different from any of the other states, in that
      * a) the number of vertex elements written matters: so write only active ones
      * b) the vertex element states must all be written: do not skip entries that stay the same */
     if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
-      /* Special case: vertex elements must always be sent in full if changed */
-      /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
-         ctx->vertex_elements->num_elements,
-         ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
-      if (ctx->specs.halti >= 2) {
-         /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
+      if (ctx->specs.halti >= 5) {
+         /*17800*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
+            ctx->vertex_elements->num_elements,
+            ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG0);
+         /*17A00*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_SCALE(0),
              ctx->vertex_elements->num_elements,
              ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
+         /*17A80*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),
+            ctx->vertex_elements->num_elements,
+            ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG1);
+      } else {
+         /* Special case: vertex elements must always be sent in full if changed */
+         /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
+            ctx->vertex_elements->num_elements,
+            ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
+         if (ctx->specs.halti >= 2) {
+            /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
+               ctx->vertex_elements->num_elements,
+               ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
+         }
        }
     }
+   unsigned vs_output_count = etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex
+                           ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
+                           : ctx->shader_state.VS_OUTPUT_COUNT;
  
     /* The following code is originally generated by gen_merge_state.py, to
      * emit state in increasing order of address (this makes it possible to merge
@@ -247,52 +319,48 @@ etna_emit_state(struct etna_context *ctx)
        /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
        /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL);
     }
-   if (likely((dirty & (ETNA_DIRTY_VERTEX_BUFFERS) && ctx->specs.stream_count == 1))) {
-      /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
-      /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
-   }
     if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
        /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);
     }
-   if (likely((dirty & (ETNA_DIRTY_VERTEX_BUFFERS)) && ctx->specs.stream_count > 1)) {
-      for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
-         /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
-      }
-      for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
-         if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
-            /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
+   if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
+      if (ctx->specs.halti >= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */
+         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+            /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
           }
+         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+            if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
+               /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
+            }
+         }
+         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+            if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
+               /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_UNK14680(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_UNK14680);
+            }
+         }
+      } else if (ctx->specs.stream_count > 1) { /* hw w/ multiple vertex streams; exactly one stream must fall through to the single-stream registers below */
+         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+            /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
+         }
+         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+            if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
+               /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
+            }
+         }
+      } else { /* hw w/ single vertex stream */
+         /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
+         /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
        }
     }
-   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
-      /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
-   }
     if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {
-      bool point_size_per_vertex =
-         etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex;
  
-      /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT,
-                           point_size_per_vertex
-                              ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
-                              : ctx->shader_state.VS_OUTPUT_COUNT);
+      /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, vs_output_count);
     }
     if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
        /*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);
        /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL);
     }
-   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
-      for (int x = 0; x < 4; ++x) {
-         /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
-      }
-   }
-   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
-      for (int x = 0; x < 4; ++x) {
-         /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
-      }
-   }
     if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
        /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
-      /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
     }
     if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
        /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
@@ -321,11 +389,6 @@ etna_emit_state(struct etna_context *ctx)
        /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);
        /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);
     }
-   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
-      for (int x = 0; x < 10; ++x) {
-         /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
-      }
-   }
     if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
                           ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
        /* this is a bit of a mess: rasterizer.scissor determines whether to use
@@ -381,17 +444,7 @@ etna_emit_state(struct etna_context *ctx)
     if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
        /*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
     }
-   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
-      /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
-      for (int x = 0; x < 4; ++x) {
-         /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
-      }
-      for (int x = 0; x < 16; ++x) {
-         /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
-      }
-   }
     if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
-      /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
        /*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);
        /*01008*/ EMIT_STATE(PS_INPUT_COUNT,
                             ctx->framebuffer.msaa_mode
@@ -402,7 +455,6 @@ etna_emit_state(struct etna_context *ctx)
                                ? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA
                                : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
        /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL);
-      /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
     }
     if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
        uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG;
@@ -488,119 +540,20 @@ etna_emit_state(struct etna_context *ctx)
        /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);
        /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);
     }
-   if (unlikely(dirty & ETNA_DIRTY_SAMPLER_VIEWS)) {
-      for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
-         if ((1 << x) & active_samplers) {
-            struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
-            /*01720*/ EMIT_STATE(TS_SAMPLER_CONFIG(x), sv->TS_SAMPLER_CONFIG);
-         }
-      }
-      for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
-         if ((1 << x) & active_samplers) {
-            struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
-            /*01740*/ EMIT_STATE_RELOC(TS_SAMPLER_STATUS_BASE(x), &sv->TS_SAMPLER_STATUS_BASE);
-         }
-      }
-      for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
-         if ((1 << x) & active_samplers) {
-            struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
-            /*01760*/ EMIT_STATE(TS_SAMPLER_CLEAR_VALUE(x), sv->TS_SAMPLER_CLEAR_VALUE);
-         }
-      }
-      for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
-         if ((1 << x) & active_samplers) {
-            struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
-            /*01780*/ EMIT_STATE(TS_SAMPLER_CLEAR_VALUE2(x), sv->TS_SAMPLER_CLEAR_VALUE2);
-         }
-      }
-   }
-   if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) {
-      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
-         uint32_t val = 0; /* 0 == sampler inactive */
-
-         /* set active samplers to their configuration value (determined by both
-          * the sampler state and sampler view) */
-         if ((1 << x) & active_samplers) {
-            struct etna_sampler_state *ss = etna_sampler_state(ctx->sampler[x]);
-            struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
-
-            val = (ss->TE_SAMPLER_CONFIG0 & sv->TE_SAMPLER_CONFIG0_MASK) |
-                  sv->TE_SAMPLER_CONFIG0;
-         }
-
-         /*02000*/ EMIT_STATE(TE_SAMPLER_CONFIG0(x), val);
-      }
-   }
-   if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) {
-      struct etna_sampler_view *sv;
-
-      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
-         if ((1 << x) & active_samplers) {
-            sv = etna_sampler_view(ctx->sampler_view[x]);
-            /*02040*/ EMIT_STATE(TE_SAMPLER_SIZE(x), sv->TE_SAMPLER_SIZE);
-         }
-      }
-      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
-         if ((1 << x) & active_samplers) {
-            sv = etna_sampler_view(ctx->sampler_view[x]);
-            /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), sv->TE_SAMPLER_LOG_SIZE);
-         }
-      }
-   }
-   if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) {
-      struct etna_sampler_state *ss;
-      struct etna_sampler_view *sv;
-
-      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
-         if ((1 << x) & active_samplers) {
-            ss = etna_sampler_state(ctx->sampler[x]);
-            sv = etna_sampler_view(ctx->sampler_view[x]);
-
-            /* min and max lod is determined both by the sampler and the view */
-            /*020C0*/ EMIT_STATE(TE_SAMPLER_LOD_CONFIG(x),
-                                 ss->TE_SAMPLER_LOD_CONFIG |
-                                 VIVS_TE_SAMPLER_LOD_CONFIG_MAX(MIN2(ss->max_lod, sv->max_lod)) |
-                                 VIVS_TE_SAMPLER_LOD_CONFIG_MIN(MAX2(ss->min_lod, sv->min_lod)));
-         }
-      }
-      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
-         if ((1 << x) & active_samplers) {
-            ss = etna_sampler_state(ctx->sampler[x]);
-            sv = etna_sampler_view(ctx->sampler_view[x]);
-
-            /*021C0*/ EMIT_STATE(TE_SAMPLER_CONFIG1(x), ss->TE_SAMPLER_CONFIG1 |
-                                                        sv->TE_SAMPLER_CONFIG1);
-         }
-      }
-   }
-   if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) {
-      for (int y = 0; y < VIVS_TE_SAMPLER_LOD_ADDR__LEN; ++y) {
-         for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
-            if ((1 << x) & active_samplers) {
-               struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
-               /*02400*/ EMIT_STATE_RELOC(TE_SAMPLER_LOD_ADDR(x, y),&sv->TE_SAMPLER_LOD_ADDR[y]);
-            }
-         }
-      }
-   }
     if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
        /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);
-      /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
-      for (int x = 0; x < 2; ++x) {
-         /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
-      }
-   }
-   if (unlikely(ctx->specs.tex_astc && (dirty & (ETNA_DIRTY_SAMPLER_VIEWS)))) {
-      for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
-         if ((1 << x) & active_samplers) {
-            struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]);
-            /*10500*/ EMIT_STATE(NTE_SAMPLER_ASTC0(x), sv->TE_SAMPLER_ASTC0);
-         }
-      }
     }
     etna_coalesce_end(stream, &coalesce);
     /* end only EMIT_STATE */
  
+   /* Emit strongly architecture-specific state */
+   if (ctx->specs.halti >= 5)
+      emit_halti5_only_state(ctx, vs_output_count);
+   else
+      emit_pre_halti5_state(ctx);
+
+   ctx->emit_texture_state(ctx);
+
     /* Insert a FE/PE stall as changing the shader instructions (and maybe
      * the uniforms) can corrupt the previous in-progress draw operation.
      * Observed with amoeba on GC2000 during the right-to-left rendering
@@ -635,10 +588,40 @@ etna_emit_state(struct etna_context *ctx)
           ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size);
  
     /**** Large dynamically-sized state ****/
+   bool do_uniform_flush = ctx->specs.halti < 5;
     if (dirty & (ETNA_DIRTY_SHADER)) {
        /* Special case: a new shader was loaded; simply re-load all uniforms and
         * shader code at once */
-      if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
+      /* This sequence is special, do not change ordering unless necessary. According to comment
+         snippets in the Vivante kernel driver a process called "steering" goes on while programming
+         shader state. This (as I understand it) means certain unified states are "steered"
+         toward a specific shader unit (VS/PS/...) based on either explicit flags in register
+         00860, or what other state is written before "auto-steering". So this means some
+         state can legitimately be programmed multiple times.
+       */
+
+      if (ctx->specs.halti >= 5) { /* ICACHE (HALTI5) */
+         assert(ctx->shader_state.VS_INST_ADDR.bo && ctx->shader_state.PS_INST_ADDR.bo);
+         /* Set icache (VS) */
+         etna_set_state(stream, VIVS_VS_NEWRANGE_LOW, 0);
+         etna_set_state(stream, VIVS_VS_NEWRANGE_HIGH, ctx->shader_state.vs_inst_mem_size / 4);
+         assert(ctx->shader_state.VS_INST_ADDR.bo);
+         etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
+         etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
+         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
+         etna_set_state(stream, VIVS_VS_ICACHE_COUNT, ctx->shader_state.vs_inst_mem_size / 4 - 1);
+
+         /* Set icache (PS) */
+         etna_set_state(stream, VIVS_PS_NEWRANGE_LOW, 0);
+         etna_set_state(stream, VIVS_PS_NEWRANGE_HIGH, ctx->shader_state.ps_inst_mem_size / 4);
+         assert(ctx->shader_state.PS_INST_ADDR.bo);
+         etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
+         etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
+         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
+         etna_set_state(stream, VIVS_PS_ICACHE_COUNT, ctx->shader_state.ps_inst_mem_size / 4 - 1);
+
+      } else if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
+         /* ICACHE (pre-HALTI5) */
           assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers);
           /* Set icache (VS) */
           etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
@@ -680,11 +663,14 @@ etna_emit_state(struct etna_context *ctx)
           etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
           etna_set_state(stream, VIVS_PS_UNIFORM_BASE, ctx->specs.max_vs_uniforms);
        }
-      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
+
+      if (do_uniform_flush)
+         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
        etna_set_state_multi(stream, ctx->specs.vs_uniforms_offset,
                                       ctx->shader_state.vs_uniforms_size,
                                       ctx->shader_state.VS_UNIFORMS);
-      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
+      if (do_uniform_flush)
+         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
        etna_set_state_multi(stream, ctx->specs.ps_uniforms_offset,
                                       ctx->shader_state.ps_uniforms_size,
                                       ctx->shader_state.PS_UNIFORMS);
@@ -692,15 +678,21 @@ etna_emit_state(struct etna_context *ctx)
        /* Copy uniforms to gpu3d, so that incremental updates to uniforms are
         * possible as long as the
         * same shader remains bound */
-      ctx->gpu3d.vs_uniforms_size = ctx->shader_state.vs_uniforms_size;
-      ctx->gpu3d.ps_uniforms_size = ctx->shader_state.ps_uniforms_size;
        memcpy(ctx->gpu3d.VS_UNIFORMS, ctx->shader_state.VS_UNIFORMS,
               ctx->shader_state.vs_uniforms_size * 4);
        memcpy(ctx->gpu3d.PS_UNIFORMS, ctx->shader_state.PS_UNIFORMS,
               ctx->shader_state.ps_uniforms_size * 4);
+
+      if (ctx->specs.halti >= 5) {
+         /* HALTI5 needs to be prompted to pre-fetch shaders */
+         etna_set_state(stream, VIVS_VS_ICACHE_PREFETCH, 0x00000000);
+         etna_set_state(stream, VIVS_PS_ICACHE_PREFETCH, 0x00000000);
+         etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
+      }
     } else {
        /* ideally this cache would only be flushed if there are VS uniform changes */
-      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
+      if (do_uniform_flush)
+         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
        etna_coalesce_start(stream, &coalesce);
        for (int x = 0; x < ctx->shader.vs->uniforms.const_count; ++x) {
           if (ctx->gpu3d.VS_UNIFORMS[x] != ctx->shader_state.VS_UNIFORMS[x]) {
@@ -711,7 +703,8 @@ etna_emit_state(struct etna_context *ctx)
        etna_coalesce_end(stream, &coalesce);
  
        /* ideally this cache would only be flushed if there are PS uniform changes */
-      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
+      if (do_uniform_flush)
+         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
        etna_coalesce_start(stream, &coalesce);
        for (int x = 0; x < ctx->shader.fs->uniforms.const_count; ++x) {
           if (ctx->gpu3d.PS_UNIFORMS[x] != ctx->shader_state.PS_UNIFORMS[x]) {
@@ -726,4 +719,5 @@ etna_emit_state(struct etna_context *ctx)
  #undef EMIT_STATE_FIXP
  #undef EMIT_STATE_RELOC
     ctx->dirty = 0;
+   ctx->dirty_sampler_views = 0;
  }