ilo: embed ilo_state_sol in ilo_shader
author Chia-I Wu <olvaffe@gmail.com>
Fri, 29 May 2015 07:25:13 +0000 (15:25 +0800)
committer Chia-I Wu <olvaffe@gmail.com>
Sun, 14 Jun 2015 17:07:09 +0000 (01:07 +0800)
src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
src/gallium/drivers/ilo/ilo_blitter.h
src/gallium/drivers/ilo/ilo_blitter_rectlist.c
src/gallium/drivers/ilo/ilo_render_gen7.c
src/gallium/drivers/ilo/ilo_shader.c
src/gallium/drivers/ilo/ilo_shader.h
src/gallium/drivers/ilo/ilo_state.h
src/gallium/drivers/ilo/shader/ilo_shader_internal.h

index d5a4c778a8763ed1bc7962fe601985b00417cc82..3a2522186be9e3e22e0e34a1952aff41a3ffd35c 100644 (file)
@@ -37,6 +37,7 @@
 #include "ilo_dev.h"
 #include "ilo_state_3d.h"
 #include "ilo_state_sampler.h"
+#include "ilo_state_sol.h"
 #include "ilo_builder.h"
 
 static inline void
@@ -1013,131 +1014,41 @@ gen7_disable_3DSTATE_GS(struct ilo_builder *builder)
 
 static inline void
 gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder,
-                       int render_stream,
-                       bool render_disable,
-                       int vertex_attrib_count,
-                       const int *buf_strides)
+                       const struct ilo_state_sol *sol)
 {
    const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 5 : 3;
    uint32_t *dw;
-   int buf_mask;
 
    ILO_DEV_ASSERT(builder->dev, 7, 8);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) | (cmd_len - 2);
-
-   dw[1] = render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT;
-   if (render_disable)
-      dw[1] |= GEN7_SO_DW1_RENDER_DISABLE;
-
-   if (buf_strides) {
-      buf_mask = ((bool) buf_strides[3]) << 3 |
-                 ((bool) buf_strides[2]) << 2 |
-                 ((bool) buf_strides[1]) << 1 |
-                 ((bool) buf_strides[0]);
-      if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
-         dw[3] = buf_strides[1] << 16 | buf_strides[0];
-         dw[4] = buf_strides[3] << 16 | buf_strides[1];
-      }
-   } else {
-      buf_mask = 0;
-   }
-
-   if (buf_mask) {
-      int read_len;
-
-      dw[1] |= GEN7_SO_DW1_SO_ENABLE |
-               GEN7_SO_DW1_STATISTICS;
-      /* API_OPENGL */
-      if (true)
-         dw[1] |= GEN7_REORDER_TRAILING << GEN7_SO_DW1_REORDER_MODE__SHIFT;
-      if (ilo_dev_gen(builder->dev) < ILO_GEN(8))
-         dw[1] |= buf_mask << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT;
-
-      read_len = (vertex_attrib_count + 1) / 2;
-      if (!read_len)
-         read_len = 1;
-
-      dw[2] = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
-              (read_len - 1) << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
-              0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
-              (read_len - 1) << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
-              0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
-              (read_len - 1) << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
-              0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
-              (read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
-   } else {
-      dw[2] = 0;
+   /* see sol_set_gen7_3DSTATE_STREAMOUT() */
+   dw[1] = sol->so[0];
+   dw[2] = sol->so[1];
+   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
+      dw[3] = sol->so[2];
+      dw[4] = sol->so[3];
    }
 }
 
 static inline void
 gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
-                          const struct pipe_stream_output_info *so_info)
+                          const struct ilo_state_sol *sol)
 {
    /*
     * Note that "DWord Length" has 9 bits for this command and the type of
     * cmd_len cannot be uint8_t.
     */
    uint16_t cmd_len;
-   struct {
-      int buf_selects;
-      int decl_count;
-      uint16_t decls[128];
-   } streams[4];
-   unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
-   int hw_decl_count, i;
+   int cmd_decl_count;
    uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 7, 8);
 
-   memset(streams, 0, sizeof(streams));
-   memset(buf_offsets, 0, sizeof(buf_offsets));
-
-   for (i = 0; i < so_info->num_outputs; i++) {
-      unsigned decl, st, buf, reg, mask;
-
-      st = so_info->output[i].stream;
-      buf = so_info->output[i].output_buffer;
-
-      /* pad with holes */
-      while (buf_offsets[buf] < so_info->output[i].dst_offset) {
-         int num_dwords;
-
-         num_dwords = so_info->output[i].dst_offset - buf_offsets[buf];
-         if (num_dwords > 4)
-            num_dwords = 4;
-
-         decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
-                GEN7_SO_DECL_HOLE_FLAG |
-                ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
-
-         assert(streams[st].decl_count < Elements(streams[st].decls));
-         streams[st].decls[streams[st].decl_count++] = decl;
-         buf_offsets[buf] += num_dwords;
-      }
-      assert(buf_offsets[buf] == so_info->output[i].dst_offset);
-
-      reg = so_info->output[i].register_index;
-      mask = ((1 << so_info->output[i].num_components) - 1) <<
-         so_info->output[i].start_component;
-
-      decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
-             reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
-             mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
-
-      assert(streams[st].decl_count < Elements(streams[st].decls));
-
-      streams[st].buf_selects |= 1 << buf;
-      streams[st].decls[streams[st].decl_count++] = decl;
-      buf_offsets[buf] += so_info->output[i].num_components;
-   }
-
    if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) {
-      hw_decl_count = MAX4(streams[0].decl_count, streams[1].decl_count,
-                           streams[2].decl_count, streams[3].decl_count);
+      cmd_decl_count = sol->decl_count;
    } else {
       /*
        * From the Ivy Bridge PRM, volume 2 part 1, page 201:
@@ -1146,28 +1057,22 @@ gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
        *      whenever this command is issued. The "Num Entries [n]" fields
        *      still contain the actual numbers of valid decls."
        */
-      hw_decl_count = 128;
+      cmd_decl_count = 128;
    }
 
-   cmd_len = 3 + 2 * hw_decl_count;
+   cmd_len = 3 + 2 * cmd_decl_count;
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2);
-   dw[1] = streams[3].buf_selects << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
-           streams[2].buf_selects << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
-           streams[1].buf_selects << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
-           streams[0].buf_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
-   dw[2] = streams[3].decl_count << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
-           streams[2].decl_count << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
-           streams[1].decl_count << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
-           streams[0].decl_count << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
-   dw += 3;
-
-   for (i = 0; i < hw_decl_count; i++) {
-      dw[0] = streams[1].decls[i] << 16 | streams[0].decls[i];
-      dw[1] = streams[3].decls[i] << 16 | streams[2].decls[i];
-      dw += 2;
+   /* see sol_set_gen7_3DSTATE_SO_DECL_LIST() */
+   dw[1] = sol->so[4];
+   dw[2] = sol->so[5];
+   memcpy(&dw[3], sol->decl, sizeof(sol->decl[0]) * sol->decl_count);
+
+   if (sol->decl_count < cmd_decl_count) { /* zero the unused trailing decls */
+      memset(&dw[3 + 2 * sol->decl_count], 0, sizeof(sol->decl[0]) *
+            (cmd_decl_count - sol->decl_count));
    }
 }
 
index 072f0f7f7fc6153ccac38e80f625ae7cb5c2366f..3d02063f8092c1037f85679a528ff29591a64e9e 100644 (file)
@@ -61,6 +61,8 @@ struct ilo_blitter {
    struct ilo_ve_state ve;
    struct pipe_draw_info draw;
 
+   struct ilo_state_sol sol;
+
    struct ilo_state_viewport vp;
    uint32_t vp_data[20];
 
index 9d4319563143e23f528e0e5c21529e6679638419..b2b839cbb41405b770d5125374a469d071114b02 100644 (file)
@@ -64,6 +64,8 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter)
    blitter->draw.mode = ILO_PRIM_RECTANGLES;
    blitter->draw.count = 3;
 
+   ilo_state_sol_init_disabled(&blitter->sol, blitter->ilo->dev, false);
+
    /**
     * From the Haswell PRM, volume 7, page 615:
     *
index 0931a771876843676c233a2aebf1db5c2c301052..95884a0d51d4fcdfde20cab60fddfb819091e1cb 100644 (file)
@@ -420,7 +420,7 @@ gen7_draw_sol(struct ilo_render *r,
               const struct ilo_state_vector *vec,
               struct ilo_render_draw_session *session)
 {
-   const struct pipe_stream_output_info *so_info;
+   const struct ilo_state_sol *sol;
    const struct ilo_shader_state *shader;
    bool dirty_sh = false;
 
@@ -433,13 +433,16 @@ gen7_draw_sol(struct ilo_render *r,
       dirty_sh = DIRTY(VS);
    }
 
-   so_info = ilo_shader_get_kernel_so_info(shader);
+   sol = ilo_shader_get_kernel_sol(shader);
 
    /* 3DSTATE_SO_BUFFER */
    if ((DIRTY(SO) || dirty_sh || r->batch_bo_changed) &&
        vec->so.enabled) {
+      const struct pipe_stream_output_info *so_info;
       int i;
 
+      so_info = ilo_shader_get_kernel_so_info(shader);
+
       for (i = 0; i < vec->so.count; i++) {
          const int stride = so_info->stride[i] * 4; /* in bytes */
 
@@ -452,22 +455,30 @@ gen7_draw_sol(struct ilo_render *r,
 
    /* 3DSTATE_SO_DECL_LIST */
    if (dirty_sh && vec->so.enabled)
-      gen7_3DSTATE_SO_DECL_LIST(r->builder, so_info);
-
-   /* 3DSTATE_STREAMOUT */
-   if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) {
-      const int output_count = ilo_shader_get_kernel_param(shader,
-            ILO_KERNEL_OUTPUT_COUNT);
-      int buf_strides[4] = { 0, 0, 0, 0 };
-      int i;
+      gen7_3DSTATE_SO_DECL_LIST(r->builder, sol);
 
-      for (i = 0; i < vec->so.count; i++)
-         buf_strides[i] = so_info->stride[i] * 4;
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 196-197:
+    *
+    *     "Anytime the SOL unit MMIO registers or non-pipeline state are
+    *      written, the SOL unit needs to receive a pipeline state update with
+    *      SOL unit dirty state for information programmed in MMIO/NP to get
+    *      loaded into the SOL unit.
+    *
+    *      The SOL unit incorrectly double buffers MMIO/NP registers and only
+    *      moves them into the design for usage when control topology is
+    *      received with the SOL unit dirty state.
+    *
+    *      If the state does not change, need to resend the same state.
+    *
+    *      Because of corruption, software must flush the whole fixed function
+    *      pipeline when 3DSTATE_STREAMOUT changes state."
+    *
+    * The first and fourth paragraphs are gone on Gen7.5+.
+    */
 
-      gen7_3DSTATE_STREAMOUT(r->builder, 0,
-            vec->rasterizer->state.rasterizer_discard,
-            output_count, buf_strides);
-   }
+   /* 3DSTATE_STREAMOUT */
+   gen7_3DSTATE_STREAMOUT(r->builder, sol);
 }
 
 static void
@@ -717,7 +728,7 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r,
    gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
    gen7_disable_3DSTATE_GS(r->builder);
 
-   gen7_3DSTATE_STREAMOUT(r->builder, 0, false, 0x0, 0);
+   gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol);
 
    gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
 
index af467064fe48ff0b04dd63bb75b083636999f513..e9eb042ebc8ff82e5900f90c10a4b49ea6b2dac7 100644 (file)
@@ -557,39 +557,103 @@ ilo_shader_state_search_variant(struct ilo_shader_state *state,
 }
 
 static void
-copy_so_info(struct ilo_shader *sh,
-             const struct pipe_stream_output_info *so_info)
+init_sol(struct ilo_shader *kernel,
+         const struct ilo_dev *dev,
+         const struct pipe_stream_output_info *so_info,
+         bool rasterizer_discard)
 {
-   unsigned i, attr;
+   struct ilo_state_sol_decl_info decls[4][PIPE_MAX_SO_OUTPUTS];
+   unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
+   struct ilo_state_sol_info info;
+   unsigned i;
 
-   if (!so_info->num_outputs)
+   if (!so_info->num_outputs) {
+      ilo_state_sol_init_disabled(&kernel->sol, dev, rasterizer_discard);
       return;
+   }
+
+   memset(&info, 0, sizeof(info));
+   info.data = kernel->sol_data;
+   info.data_size = sizeof(kernel->sol_data);
+   info.sol_enable = true;
+   info.stats_enable = true;
+   info.tristrip_reorder = GEN7_REORDER_TRAILING;
+   info.render_disable = rasterizer_discard;
+   info.render_stream = 0;
+
+   for (i = 0; i < 4; i++) {
+      info.buffer_strides[i] = so_info->stride[i] * 4;
 
-   sh->so_info = *so_info;
+      info.streams[i].cv_vue_attr_count = kernel->out.count;
+      info.streams[i].decls = decls[i];
+   }
 
+   memset(decls, 0, sizeof(decls));
+   memset(buf_offsets, 0, sizeof(buf_offsets));
    for (i = 0; i < so_info->num_outputs; i++) {
+      const unsigned stream = so_info->output[i].stream;
+      const unsigned buffer = so_info->output[i].output_buffer;
+      struct ilo_state_sol_decl_info *decl;
+      unsigned attr;
+
       /* figure out which attribute is sourced */
-      for (attr = 0; attr < sh->out.count; attr++) {
-         const int reg_idx = sh->out.register_indices[attr];
+      for (attr = 0; attr < kernel->out.count; attr++) {
+         const int reg_idx = kernel->out.register_indices[attr];
          if (reg_idx == so_info->output[i].register_index)
             break;
       }
-
-      if (attr < sh->out.count) {
-         sh->so_info.output[i].register_index = attr;
-      }
-      else {
+      if (attr >= kernel->out.count) {
          assert(!"stream output an undefined register");
-         sh->so_info.output[i].register_index = 0;
+         attr = 0;
       }
 
+      if (info.streams[stream].vue_read_count < attr + 1)
+         info.streams[stream].vue_read_count = attr + 1;
+
+      /* pad with holes first */
+      while (buf_offsets[buffer] < so_info->output[i].dst_offset) {
+         int num_dwords;
+
+         num_dwords = so_info->output[i].dst_offset - buf_offsets[buffer];
+         if (num_dwords > 4)
+            num_dwords = 4;
+
+         assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
+         decl = &decls[stream][info.streams[stream].decl_count];
+
+         decl->attr = 0;
+         decl->is_hole = true;
+         decl->component_base = 0;
+         decl->component_count = num_dwords;
+         decl->buffer = buffer;
+
+         info.streams[stream].decl_count++;
+         buf_offsets[buffer] += num_dwords;
+      }
+      assert(buf_offsets[buffer] == so_info->output[i].dst_offset);
+
+      assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
+      decl = &decls[stream][info.streams[stream].decl_count];
+
+      decl->attr = attr;
+      decl->is_hole = false;
       /* PSIZE is at W channel */
-      if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
+      if (kernel->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
          assert(so_info->output[i].start_component == 0);
          assert(so_info->output[i].num_components == 1);
-         sh->so_info.output[i].start_component = 3;
+         decl->component_base = 3;
+         decl->component_count = 1;
+      } else {
+         decl->component_base = so_info->output[i].start_component;
+         decl->component_count = so_info->output[i].num_components;
       }
+      decl->buffer = buffer;
+
+      info.streams[stream].decl_count++;
+      buf_offsets[buffer] += so_info->output[i].num_components;
    }
+
+   ilo_state_sol_init(&kernel->sol, dev, &info);
 }
 
 /**
@@ -599,17 +663,20 @@ static struct ilo_shader *
 ilo_shader_state_add_variant(struct ilo_shader_state *state,
                              const struct ilo_shader_variant *variant)
 {
+   bool rasterizer_discard = false;
    struct ilo_shader *sh;
 
    switch (state->info.type) {
    case PIPE_SHADER_VERTEX:
       sh = ilo_shader_compile_vs(state, variant);
+      rasterizer_discard = variant->u.vs.rasterizer_discard;
       break;
    case PIPE_SHADER_FRAGMENT:
       sh = ilo_shader_compile_fs(state, variant);
       break;
    case PIPE_SHADER_GEOMETRY:
       sh = ilo_shader_compile_gs(state, variant);
+      rasterizer_discard = variant->u.gs.rasterizer_discard;
       break;
    case PIPE_SHADER_COMPUTE:
       sh = ilo_shader_compile_cs(state, variant);
@@ -625,7 +692,8 @@ ilo_shader_state_add_variant(struct ilo_shader_state *state,
 
    sh->variant = *variant;
 
-   copy_so_info(sh, &state->info.stream_output);
+   init_sol(sh, state->info.dev, &state->info.stream_output,
+         rasterizer_discard);
 
    ilo_shader_state_add_shader(state, sh);
 
@@ -1163,12 +1231,18 @@ ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader)
  */
 const struct pipe_stream_output_info *
 ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
+{
+   return &shader->info.stream_output;
+}
+
+const struct ilo_state_sol *
+ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader)
 {
    const struct ilo_shader *kernel = shader->shader;
 
    assert(kernel);
 
-   return &kernel->so_info;
+   return &kernel->sol;
 }
 
 /**
index 8a359001bb821c9d8aa5857e1d663152a42c44ab..ddcd6f0356f9f2c87b1e4575c5bec99c22ebe8af 100644 (file)
@@ -96,6 +96,7 @@ struct ilo_rasterizer_state;
 struct ilo_shader_cache;
 struct ilo_shader_state;
 struct ilo_shader_cso;
+struct ilo_state_sol;
 struct ilo_state_vector;
 
 struct ilo_shader_cache *
@@ -168,6 +169,9 @@ ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader);
 const struct pipe_stream_output_info *
 ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader);
 
+const struct ilo_state_sol *
+ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader);
+
 const struct ilo_kernel_routing *
 ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader);
 
index 39d0d7eac8b0ad860d7f37388791a44fa92b9362..ae4639fe3f7806157424e15a62b5fbd4b75715cc 100644 (file)
@@ -32,6 +32,7 @@
 #include "core/ilo_state_cc.h"
 #include "core/ilo_state_raster.h"
 #include "core/ilo_state_sampler.h"
+#include "core/ilo_state_sol.h"
 #include "core/ilo_state_surface.h"
 #include "core/ilo_state_viewport.h"
 #include "core/ilo_state_zs.h"
index d2dc2f5b5b436519268f2126654150669787a408..603d13e5766acd9d345c0c711cdc3f803519832c 100644 (file)
@@ -28,6 +28,8 @@
 #ifndef ILO_SHADER_INTERNAL_H
 #define ILO_SHADER_INTERNAL_H
 
+#include "core/ilo_state_sol.h"
+
 #include "ilo_common.h"
 #include "ilo_state.h"
 #include "ilo_shader.h"
@@ -111,7 +113,9 @@ struct ilo_shader {
 
    bool stream_output;
    int svbi_post_inc;
-   struct pipe_stream_output_info so_info;
+
+   uint32_t sol_data[PIPE_MAX_SO_OUTPUTS][2];
+   struct ilo_state_sol sol;
 
    /* for VS stream output / rasterizer discard */
    int gs_offsets[3];