iris: some shader bits
author Kenneth Graunke <kenneth@whitecape.org>
Mon, 22 Jan 2018 07:55:04 +0000 (23:55 -0800)
committer Kenneth Graunke <kenneth@whitecape.org>
Thu, 21 Feb 2019 18:26:05 +0000 (10:26 -0800)
src/gallium/drivers/iris/iris_context.h
src/gallium/drivers/iris/iris_program.c
src/gallium/drivers/iris/iris_state.c

index 15fae9ba8768cd6a47c0cf5cbe2e960917e39a9f..022a79a1dc4e70d46237dc6d245bf4d3beb0ccee 100644 (file)
@@ -72,6 +72,7 @@ enum iris_dirty {
    IRIS_DIRTY_FS                       = (1ull << 32),
    IRIS_DIRTY_CS                       = (1ull << 33),
    IRIS_DIRTY_STATE_BASE_ADDRESS       = (1ull << 34),
+   IRIS_DIRTY_URB                      = (1ull << 35),
 };
 
 struct iris_depth_stencil_alpha_state;
index 2eb74263957c05eb43a8199dfe9e41b77c7d198f..4772baea45eb030b925cc4e71f3bc3cd6cd643fd 100644 (file)
@@ -108,6 +108,9 @@ iris_bind_tes_state(struct pipe_context *ctx, void *hwcso)
 {
    struct iris_context *ice = (struct iris_context *)ctx;
 
+   if (!!hwcso != !!ice->shaders.progs[MESA_SHADER_TESS_EVAL])
+      ice->state.dirty |= IRIS_DIRTY_URB;
+
    ice->shaders.progs[MESA_SHADER_TESS_EVAL] = hwcso;
    ice->state.dirty |= IRIS_DIRTY_UNCOMPILED_TES;
 }
@@ -117,6 +120,9 @@ iris_bind_gs_state(struct pipe_context *ctx, void *hwcso)
 {
    struct iris_context *ice = (struct iris_context *)ctx;
 
+   if (!!hwcso != !!ice->shaders.progs[MESA_SHADER_GEOMETRY])
+      ice->state.dirty |= IRIS_DIRTY_URB;
+
    ice->shaders.progs[MESA_SHADER_GEOMETRY] = hwcso;
    ice->state.dirty |= IRIS_DIRTY_UNCOMPILED_GS;
 }
@@ -261,6 +267,24 @@ iris_update_compiled_vs(struct iris_context *ice)
       iris_compile_vs(ice, ice->shaders.progs[MESA_SHADER_VERTEX], &key);
 }
 
+static void
+iris_update_compiled_tcs(struct iris_context *ice)
+{
+   // XXX: TCS
+}
+
+static void
+iris_update_compiled_tes(struct iris_context *ice)
+{
+   // XXX: TES
+}
+
+static void
+iris_update_compiled_gs(struct iris_context *ice)
+{
+   // XXX: GS
+}
+
 static bool
 iris_compile_fs(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
@@ -379,10 +403,35 @@ update_last_vue_map(struct iris_context *ice)
 void
 iris_update_compiled_shaders(struct iris_context *ice)
 {
+   struct brw_vue_prog_data *old_prog_datas[4];
+   if (!(ice->state.dirty & IRIS_DIRTY_URB)) {
+      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
+         old_prog_datas[i] = (void *) ice->shaders.prog_data[i];
+   }
+
    iris_update_compiled_vs(ice);
+   iris_update_compiled_tcs(ice);
+   iris_update_compiled_tes(ice);
+   iris_update_compiled_gs(ice);
    update_last_vue_map(ice);
    iris_update_compiled_fs(ice);
    // ...
+
+   if (!(ice->state.dirty & IRIS_DIRTY_URB)) {
+      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
+         struct brw_vue_prog_data *old = old_prog_datas[i];
+         struct brw_vue_prog_data *new = (void *) ice->shaders.prog_data[i];
+         if (!!old != !!new ||
+             (new && new->urb_entry_size != old->urb_entry_size)) {
+            ice->state.dirty |= IRIS_DIRTY_URB;
+            break;
+         }
+      }
+   }
+
+   if (ice->state.dirty & IRIS_DIRTY_URB) {
+      // ... back to the state module :/
+   }
 }
 
 void
index 8e49312f58da7c65ae9d877c449d0d85cca04f89..0260cb43f80cdfe18bd7e3212e37fa5f414db0de 100644 (file)
@@ -1367,6 +1367,9 @@ iris_upload_render_state(struct iris_context *ice,
 {
    const uint64_t dirty = ice->state.dirty;
 
+   if (dirty & IRIS_DIRTY_URB) {
+   }
+
    if (dirty & IRIS_DIRTY_WM_DEPTH_STENCIL) {
       struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
       struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref;
@@ -1624,6 +1627,7 @@ iris_upload_render_state(struct iris_context *ice,
    3DSTATE_GS
    3DSTATE_PS_EXTRA
    3DSTATE_PS
+
    3DSTATE_STREAMOUT
    3DSTATE_SO_BUFFER
    3DSTATE_SO_DECL_LIST
@@ -1649,6 +1653,212 @@ iris_bind_compute_state(struct pipe_context *ctx, void *state)
 {
 }
 
+   //pkt.SamplerCount =
+      //DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);
+   //pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 :
+      //ffs(stage_state->per_thread_scratch) - 11;
+
+#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix)                          \
+   pkt.KernelStartPointer = prog_offset;                                  \
+   pkt.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4;  \
+   pkt.FloatingPointMode = prog_data->use_alt_mode;                       \
+                                                                          \
+   pkt.DispatchGRFStartRegisterForURBData =                               \
+      prog_data->dispatch_grf_start_reg;                                  \
+   pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length;       \
+   pkt.prefix##URBEntryReadOffset = 0;                                    \
+                                                                          \
+   pkt.StatisticsEnable = true;                                           \
+   pkt.Enable           = true;
+
+static void
+iris_create_vs_state(struct gen_device_info *devinfo,
+                     struct brw_vs_prog_data *vs_prog_data,
+                     unsigned prog_offset)
+{
+   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
+   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
+
+   uint32_t vs_state[GENX(3DSTATE_VS_length)];
+
+   iris_pack_command(GENX(3DSTATE_VS), vs_state, vs) {
+      INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
+      vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
+      vs.SIMD8DispatchEnable = true;
+      vs.UserClipDistanceCullTestEnableBitmask =
+         vue_prog_data->cull_distance_mask;
+   }
+}
+
+static void
+iris_create_tcs_state(struct gen_device_info *devinfo,
+                      struct brw_tcs_prog_data *tcs_prog_data,
+                      unsigned prog_offset)
+{
+   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
+   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
+
+   uint32_t hs_state[GENX(3DSTATE_HS_length)];
+
+   iris_pack_command(GENX(3DSTATE_HS), hs_state, hs) {
+      INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);
+
+      hs.InstanceCount = tcs_prog_data->instances - 1;
+      hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
+      hs.IncludeVertexHandles = true;
+   }
+}
+
+static void
+iris_create_tes_state(struct gen_device_info *devinfo,
+                      struct brw_tes_prog_data *tes_prog_data,
+                      unsigned prog_offset)
+{
+   struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
+   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
+
+   uint32_t ds_state[GENX(3DSTATE_DS_length)];
+
+   iris_pack_command(GENX(3DSTATE_DS), ds_state, ds) {
+      INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
+
+      ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
+      ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
+      ds.ComputeWCoordinateEnable =
+         tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
+
+      ds.UserClipDistanceCullTestEnableBitmask =
+         vue_prog_data->cull_distance_mask;
+   }
+}
+
+static void
+iris_create_gs_state(struct gen_device_info *devinfo,
+                     struct brw_gs_prog_data *gs_prog_data,
+                     unsigned prog_offset)
+{
+   struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
+   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
+
+   uint32_t gs_state[GENX(3DSTATE_GS_length)];
+
+   iris_pack_command(GENX(3DSTATE_GS), gs_state, gs) {
+      INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
+
+      gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
+      gs.OutputTopology = gs_prog_data->output_topology;
+      gs.ControlDataHeaderSize =
+         gs_prog_data->control_data_header_size_hwords;
+      gs.InstanceControl = gs_prog_data->invocations - 1;
+      gs.DispatchMode = SIMD8;
+      gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
+      gs.ControlDataFormat = gs_prog_data->control_data_format;
+      gs.ReorderMode = TRAILING;
+      gs.ExpectedVertexCount = gs_prog_data->vertices_in;
+      gs.MaximumNumberofThreads =
+         GEN_GEN == 8 ? (devinfo->max_gs_threads / 2 - 1)
+                      : (devinfo->max_gs_threads - 1);
+
+      if (gs_prog_data->static_vertex_count != -1) {
+         gs.StaticOutput = true;
+         gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
+      }
+      gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;
+
+      gs.UserClipDistanceCullTestEnableBitmask =
+         vue_prog_data->cull_distance_mask;
+
+      const int urb_entry_write_offset = 1;
+      const uint32_t urb_entry_output_length =
+         DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
+         urb_entry_write_offset;
+
+      gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
+      gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
+   }
+}
+
+static void
+iris_create_fs_state(struct gen_device_info *devinfo,
+                      struct brw_wm_prog_data *wm_prog_data,
+                      unsigned prog_offset)
+{
+   struct brw_stage_prog_data *prog_data = &wm_prog_data->base;
+
+   uint32_t ps_state[GENX(3DSTATE_PS_length)];
+   uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)];
+
+   iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) {
+      ps.VectorMaskEnable = true;
+      //ps.SamplerCount = ...
+      ps.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4;
+      ps.FloatingPointMode = prog_data->use_alt_mode;
+      ps.MaximumNumberofThreadsPerPSD = 64 - (GEN_GEN == 8 ? 2 : 1);
+
+      ps.PushConstantEnable = prog_data->nr_params > 0 ||
+                              prog_data->ubo_ranges[0].length > 0;
+
+      /* From the documentation for this packet:
+       * "If the PS kernel does not need the Position XY Offsets to
+       *  compute a Position Value, then this field should be programmed
+       *  to POSOFFSET_NONE."
+       *
+       * "SW Recommendation: If the PS kernel needs the Position Offsets
+       *  to compute a Position XY value, this field should match Position
+       *  ZW Interpolation Mode to ensure a consistent position.xyzw
+       *  computation."
+       *
+       * We only require XY sample offsets. So, this recommendation doesn't
+       * look useful at the moment.  We might need this in future.
+       */
+      ps.PositionXYOffsetSelect =
+         wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
+      ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
+      ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
+      ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;
+
+      // XXX: Disable SIMD32 with 16x MSAA
+
+      ps.DispatchGRFStartRegisterForConstantSetupData0 =
+         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
+      ps.DispatchGRFStartRegisterForConstantSetupData1 =
+         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
+      ps.DispatchGRFStartRegisterForConstantSetupData2 =
+         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
+
+      ps.KernelStartPointer0 =
+         prog_offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
+      ps.KernelStartPointer1 =
+         prog_offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
+      ps.KernelStartPointer2 =
+         prog_offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
+   }
+
+   iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) {
+      psx.PixelShaderValid = true;
+      psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
+      psx.PixelShaderKillsPixel = wm_prog_data->uses_kill;
+      psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
+      psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
+      psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
+      psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
+
+      if (wm_prog_data->uses_sample_mask) {
+         /* TODO: conservative rasterization */
+         if (wm_prog_data->post_depth_coverage)
+            psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
+         else
+            psx.InputCoverageMaskState = ICMS_NORMAL;
+      }
+
+      psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
+      psx.PixelShaderPullsBary = wm_prog_data->pulls_bary;
+      psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
+
+      // XXX: UAV bit
+   }
+}
+
 void
 iris_destroy_state(struct iris_context *ice)
 {