i965: Add blorp support for gen4-5
author Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 9 Sep 2016 23:30:24 +0000 (16:30 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 26 May 2017 14:58:01 +0000 (07:58 -0700)
Due to complications with things such as URB setup on gen4-5, it's
easier to keep gen4-5 support in blorp completely internal to i965.  This
makes things a bit awkward because it means there's a file in i965
that includes blorp_priv.h, but it's either that or have a file in blorp
that includes brw_context.h.

Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
src/intel/blorp/blorp.c
src/intel/blorp/blorp_blit.c
src/intel/blorp/blorp_clear.c
src/intel/blorp/blorp_genX_exec.h
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/brw_blorp.c
src/mesa/drivers/dri/i965/brw_blorp.h
src/mesa/drivers/dri/i965/brw_context.c
src/mesa/drivers/dri/i965/gen4_blorp_exec.h [new file with mode: 0644]
src/mesa/drivers/dri/i965/genX_blorp_exec.c

index 7f1566f00a4380995a044098a984969783cdea16..ea3b8252a2ad92d85556adfc857fa2de8666a122 100644 (file)
@@ -124,10 +124,10 @@ brw_blorp_surface_info_init(struct blorp_context *blorp,
       info->z_offset = 0;
    }
 
-   /* Sandy Bridge has a limit of a maximum of 512 layers for layered
-    * rendering.
+   /* Sandy Bridge and earlier have a limit of a maximum of 512 layers for
+    * layered rendering.
     */
-   if (is_render_target && blorp->isl_dev->info->gen == 6)
+   if (is_render_target && blorp->isl_dev->info->gen <= 6)
       info->view.array_len = MIN2(info->view.array_len, 512);
 }
 
index 1f8ea492de950a93baceceb5f28f6f36f3c1e3cc..fe24f1f47533c732b84eda2e45b65e394a6f4ce5 100644 (file)
@@ -1672,6 +1672,18 @@ try_blorp_blit(struct blorp_batch *batch,
                                    coords->y.dst0, coords->y.dst1,
                                    coords->y.mirror);
 
+
+   if (devinfo->gen == 4) {
+      /* The MinLOD and MinimumArrayElement don't work properly for cube maps.
+       * Convert them to a single slice on gen4.
+       */
+      if (params->dst.surf.usage & ISL_SURF_USAGE_CUBE_BIT)
+         blorp_surf_convert_to_single_slice(batch->blorp->isl_dev, &params->dst);
+
+      if (params->src.surf.usage & ISL_SURF_USAGE_CUBE_BIT)
+         blorp_surf_convert_to_single_slice(batch->blorp->isl_dev, &params->src);
+   }
+
    if (devinfo->gen > 6 &&
        params->dst.surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
       assert(params->dst.surf.samples > 1);
index 40a1a10b3da595a050a569f2db66f38649dd45ff..fea5eb7b4422bc94a106562b3151b3637964f5b9 100644 (file)
@@ -366,11 +366,6 @@ blorp_clear(struct blorp_batch *batch,
    struct blorp_params params;
    blorp_params_init(&params);
 
-   params.x0 = x0;
-   params.y0 = y0;
-   params.x1 = x1;
-   params.y1 = y1;
-
    /* Manually apply the clear destination swizzle.  This way swizzled clears
     * will work for swizzles which we can't normally use for rendering and it
    * also ensures that they work on pre-Haswell hardware which can't swizzle
@@ -427,6 +422,27 @@ blorp_clear(struct blorp_batch *batch,
                                   start_layer, format, true);
       params.dst.view.swizzle = swizzle;
 
+      params.x0 = x0;
+      params.y0 = y0;
+      params.x1 = x1;
+      params.y1 = y1;
+
+      /* The MinLOD and MinimumArrayElement don't work properly for cube maps.
+       * Convert them to a single slice on gen4.
+       */
+      if (batch->blorp->isl_dev->info->gen == 4 &&
+          (params.dst.surf.usage & ISL_SURF_USAGE_CUBE_BIT)) {
+         blorp_surf_convert_to_single_slice(batch->blorp->isl_dev, &params.dst);
+
+         if (params.dst.tile_x_sa || params.dst.tile_y_sa) {
+            /* This is gen4 so there is no multisampling and sa == px. */
+            params.x0 += params.dst.tile_x_sa;
+            params.y0 += params.dst.tile_y_sa;
+            params.x1 += params.dst.tile_x_sa;
+            params.y1 += params.dst.tile_y_sa;
+         }
+      }
+
       params.num_samples = params.dst.surf.samples;
 
       /* We may be restricted on the number of layers we can bind at any one
index 058dedc1981b7b2a20a934695e515090bfc1f031..8b9b8d277552fbfc34aff7caee295bc7cd2d6b5f 100644 (file)
@@ -76,6 +76,10 @@ static void
 blorp_emit_urb_config(struct blorp_batch *batch,
                       unsigned vs_entry_size, unsigned sf_entry_size);
 
+static void
+blorp_emit_pipeline(struct blorp_batch *batch,
+                    const struct blorp_params *params);
+
 /***** BEGIN blorp_exec implementation ******/
 
 #include "genxml/gen_macros.h"
@@ -272,6 +276,9 @@ blorp_emit_vertex_buffers(struct blorp_batch *batch,
    vb[0].BufferAccessType = VERTEXDATA;
    vb[0].EndAddress = vb[0].BufferStartingAddress;
    vb[0].EndAddress.offset += size - 1;
+#elif GEN_GEN == 4
+   vb[0].BufferAccessType = VERTEXDATA;
+   vb[0].MaxIndex = 2;
 #endif
 
    blorp_emit_input_varying_data(batch, params,
@@ -290,6 +297,9 @@ blorp_emit_vertex_buffers(struct blorp_batch *batch,
    vb[1].BufferAccessType = INSTANCEDATA;
    vb[1].EndAddress = vb[1].BufferStartingAddress;
    vb[1].EndAddress.offset += size - 1;
+#elif GEN_GEN == 4
+   vb[1].BufferAccessType = INSTANCEDATA;
+   vb[1].MaxIndex = 0;
 #endif
 
    const unsigned num_dwords = 1 + GENX(VERTEX_BUFFER_STATE_length) * 2;
@@ -309,7 +319,8 @@ blorp_emit_vertex_elements(struct blorp_batch *batch,
 {
    const unsigned num_varyings =
       params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
-   const unsigned num_elements = 2 + num_varyings;
+   bool need_ndc = batch->blorp->compiler->devinfo->gen <= 5;
+   const unsigned num_elements = 2 + need_ndc + num_varyings;
 
    struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements];
    memset(ve, 0, num_elements * sizeof(*ve));
@@ -382,8 +393,31 @@ blorp_emit_vertex_elements(struct blorp_batch *batch,
 #endif
       .Component2Control = VFCOMP_STORE_SRC,
       .Component3Control = VFCOMP_STORE_SRC,
+#if GEN_GEN <= 5
+      .DestinationElementOffset = slot * 4,
+#endif
+   };
+   slot++;
+
+#if GEN_GEN <= 5
+   /* On Iron Lake and earlier, a normalized device coordinates (NDC)
+    * version of the position goes right after the normal VUE header and
+    * before position.  Since w == 1 for all of our coordinates, this is
+    * just a copy of the position.
+    */
+   ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) {
+      .VertexBufferIndex = 0,
+      .Valid = true,
+      .SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT,
+      .SourceElementOffset = 0,
+      .Component0Control = VFCOMP_STORE_SRC,
+      .Component1Control = VFCOMP_STORE_SRC,
+      .Component2Control = VFCOMP_STORE_SRC,
+      .Component3Control = VFCOMP_STORE_1_FP,
+      .DestinationElementOffset = slot * 4,
    };
    slot++;
+#endif
 
    ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) {
       .VertexBufferIndex = 0,
@@ -394,6 +428,9 @@ blorp_emit_vertex_elements(struct blorp_batch *batch,
       .Component1Control = VFCOMP_STORE_SRC,
       .Component2Control = VFCOMP_STORE_SRC,
       .Component3Control = VFCOMP_STORE_1_FP,
+#if GEN_GEN <= 5
+      .DestinationElementOffset = slot * 4,
+#endif
    };
    slot++;
 
@@ -407,6 +444,9 @@ blorp_emit_vertex_elements(struct blorp_batch *batch,
          .Component1Control = VFCOMP_STORE_SRC,
          .Component2Control = VFCOMP_STORE_SRC,
          .Component3Control = VFCOMP_STORE_SRC,
+#if GEN_GEN <= 5
+         .DestinationElementOffset = slot * 4,
+#endif
       };
       slot++;
    }
@@ -1162,6 +1202,7 @@ static void
 blorp_emit_surface_state(struct blorp_batch *batch,
                          const struct brw_blorp_surface_info *surface,
                          void *state, uint32_t state_offset,
+                         const bool color_write_disables[4],
                          bool is_render_target)
 {
    const struct isl_device *isl_dev = batch->blorp->isl_dev;
@@ -1178,13 +1219,26 @@ blorp_emit_surface_state(struct blorp_batch *batch,
    if (aux_usage == ISL_AUX_USAGE_HIZ)
       aux_usage = ISL_AUX_USAGE_NONE;
 
+   isl_channel_mask_t write_disable_mask = 0;
+   if (is_render_target && GEN_GEN <= 5) {
+      if (color_write_disables[0])
+         write_disable_mask |= ISL_CHANNEL_RED_BIT;
+      if (color_write_disables[1])
+         write_disable_mask |= ISL_CHANNEL_GREEN_BIT;
+      if (color_write_disables[2])
+         write_disable_mask |= ISL_CHANNEL_BLUE_BIT;
+      if (color_write_disables[3])
+         write_disable_mask |= ISL_CHANNEL_ALPHA_BIT;
+   }
+
    const uint32_t mocs =
       is_render_target ? batch->blorp->mocs.rb : batch->blorp->mocs.tex;
 
    isl_surf_fill_state(batch->blorp->isl_dev, state,
                        .surf = &surf, .view = &surface->view,
                        .aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
-                       .mocs = mocs, .clear_color = surface->clear_color);
+                       .mocs = mocs, .clear_color = surface->clear_color,
+                       .write_disables = write_disable_mask);
 
    blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset,
                        surface->addr, 0);
@@ -1257,7 +1311,7 @@ blorp_emit_surface_states(struct blorp_batch *batch,
          blorp_emit_surface_state(batch, &params->dst,
                                   surface_maps[BLORP_RENDERBUFFER_BT_INDEX],
                                   surface_offsets[BLORP_RENDERBUFFER_BT_INDEX],
-                                  true);
+                                  params->color_write_disable, true);
       } else {
          assert(params->depth.enabled || params->stencil.enabled);
          const struct brw_blorp_surface_info *surface =
@@ -1269,7 +1323,8 @@ blorp_emit_surface_states(struct blorp_batch *batch,
       if (params->src.enabled) {
          blorp_emit_surface_state(batch, &params->src,
                                   surface_maps[BLORP_TEXTURE_BT_INDEX],
-                                  surface_offsets[BLORP_TEXTURE_BT_INDEX], false);
+                                  surface_offsets[BLORP_TEXTURE_BT_INDEX],
+                                  NULL, false);
       }
    }
 
index 37338167c9831eb79ad314eb8dbb988053d64cac..cc030c2adeb7ed2a821774910feb801732e37ead 100644 (file)
@@ -126,12 +126,15 @@ i965_FILES = \
        libdrm_macros.h
 
 i965_gen4_FILES = \
+       genX_blorp_exec.c \
        genX_state_upload.c
 
 i965_gen45_FILES = \
+       genX_blorp_exec.c \
        genX_state_upload.c
 
 i965_gen5_FILES = \
+       genX_blorp_exec.c \
        genX_state_upload.c
 
 i965_gen6_FILES = \
index 64aa4c90883b72dfb3cc600a6ed01377ced0053b..7404606b9b62c927fc1c9104e97955a8350b4f8c 100644 (file)
@@ -71,6 +71,16 @@ brw_blorp_init(struct brw_context *brw)
    brw->blorp.compiler = brw->screen->compiler;
 
    switch (brw->gen) {
+   case 4:
+      if (brw->is_g4x) {
+         brw->blorp.exec = gen45_blorp_exec;
+      } else {
+         brw->blorp.exec = gen4_blorp_exec;
+      }
+      break;
+   case 5:
+      brw->blorp.exec = gen5_blorp_exec;
+      break;
    case 6:
       brw->blorp.mocs.tex = 0;
       brw->blorp.mocs.rb = 0;
index ee4bf3bf541f945b7da61b6a5021a93f83b08679..8743d963abcbe108d614c3c2e3e05637f039eae9 100644 (file)
@@ -72,6 +72,12 @@ void
 intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
               unsigned int level, unsigned int layer, enum blorp_hiz_op op);
 
+void gen4_blorp_exec(struct blorp_batch *batch,
+                     const struct blorp_params *params);
+void gen45_blorp_exec(struct blorp_batch *batch,
+                      const struct blorp_params *params);
+void gen5_blorp_exec(struct blorp_batch *batch,
+                     const struct blorp_params *params);
 void gen6_blorp_exec(struct blorp_batch *batch,
                      const struct blorp_params *params);
 void gen7_blorp_exec(struct blorp_batch *batch,
index d3ed871618c43644514464e298f0e623c2a2a0c6..c815a0454d74c697e28203a6e17f03944540a1d6 100644 (file)
@@ -1118,8 +1118,7 @@ brwCreateContext(gl_api api,
 
    brw_init_surface_formats(brw);
 
-   if (brw->gen >= 6)
-      brw_blorp_init(brw);
+   brw_blorp_init(brw);
 
    brw->urb.size = devinfo->urb.size;
 
diff --git a/src/mesa/drivers/dri/i965/gen4_blorp_exec.h b/src/mesa/drivers/dri/i965/gen4_blorp_exec.h
new file mode 100644 (file)
index 0000000..183c0da
--- /dev/null
@@ -0,0 +1,197 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+static inline struct blorp_address
+dynamic_state_address(struct blorp_batch *batch, uint32_t offset)
+{
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
+
+   return (struct blorp_address) {
+      .buffer = brw->batch.bo,
+      .offset = offset,
+      .write_domain = 0,
+      .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
+   };
+}
+
+static inline struct blorp_address
+instruction_state_address(struct blorp_batch *batch, uint32_t offset)
+{
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
+
+   return (struct blorp_address) {
+      .buffer = brw->cache.bo,
+      .offset = offset,
+      .write_domain = 0,
+      .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
+   };
+}
+
+static struct blorp_address
+blorp_emit_vs_state(struct blorp_batch *batch,
+                    const struct blorp_params *params)
+{
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
+
+   uint32_t offset;
+   blorp_emit_dynamic(batch, GENX(VS_STATE), vs, 64, &offset) {
+      vs.Enable = false;
+      vs.URBEntryAllocationSize = brw->urb.vsize - 1;
+#if GEN_GEN == 5
+      vs.NumberofURBEntries = brw->urb.nr_vs_entries >> 2;
+#else
+      vs.NumberofURBEntries = brw->urb.nr_vs_entries;
+#endif
+   }
+
+   return dynamic_state_address(batch, offset);
+}
+
+static struct blorp_address
+blorp_emit_sf_state(struct blorp_batch *batch,
+                    const struct blorp_params *params)
+{
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
+   const struct brw_sf_prog_data *prog_data = params->sf_prog_data;
+
+   uint32_t offset;
+   blorp_emit_dynamic(batch, GENX(SF_STATE), sf, 64, &offset) {
+#if GEN_GEN == 4
+      sf.KernelStartPointer =
+         instruction_state_address(batch, params->sf_prog_kernel);
+#else
+      sf.KernelStartPointer = params->sf_prog_kernel;
+#endif
+      sf.GRFRegisterCount = DIV_ROUND_UP(prog_data->total_grf, 16) - 1;
+      sf.VertexURBEntryReadLength = prog_data->urb_read_length;
+      sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
+      sf.DispatchGRFStartRegisterForURBData = 3;
+
+      sf.URBEntryAllocationSize = brw->urb.sfsize - 1;
+      sf.NumberofURBEntries = brw->urb.nr_sf_entries;
+
+#if GEN_GEN == 5
+      sf.MaximumNumberofThreads = MIN2(48, brw->urb.nr_sf_entries) - 1;
+#else
+      sf.MaximumNumberofThreads = MIN2(24, brw->urb.nr_sf_entries) - 1;
+#endif
+
+      sf.ViewportTransformEnable = false;
+
+      sf.CullMode = CULLMODE_NONE;
+   }
+
+   return dynamic_state_address(batch, offset);
+}
+
+static struct blorp_address
+blorp_emit_wm_state(struct blorp_batch *batch,
+                    const struct blorp_params *params)
+{
+   const struct brw_wm_prog_data *prog_data = params->wm_prog_data;
+
+   uint32_t offset;
+   blorp_emit_dynamic(batch, GENX(WM_STATE), wm, 64, &offset) {
+      if (params->src.enabled) {
+         /* Iron Lake can't do sampler prefetch */
+         wm.SamplerCount = (GEN_GEN != 5);
+         wm.BindingTableEntryCount = 2;
+         uint32_t sampler = blorp_emit_sampler_state(batch, params);
+         wm.SamplerStatePointer = dynamic_state_address(batch, sampler);
+      }
+
+      if (prog_data) {
+         wm.DispatchGRFStartRegisterForURBData =
+            prog_data->base.dispatch_grf_start_reg;
+         wm.SetupURBEntryReadLength = prog_data->num_varying_inputs * 2;
+         wm.SetupURBEntryReadOffset = 0;
+
+         wm.DepthCoefficientURBReadOffset = 1;
+         wm.PixelShaderKillPixel = prog_data->uses_kill;
+         wm.ThreadDispatchEnable = true;
+         wm.EarlyDepthTestEnable = true;
+
+         wm._8PixelDispatchEnable = prog_data->dispatch_8;
+         wm._16PixelDispatchEnable = prog_data->dispatch_16;
+
+#if GEN_GEN == 4
+         wm.KernelStartPointer =
+            instruction_state_address(batch, params->wm_prog_kernel);
+         wm.GRFRegisterCount = prog_data->reg_blocks_0;
+#else
+         wm.KernelStartPointer0 = params->wm_prog_kernel;
+         wm.GRFRegisterCount0 = prog_data->reg_blocks_0;
+         wm.KernelStartPointer2 =
+            params->wm_prog_kernel + prog_data->prog_offset_2;
+         wm.GRFRegisterCount2 = prog_data->reg_blocks_2;
+#endif
+      }
+
+      wm.MaximumNumberofThreads =
+         batch->blorp->compiler->devinfo->max_wm_threads - 1;
+   }
+
+   return dynamic_state_address(batch, offset);
+}
+
+static struct blorp_address
+blorp_emit_color_calc_state(struct blorp_batch *batch,
+                            const struct blorp_params *params)
+{
+   uint32_t cc_viewport = blorp_emit_cc_viewport(batch, params);
+
+   uint32_t offset;
+   blorp_emit_dynamic(batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) {
+      cc.CCViewportStatePointer = dynamic_state_address(batch, cc_viewport);
+   }
+
+   return dynamic_state_address(batch, offset);
+}
+
+static void
+blorp_emit_pipeline(struct blorp_batch *batch,
+                    const struct blorp_params *params)
+{
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
+
+   emit_urb_config(batch, params);
+
+   blorp_emit(batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) {
+      pp.PointertoVSState = blorp_emit_vs_state(batch, params);
+      pp.GSEnable = false;
+      pp.ClipEnable = false;
+      pp.PointertoSFState = blorp_emit_sf_state(batch, params);
+      pp.PointertoWMState = blorp_emit_wm_state(batch, params);
+      pp.PointertoColorCalcState = blorp_emit_color_calc_state(batch, params);
+   }
+
+   brw_upload_urb_fence(brw);
+
+   blorp_emit(batch, GENX(CS_URB_STATE), curb);
+   blorp_emit(batch, GENX(CONSTANT_BUFFER), curb);
+}
index 72ac274d2f59465c68b5122a4b44ad050cdd5354..3451d7187eb292f25ed819374ec07ff198bc60c6 100644 (file)
 
 #include "blorp/blorp_genX_exec.h"
 
+#if GEN_GEN <= 5
+#include "gen4_blorp_exec.h"
+#endif
+
 #include "brw_blorp.h"
 
 static void *
@@ -169,8 +173,11 @@ blorp_emit_urb_config(struct blorp_batch *batch,
    brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
 
    gen7_upload_urb(brw, vs_entry_size, false, false);
-#else
+#elif GEN_GEN == 6
    gen6_upload_urb(brw, vs_entry_size, false, 0);
+#else
+   /* We calculate it now and emit later. */
+   brw_calculate_urb_fence(brw, 0, vs_entry_size, sf_entry_size);
 #endif
 }
 
@@ -215,7 +222,9 @@ retry:
    gen7_l3_state.emit(brw);
 #endif
 
+#if GEN_GEN >= 6
    brw_emit_depth_stall_flushes(brw);
+#endif
 
 #if GEN_GEN == 8
    gen8_write_pma_stall_bits(brw, 0);