anv: Emulate texture swizzle in the shader when needed
author Jason Ekstrand <jason@jlekstrand.net>
Fri, 17 May 2019 15:04:58 +0000 (10:04 -0500)
committer Jason Ekstrand <jason@jlekstrand.net>
Fri, 17 May 2019 17:25:58 +0000 (12:25 -0500)
Now that we have the descriptor buffer mechanism, emulated texture
swizzle can be implemented in a very non-invasive way.  Previous
attempts all tried to extend the push constant based image param
mechanism which was gross.  This could, in theory, be done much faster
with a magic back-end instruction which does indirect MOVs but Vulkan on
IVB is already so slow this isn't going to matter much.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104355
Cc: "19.1" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
src/intel/vulkan/anv_descriptor_set.c
src/intel/vulkan/anv_image.c
src/intel/vulkan/anv_nir_apply_pipeline_layout.c
src/intel/vulkan/anv_private.h

index dc33cc6d9a88e62bd091cb87e4ada51fe094e863..18c4ceffb449dad917f72486ad1d582ca1ee1320 100644 (file)
@@ -103,6 +103,12 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
         type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC))
       data |= ANV_DESCRIPTOR_ADDRESS_RANGE;
 
+   /* On Ivy Bridge and Bay Trail, we need to swizzle textures in the shader */
+   if (device->info.gen == 7 && !device->info.is_haswell &&
+       (type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
+        type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER))
+      data |= ANV_DESCRIPTOR_TEXTURE_SWIZZLE;
+
    return data;
 }
 
@@ -123,6 +129,9 @@ anv_descriptor_data_size(enum anv_descriptor_data data)
    if (data & ANV_DESCRIPTOR_ADDRESS_RANGE)
       size += sizeof(struct anv_address_range_descriptor);
 
+   if (data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE)
+      size += sizeof(struct anv_texture_swizzle_descriptor);
+
    return size;
 }
 
@@ -1184,6 +1193,26 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
 
       anv_descriptor_set_write_image_param(desc_map, image_param);
    }
+
+   if (bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) {
+      assert(!(bind_layout->data & ANV_DESCRIPTOR_SAMPLED_IMAGE));
+      assert(image_view);
+      struct anv_texture_swizzle_descriptor desc_data[3];
+      memset(desc_data, 0, sizeof(desc_data));
+
+      for (unsigned p = 0; p < image_view->n_planes; p++) {
+         desc_data[p] = (struct anv_texture_swizzle_descriptor) {
+            .swizzle = {
+               (uint8_t)image_view->planes[p].isl.swizzle.r,
+               (uint8_t)image_view->planes[p].isl.swizzle.g,
+               (uint8_t)image_view->planes[p].isl.swizzle.b,
+               (uint8_t)image_view->planes[p].isl.swizzle.a,
+            },
+         };
+      }
+      memcpy(desc_map, desc_data,
+             MAX2(1, bind_layout->max_plane_count) * sizeof(desc_data[0]));
+   }
 }
 
 void
index 3841234df14c5313944aa0aac9c34806b506bf84..8e45803a7bcf7924900dada2c5855e5301a55801 100644 (file)
@@ -1278,6 +1278,10 @@ anv_image_fill_surface_state(struct anv_device *device,
    if (view_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT)
       view.swizzle = anv_swizzle_for_render(view.swizzle);
 
+   /* On Ivy Bridge and Bay Trail we do the swizzle in the shader */
+   if (device->info.gen == 7 && !device->info.is_haswell)
+      view.swizzle = ISL_SWIZZLE_IDENTITY;
+
    /* If this is a HiZ buffer we can sample from with a programmable clear
     * value (SKL+), define the clear value to the optimal constant.
     */
index 3d9ba5c3ecd557f7443b2489edb071548ddc7acc..6be725a5cabeb9dd078e7a75274259f2cd249b7d 100644 (file)
@@ -899,13 +899,98 @@ tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
    return plane;
 }
 
+static nir_ssa_def *
+build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx,
+                       unsigned start, unsigned end)
+{
+   if (start == end - 1) {
+      return srcs[start];
+   } else {
+      unsigned mid = start + (end - start) / 2;
+      return nir_bcsel(b, nir_ilt(b, idx, nir_imm_int(b, mid)),
+                       build_def_array_select(b, srcs, idx, start, mid),
+                       build_def_array_select(b, srcs, idx, mid, end));
+   }
+}
+
 static void
-lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
+lower_gen7_tex_swizzle(nir_tex_instr *tex, unsigned plane,
+                       struct apply_pipeline_layout_state *state)
 {
-   state->builder.cursor = nir_before_instr(&tex->instr);
+   assert(state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell);
+   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
+       nir_tex_instr_is_query(tex) ||
+       tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
+       (tex->is_shadow && tex->is_new_style_shadow))
+      return;
+
+   int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
+   assert(deref_src_idx >= 0);
+
+   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
+   UNUSED nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   UNUSED unsigned set = var->data.descriptor_set;
+   UNUSED unsigned binding = var->data.binding;
+   UNUSED const struct anv_descriptor_set_binding_layout *bind_layout =
+      &state->layout->set[set].layout->binding[binding];
+   assert(bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE);
+
+   nir_builder *b = &state->builder;
+   b->cursor = nir_before_instr(&tex->instr);
+
+   const unsigned plane_offset =
+      plane * sizeof(struct anv_texture_swizzle_descriptor);
+   nir_ssa_def *swiz =
+      build_descriptor_load(deref, plane_offset, 1, 32, state);
+
+   b->cursor = nir_after_instr(&tex->instr);
+
+   assert(tex->dest.ssa.bit_size == 32);
+   assert(tex->dest.ssa.num_components == 4);
+
+   /* Initializing to undef is ok; nir_opt_undef will clean it up. */
+   nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
+   nir_ssa_def *comps[8];
+   for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
+      comps[i] = undef;
+
+   comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
+   if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
+      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
+   else
+      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
+   comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0);
+   comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1);
+   comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2);
+   comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3);
+
+   nir_ssa_def *swiz_comps[4];
+   for (unsigned i = 0; i < 4; i++) {
+      nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
+      swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
+   }
+   nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);
 
+   /* Rewrite uses before we insert so we don't rewrite this use */
+   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa,
+                                  nir_src_for_ssa(swiz_tex_res),
+                                  swiz_tex_res->parent_instr);
+}
+
+static void
+lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
+{
    unsigned plane = tex_instr_get_and_remove_plane_src(tex);
 
+   /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader.  Do this
+    * before we lower the derefs away so we can still find the descriptor.
+    */
+   if (state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell)
+      lower_gen7_tex_swizzle(tex, plane, state);
+
+   state->builder.cursor = nir_before_instr(&tex->instr);
+
    lower_tex_deref(tex, nir_tex_src_texture_deref,
                    &tex->texture_index, plane, state);
 
index 2bc4d4a05b6d633c83fee35db51d355dcd380e03..e86561adf9708ee0b24ff9d1ca4e836f75e845ea 100644 (file)
@@ -1548,6 +1548,17 @@ struct anv_sampled_image_descriptor {
    uint32_t sampler;
 };
 
+struct anv_texture_swizzle_descriptor {
+   /** Texture swizzle
+    *
+    * See also nir_intrinsic_channel_select_intel
+    */
+   uint8_t swizzle[4];
+
+   /** Unused padding to ensure the struct is a multiple of 64 bits */
+   uint32_t _pad;
+};
+
 /** Struct representing a storage image descriptor */
 struct anv_storage_image_descriptor {
    /** Bindless image handles
@@ -1589,6 +1600,8 @@ enum anv_descriptor_data {
    ANV_DESCRIPTOR_SAMPLED_IMAGE  = (1 << 6),
    /** Storage image handles */
    ANV_DESCRIPTOR_STORAGE_IMAGE  = (1 << 7),
+   /** Texture swizzle (struct anv_texture_swizzle_descriptor) */
+   ANV_DESCRIPTOR_TEXTURE_SWIZZLE  = (1 << 8),
 };
 
 struct anv_descriptor_set_binding_layout {