i965: Define and initialize image parameter structure.
author Francisco Jerez <currojerez@riseup.net>
Wed, 21 Jan 2015 15:34:49 +0000 (17:34 +0200)
committer Francisco Jerez <currojerez@riseup.net>
Tue, 11 Aug 2015 12:07:38 +0000 (15:07 +0300)
This will be used to pass image meta-data to the shader when we cannot
use typed surface reads and writes.  All entries except surface_idx
and size are otherwise unused and will get eliminated by the uniform
packing pass.  size will be used for bounds checking with some image
formats and will be useful for ARB_shader_image_size too.  surface_idx
is always used.

v2: Add CS support.  Move the image_params array back to
    brw_stage_prog_data.
v3: Improve documentation.

Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_cs.cpp
src/mesa/drivers/dri/i965/brw_gs.c
src/mesa/drivers/dri/i965/brw_vs.c
src/mesa/drivers/dri/i965/brw_wm.c
src/mesa/drivers/dri/i965/brw_wm_surface_state.c

index 3aff9246db8967e30b946cd874a144779d51e59e..b851f3830a27598d22dadd22eb3819b8536bad4f 100644 (file)
@@ -361,6 +361,7 @@ struct brw_stage_prog_data {
 
    GLuint nr_params;       /**< number of float params/constants */
    GLuint nr_pull_params;
+   unsigned nr_image_params;
 
    unsigned curb_read_length;
    unsigned total_scratch;
@@ -381,6 +382,59 @@ struct brw_stage_prog_data {
     */
    const gl_constant_value **param;
    const gl_constant_value **pull_param;
+
+   /**
+    * Image metadata passed to the shader as uniforms.  This is deliberately
+    * ignored by brw_stage_prog_data_compare() because its contents don't have
+    * any influence on program compilation.
+    */
+   struct brw_image_param *image_param;
+};
+
+/*
+ * Image metadata structure as laid out in the shader parameter buffer.
+ * Entries have to be 16B-aligned for the vec4 back-end to be able to use
+ * them (the offsets below step by four, presumably 32-bit components).
+ * That's okay: the padding and any unused entries [most of them except
+ * for untyped surface access] are removed by the uniform packing pass.
+ */
+#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET      0
+#define BRW_IMAGE_PARAM_OFFSET_OFFSET           4
+#define BRW_IMAGE_PARAM_SIZE_OFFSET             8
+#define BRW_IMAGE_PARAM_STRIDE_OFFSET           12
+#define BRW_IMAGE_PARAM_TILING_OFFSET           16
+#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET        20
+#define BRW_IMAGE_PARAM_SIZE                    24
+
+struct brw_image_param {
+   /** Surface binding table index. */
+   uint32_t surface_idx;
+
+   /** Offset applied to the X and Y surface coordinates. */
+   uint32_t offset[2];
+
+   /** Surface X, Y and Z dimensions. */
+   uint32_t size[3];
+
+   /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in
+    * pixels, vertical slice stride in pixels.
+    */
+   uint32_t stride[4];
+
+   /** Log2 of the tiling modulus in the X, Y and Z dimensions. */
+   uint32_t tiling[3];
+
+   /**
+    * Right shift to apply for bit 6 address swizzling.  Two different
+    * swizzles can be specified and will be applied one after the other.  The
+    * resulting address will be:
+    *
+    *  addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^
+    *                              (addr >> swizzling[1])))
+    *
+    * Use \c 0xff for any swizzle that is not required.
+    */
+   uint32_t swizzling[2];
 };
 
 /* Data about a particular attempt to compile a program.  Note that
index 29ee75b1e1a855575363f53a7ce13612e01e16af..59520307f4227545e2505a9cfa87b682f5c91fa4 100644 (file)
@@ -190,7 +190,10 @@ brw_codegen_cs_prog(struct brw_context *brw,
       rzalloc_array(NULL, const gl_constant_value *, param_count);
    prog_data.base.pull_param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.image_param =
+      rzalloc_array(NULL, struct brw_image_param, cs->NumImages);
    prog_data.base.nr_params = param_count;
+   prog_data.base.nr_image_params = cs->NumImages;
 
    program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
                          &cp->program, prog, &program_size);
index 9c59c8a0dfc04210a8fa7b90528eb14ce5961068..d1a955a4de3b33ffecaf4565bd8dd2cb6ae28e70 100644 (file)
@@ -69,7 +69,10 @@ brw_codegen_gs_prog(struct brw_context *brw,
       rzalloc_array(NULL, const gl_constant_value *, param_count);
    c.prog_data.base.base.pull_param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
+   c.prog_data.base.base.image_param =
+      rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
    c.prog_data.base.base.nr_params = param_count;
+   c.prog_data.base.base.nr_image_params = gs->NumImages;
 
    if (brw->gen >= 7) {
       if (gp->program.OutputType == GL_POINTS) {
index 2b9b005c782d6ae82122b8d9fd9727e6bbd9798b..20bc7a97d20322da35988989b5f1cf8bd7307632 100644 (file)
@@ -122,7 +122,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
        * conservative here.
        */
       param_count = vs->num_uniform_components * 4;
-
+      stage_prog_data->nr_image_params = vs->NumImages;
    } else {
       param_count = vp->program.Base.Parameters->NumParameters * 4;
    }
@@ -135,6 +135,9 @@ brw_codegen_vs_prog(struct brw_context *brw,
       rzalloc_array(NULL, const gl_constant_value *, param_count);
    stage_prog_data->pull_param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
+   stage_prog_data->image_param =
+      rzalloc_array(NULL, struct brw_image_param,
+                    stage_prog_data->nr_image_params);
    stage_prog_data->nr_params = param_count;
 
    GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
index 9b90a7c191f72815b8c89b831111189396a1d15a..c8583c09fe6fe894ab9fddded035d7d154528f95 100644 (file)
@@ -190,6 +190,7 @@ brw_codegen_wm_prog(struct brw_context *brw,
    int param_count;
    if (fs) {
       param_count = fs->num_uniform_components;
+      prog_data.base.nr_image_params = fs->NumImages;
    } else {
       param_count = fp->program.Base.Parameters->NumParameters * 4;
    }
@@ -199,6 +200,9 @@ brw_codegen_wm_prog(struct brw_context *brw,
       rzalloc_array(NULL, const gl_constant_value *, param_count);
    prog_data.base.pull_param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.image_param =
+      rzalloc_array(NULL, struct brw_image_param,
+                    prog_data.base.nr_image_params);
    prog_data.base.nr_params = param_count;
 
    prog_data.barycentric_interp_modes =
index 33e045f2099815831b918bc26a995649d86207a6..35df6f13e19227950b614882671eecb87141f9dd 100644 (file)
@@ -1044,6 +1044,103 @@ get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
    }
 }
 
+static void
+update_default_image_param(struct brw_context *brw,
+                           struct gl_image_unit *u,
+                           unsigned surface_idx,
+                           struct brw_image_param *param)
+{
+   memset(param, 0, sizeof(*param));
+   param->surface_idx = surface_idx;
+   /* Everything but surface_idx stays zeroed; the swizzling shifts are set
+    * to 0xff ("not required") to effectively disable swizzling.  See
+    * emit_address_calculation() in brw_fs_surface_builder.cpp for details.
+    */
+   param->swizzling[0] = 0xff;
+   param->swizzling[1] = 0xff;
+}
+
+static void
+update_buffer_image_param(struct brw_context *brw,
+                          struct gl_image_unit *u,
+                          unsigned surface_idx,
+                          struct brw_image_param *param)
+{
+   struct gl_buffer_object *obj = u->TexObj->BufferObject;
+
+   update_default_image_param(brw, u, surface_idx, param);
+   /* Buffers only use the X entries: element count and bytes per element. */
+   param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
+   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
+}
+
+static void
+update_texture_image_param(struct brw_context *brw,
+                           struct gl_image_unit *u,
+                           unsigned surface_idx,
+                           struct brw_image_param *param)
+{
+   struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
+
+   update_default_image_param(brw, u, surface_idx, param);
+
+   param->size[0] = minify(mt->logical_width0, u->Level);
+   param->size[1] = minify(mt->logical_height0, u->Level);
+   param->size[2] = (!u->Layered ? 1 :
+                     u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
+                     u->TexObj->Target == GL_TEXTURE_3D ?
+                     minify(mt->logical_depth0, u->Level) :
+                     mt->logical_depth0);
+
+   intel_miptree_get_image_offset(mt, u->Level, u->Layer,
+                                  &param->offset[0],
+                                  &param->offset[1]);
+
+   param->stride[0] = mt->cpp;
+   param->stride[1] = mt->pitch / mt->cpp;
+   param->stride[2] =
+      brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
+   param->stride[3] =
+      brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
+
+   if (mt->tiling == I915_TILING_X) {
+      /* An X tile is 512 bytes (512 / cpp pixels) wide and 8 rows high. */
+      param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
+      param->tiling[1] = _mesa_logbase2(8);
+
+      if (brw->has_swizzling) {
+         /* Right shifts required to swizzle bits 9 and 10 of the memory
+          * address with bit 6.
+          */
+         param->swizzling[0] = 3;
+         param->swizzling[1] = 4;
+      }
+   } else if (mt->tiling == I915_TILING_Y) {
+      /* The layout of a Y-tiled surface in memory isn't really fundamentally
+       * different to the layout of an X-tiled surface, we simply pretend that
+       * the surface is broken up in a number of smaller 16Bx32 tiles, each
+       * one arranged in X-major order just like is the case for X-tiling.
+       */
+      param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
+      param->tiling[1] = _mesa_logbase2(32);
+
+      if (brw->has_swizzling) {
+         /* Right shift required to swizzle bit 9 of the memory address with
+          * bit 6.
+          */
+         param->swizzling[0] = 3;
+      }
+   }
+
+   /* 3D textures are arranged in 2D in memory with 2^lod slices per row.  The
+    * address calculation algorithm (emit_address_calculation() in
+    * brw_fs_surface_builder.cpp) handles this as a sort of tiling whose
+    * modulus is 2^lod, so the log2 value stored in tiling[2] is the LOD.
+    */
+   param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
+                       0);
+}
+
 static void
 update_image_surface(struct brw_context *brw,
                      struct gl_image_unit *u,
@@ -1067,6 +1164,8 @@ update_image_surface(struct brw_context *brw,
             format, intel_obj->Base.Size / texel_size, texel_size,
             access != GL_READ_ONLY);
 
+         update_buffer_image_param(brw, u, surface_idx, param);
+
       } else {
          struct intel_texture_object *intel_obj = intel_texture_object(obj);
          struct intel_mipmap_tree *mt = intel_obj->mt;
@@ -1094,10 +1193,13 @@ update_image_surface(struct brw_context *brw,
                format, SWIZZLE_XYZW,
                surf_offset, access != GL_READ_ONLY, false);
          }
+
+         update_texture_image_param(brw, u, surface_idx, param);
       }
 
    } else {
       brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
+      update_default_image_param(brw, u, surface_idx, param);
    }
 }