From: Kenneth Graunke Date: Tue, 31 Jul 2018 06:49:34 +0000 (-0700) Subject: iris: comment everything X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=dfe1ee4f6f98408e570b2cce882a566becd9bd7d;p=mesa.git iris: comment everything 1. Write the code 2. Add comments 3. PROFIT (or just avoid cost of explaining or relearning things...) --- diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c index a7fc7f710ef..bae1720601e 100644 --- a/src/gallium/drivers/iris/iris_batch.c +++ b/src/gallium/drivers/iris/iris_batch.c @@ -22,6 +22,23 @@ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/** + * @file iris_batch.c + * + * Batchbuffer and command submission module. + * + * Every API draw call results in a number of GPU commands, which we + * collect into a "batch buffer". Typically, many draw calls are grouped + * into a single batch to amortize command submission overhead. + * + * We submit batches to the kernel using the I915_GEM_EXECBUFFER2 ioctl. + * One critical piece of data is the "validation list", which contains a + * list of the buffer objects (BOs) which the commands in the GPU need. + * The kernel will make sure these are resident and pinned at the correct + * virtual memory address before executing our batch. If a BO is not in + * the validation list, it effectively does not exist, so take care. + */ + #include "iris_batch.h" #include "iris_binder.h" #include "iris_bufmgr.h" @@ -46,12 +63,13 @@ */ #define BATCH_RESERVED 16 -static void decode_batch(struct iris_batch *batch); - static void iris_batch_reset(struct iris_batch *batch); -UNUSED static void +/** + * Debugging code to dump the validation list, used by INTEL_DEBUG=submit. + */ +static void dump_validation_list(struct iris_batch *batch) { fprintf(stderr, "Validation list (length %d):\n", batch->exec_count); @@ -72,6 +90,9 @@ dump_validation_list(struct iris_batch *batch) } } +/** + * Return BO information to the batch decoder (for debugging). 
+ */ static struct gen_batch_decode_bo decode_get_bo(void *v_batch, uint64_t address) { @@ -95,6 +116,17 @@ decode_get_bo(void *v_batch, uint64_t address) return (struct gen_batch_decode_bo) { }; } +/** + * Decode the current batch. + */ +static void +decode_batch(struct iris_batch *batch) +{ + void *map = iris_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ); + gen_print_batch(&batch->decoder, map, batch->primary_batch_size, + batch->exec_bos[0]->gtt_offset); +} + static bool uint_key_compare(const void *a, const void *b) { @@ -280,6 +312,13 @@ iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate) } } +/** + * Ensure the current command buffer has \param size bytes of space + * remaining. If not, this creates a secondary batch buffer and emits + * a jump from the primary batch to the start of the secondary. + * + * Most callers want iris_get_command_space() instead. + */ void iris_require_command_space(struct iris_batch *batch, unsigned size) { @@ -306,6 +345,12 @@ iris_require_command_space(struct iris_batch *batch, unsigned size) } } +/** + * Allocate space in the current command buffer, and return a pointer + * to the mapped area so the caller can write commands there. + * + * This should be called whenever emitting commands. + */ void * iris_get_command_space(struct iris_batch *batch, unsigned bytes) { @@ -315,6 +360,9 @@ iris_get_command_space(struct iris_batch *batch, unsigned bytes) return map; } +/** + * Helper to emit GPU commands - allocates space, copies them there. + */ void iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size) { @@ -346,6 +394,9 @@ iris_finish_batch(struct iris_batch *batch) batch->primary_batch_size = batch_bytes_used(batch); } +/** + * Submit the batch to the GPU via execbuffer2. 
+ */ static int submit_batch(struct iris_batch *batch, int in_fence_fd, int *out_fence_fd) { @@ -410,11 +461,14 @@ submit_batch(struct iris_batch *batch, int in_fence_fd, int *out_fence_fd) } /** - * The in_fence_fd is ignored if -1. Otherwise this function takes ownership - * of the fd. + * Flush the batch buffer, submitting it to the GPU and resetting it so + * we're ready to emit the next batch. + * + * \param in_fence_fd is ignored if -1. Otherwise, this function takes + * ownership of the fd. * - * The out_fence_fd is ignored if NULL. Otherwise, the caller takes ownership - * of the returned fd. + * \param out_fence_fd is ignored if NULL. Otherwise, the caller must + * take ownership of the returned fd. */ int _iris_batch_flush_fence(struct iris_batch *batch, @@ -484,6 +538,11 @@ _iris_batch_flush_fence(struct iris_batch *batch, return 0; } +/** + * Does the current batch refer to the given BO? + * + * (In other words, is the BO in the current batch's validation list?) + */ bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo) { @@ -498,7 +557,11 @@ iris_batch_references(struct iris_batch *batch, struct iris_bo *bo) return false; } -/* This is the only way buffers get added to the validate list. +/** + * Add a buffer to the current batch's validation list. + * + * You must call this on any BO you wish to use in this batch, to ensure + * that it's resident when the GPU commands execute. 
*/ void iris_use_pinned_bo(struct iris_batch *batch, @@ -510,18 +573,3 @@ iris_use_pinned_bo(struct iris_batch *batch, if (writable) batch->validation_list[index].flags |= EXEC_OBJECT_WRITE; } - -static void -decode_batch(struct iris_batch *batch) -{ - //if (batch->bo != batch->exec_bos[0]) { - void *map = iris_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ); - gen_print_batch(&batch->decoder, map, batch->primary_batch_size, - batch->exec_bos[0]->gtt_offset); - - //fprintf(stderr, "Secondary batch...\n"); - //} - - //gen_print_batch(&batch->decoder, batch->map, batch_bytes_used(batch), - //batch->bo->gtt_offset); -} diff --git a/src/gallium/drivers/iris/iris_binder.c b/src/gallium/drivers/iris/iris_binder.c index 07e8fe83ecf..8755e1fb8fe 100644 --- a/src/gallium/drivers/iris/iris_binder.c +++ b/src/gallium/drivers/iris/iris_binder.c @@ -21,6 +21,37 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/** + * @file iris_binder.c + * + * Shader programs refer to most resources via integer handles. These are + * indexes (BTIs) into a "Binding Table", which is simply a list of pointers + * to SURFACE_STATE entries. Each shader stage has its own binding table, + * set by the 3DSTATE_BINDING_TABLE_POINTERS_* commands. Both the binding + * table itself and the SURFACE_STATEs are relative to Surface State Base + * Address, so they all live in IRIS_MEMZONE_SURFACE. + * + * Unfortunately, the hardware designers made 3DSTATE_BINDING_TABLE_POINTERS + * only accept a 16-bit pointer. This means that all binding tables have to + * live within the 64kB range starting at Surface State Base Address. (The + * actual SURFACE_STATE entries can live anywhere in the 4GB zone, as the + * binding table entries are full 32-bit pointers.) + * + * We stream out binding tables dynamically, storing them in a single 64kB + * "binder" buffer, located at IRIS_BINDER_ADDRESS. Before emitting a draw + * call, we reserve space for any new binding tables needed by bound shaders. 
+ * If there is no space, we flush the batch and swap out the binder for a + * new empty BO. + * + * XXX: This should be fancier. We currently replace the binder with a + * fresh BO on every batch, which causes the kernel to stall, trying to + * pin the new buffer at the same memory address as the old one. We ought + * to avoid this by using a ringbuffer, tracking the busy section of the BO, + * and cycling back around where possible to avoid replacing it at all costs. + * + * XXX: if we do have to flush, we should emit a performance warning. + */ + #include #include "util/u_math.h" #include "iris_binder.h" @@ -28,7 +59,7 @@ #include "iris_context.h" /** - * Reserve a block of space in the binder. + * Reserve a block of space in the binder, given the raw size in bytes. */ uint32_t iris_binder_reserve(struct iris_batch *batch, unsigned size) @@ -58,6 +89,9 @@ iris_binder_reserve(struct iris_batch *batch, unsigned size) /** * Reserve and record binder space for 3D pipeline shader stages. + * + * Note that you must actually populate the new binding tables after + * calling this command - the new area is uninitialized. */ void iris_binder_reserve_3d(struct iris_batch *batch, @@ -109,6 +143,9 @@ iris_init_binder(struct iris_binder *binder, struct iris_bufmgr *bufmgr) binder->insert_point = INIT_INSERT_POINT; } +/** + * Is the binder empty? (If so, old binding table pointers are stale.) + */ bool iris_binder_is_empty(struct iris_binder *binder) { diff --git a/src/gallium/drivers/iris/iris_blit.c b/src/gallium/drivers/iris/iris_blit.c index b287876e0a4..3c4efd4c47e 100644 --- a/src/gallium/drivers/iris/iris_blit.c +++ b/src/gallium/drivers/iris/iris_blit.c @@ -68,6 +68,12 @@ iris_get_blorp_format(enum pipe_format pf) } } +/** + * The pipe->blit() driver hook. + * + * This performs a blit between two surfaces, which copies data but may + * also perform format conversion, scaling, flipping, and so on. 
+ */ static void iris_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) { @@ -161,6 +167,12 @@ iris_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) blorp_batch_finish(&blorp_batch); } +/** + * The pipe->resource_copy_region() driver hook. + * + * This implements ARB_copy_image semantics - a raw memory copy between + * compatible view classes. + */ static void iris_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c index 8def4619e8c..e12022e805a 100644 --- a/src/gallium/drivers/iris/iris_blorp.c +++ b/src/gallium/drivers/iris/iris_blorp.c @@ -20,6 +20,21 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * @file iris_blorp.c + * + * ============================= GENXML CODE ============================= + * [This file is compiled once per generation.] + * ======================================================================= + * + * GenX specific code for working with BLORP (blitting, resolves, clears + * on the 3D engine). This provides the driver-specific hooks needed to + * implement the BLORP API. + * + * See iris_blit.c, iris_clear.c, and so on. + */ + #include #include "iris_batch.h" diff --git a/src/gallium/drivers/iris/iris_border_color.c b/src/gallium/drivers/iris/iris_border_color.c index 03a698a9c6e..deaca253590 100644 --- a/src/gallium/drivers/iris/iris_border_color.c +++ b/src/gallium/drivers/iris/iris_border_color.c @@ -21,6 +21,28 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/** + * @file iris_border_color.c + * + * Each SAMPLER_STATE points to a SAMPLER_BORDER_COLOR_STATE entry, + * describing the color to return when sampling outside the texture + * when using CLAMP_TO_BORDER wrap modes. + * + * These must be stored relative to Dynamic State Base Address. 
+ * Unfortunately, the hardware designers only gave us a 24-bit pointer + * rather than an actual graphics address, so it must be stored in the + * bottom 16MB of that memory zone. This means we can't simply use + * u_upload_mgr like we do for most state. + * + * To work around this, we maintain a single "border color pool" BO + * which we pin at the base of IRIS_MEMZONE_DYNAMIC. Since most border + * colors are the same (typically black or white), we maintain a hash + * table of known colors, and reuse the same entries. This avoids + * wasting a lot of space in the pool. + * + * If it ever does fill up, we simply flush. + */ + #include #include "util/u_math.h" #include "iris_binder.h" @@ -84,6 +106,7 @@ iris_border_color_pool_reserve(struct iris_context *ice, unsigned count) (IRIS_BORDER_COLOR_POOL_SIZE - pool->insert_point) / BC_ALIGNMENT; if (remaining_entries < count) { + /* It's safe to flush because we're called outside of state upload. */ if (iris_batch_references(&ice->render_batch, pool->bo)) iris_batch_flush(&ice->render_batch); @@ -94,7 +117,8 @@ iris_border_color_pool_reserve(struct iris_context *ice, unsigned count) /** * Upload a border color (or use a cached version). * - * Returns the offset into the border color pool BO. + * Returns the offset into the border color pool BO. Note that you must + * reserve space ahead of time by calling iris_border_color_pool_reserve(). */ uint32_t iris_upload_border_color(struct iris_context *ice, diff --git a/src/gallium/drivers/iris/iris_bufmgr.c b/src/gallium/drivers/iris/iris_bufmgr.c index da770ee7750..305da8afed1 100644 --- a/src/gallium/drivers/iris/iris_bufmgr.c +++ b/src/gallium/drivers/iris/iris_bufmgr.c @@ -21,6 +21,17 @@ * IN THE SOFTWARE. */ +/** + * @file iris_bufmgr.c + * + * The Iris buffer manager. 
+ * + * XXX: write better comments + * - BOs + * - Explain BO cache + * - main interface to GEM in the kernel + */ + #ifdef HAVE_CONFIG_H #include "config.h" #endif @@ -380,7 +391,7 @@ __vma_alloc(struct iris_bufmgr *bufmgr, addr = util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, alignment); } - + assert((addr >> 48ull) == 0); assert((addr % alignment) == 0); return addr; diff --git a/src/gallium/drivers/iris/iris_clear.c b/src/gallium/drivers/iris/iris_clear.c index 41ac727976d..91a161b88c6 100644 --- a/src/gallium/drivers/iris/iris_clear.c +++ b/src/gallium/drivers/iris/iris_clear.c @@ -35,6 +35,11 @@ #include "iris_screen.h" #include "intel/compiler/brw_compiler.h" +/** + * The pipe->clear() driver hook. + * + * This clears buffers attached to the current draw framebuffer. + */ static void iris_clear(struct pipe_context *ctx, unsigned buffers, diff --git a/src/gallium/drivers/iris/iris_context.c b/src/gallium/drivers/iris/iris_context.c index d0bc9e5e8dc..5e69e8a44d9 100644 --- a/src/gallium/drivers/iris/iris_context.c +++ b/src/gallium/drivers/iris/iris_context.c @@ -42,6 +42,7 @@ iris_flush(struct pipe_context *ctx, iris_batch_flush(&ice->render_batch); + // XXX: bogus!!! if (fence) *fence = NULL; } @@ -59,6 +60,9 @@ get_time(void) return tp.tv_sec + tp.tv_nsec / 1000000000.0; } +/** + * The pipe->set_debug_callback() driver hook. + */ static void iris_set_debug_callback(struct pipe_context *ctx, const struct pipe_debug_callback *cb) @@ -71,6 +75,9 @@ iris_set_debug_callback(struct pipe_context *ctx, memset(&ice->dbg, 0, sizeof(ice->dbg)); } +/** + * Destroy a context, freeing any associated memory. + */ static void iris_destroy_context(struct pipe_context *ctx) { @@ -103,6 +110,11 @@ iris_destroy_context(struct pipe_context *ctx) unreachable("Unknown hardware generation"); \ } +/** + * Create a context. + * + * This is where each context begins. 
+ */ struct pipe_context * iris_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags) { diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 0708410a7fb..5eee587990a 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -38,16 +38,22 @@ struct iris_context; struct blorp_batch; struct blorp_params; -#define IRIS_RESOURCE_FLAG_SHADER_MEMZONE (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) -#define IRIS_RESOURCE_FLAG_SURFACE_MEMZONE (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) -#define IRIS_RESOURCE_FLAG_DYNAMIC_MEMZONE (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) - #define IRIS_MAX_TEXTURE_SAMPLERS 32 /* IRIS_MAX_ABOS and IRIS_MAX_SSBOS must be the same. */ #define IRIS_MAX_ABOS 16 #define IRIS_MAX_SSBOS 16 #define IRIS_MAX_VIEWPORTS 16 +/** + * Dirty flags. When state changes, we flag some combination of these + * to indicate that particular GPU commands need to be re-emitted. + * + * Each bit typically corresponds to a single 3DSTATE_* command packet, but + * in rare cases they map to a group of related packets that need to be + * emitted together. + * + * See iris_upload_render_state(). + */ #define IRIS_DIRTY_COLOR_CALC_STATE (1ull << 0) #define IRIS_DIRTY_POLYGON_STIPPLE (1ull << 1) #define IRIS_DIRTY_SCISSOR_RECT (1ull << 2) @@ -106,7 +112,9 @@ struct blorp_params; * * Shader programs may depend on non-orthogonal state. These flags are * used to indicate that a shader's key depends on the state provided by - * a certain Gallium CSO. + * a certain Gallium CSO. Changing any CSOs marked as a dependency will + * cause the driver to re-compute the shader key, possibly triggering a + * shader recompile. */ enum iris_nos_dep { IRIS_NOS_FRAMEBUFFER, @@ -119,6 +127,9 @@ enum iris_nos_dep { struct iris_depth_stencil_alpha_state; +/** + * Cache IDs for the in-memory program cache (ice->shaders.cache). 
+ */ enum iris_program_cache_id { IRIS_CACHE_VS = MESA_SHADER_VERTEX, IRIS_CACHE_TCS = MESA_SHADER_TESS_CTRL, @@ -131,11 +142,11 @@ enum iris_program_cache_id { /** @{ * - * PIPE_CONTROL operation, a combination MI_FLUSH and register write with - * additional flushing control. + * Defines for PIPE_CONTROL operations, which trigger cache flushes, + * synchronization, pipelined memory writes, and so on. * - * The bits here are not the actual hardware values. The actual values - * shift around a bit per-generation, so we just have flags for each + * The bits here are not the actual hardware values. The actual fields + * move between various generations, so we just have flags for each * potential operation, and use genxml to encode the actual packet. */ enum pipe_control_flags @@ -180,6 +191,13 @@ enum pipe_control_flags /** @} */ +/** + * A compiled shader variant, containing a pointer to the GPU assembly, + * as well as program data and other packets needed by state upload. + * + * There can be several iris_compiled_shader variants per API-level shader + * (iris_uncompiled_shader), due to state-based recompiles (brw_*_prog_key). + */ struct iris_compiled_shader { /** Reference to the uploaded assembly. */ struct iris_state_ref assembly; @@ -203,6 +221,9 @@ struct iris_compiled_shader { uint8_t derived_data[0]; }; +/** + * Constant buffer (UBO) information. See iris_set_const_buffer(). + */ struct iris_const_buffer { /** The resource and offset for the actual constant data */ struct iris_state_ref data; @@ -211,12 +232,18 @@ struct iris_const_buffer { struct iris_state_ref surface_state; }; +/** + * API context state that is replicated per shader stage. + */ struct iris_shader_state { struct iris_const_buffer constbuf[PIPE_MAX_CONSTANT_BUFFERS]; struct pipe_resource *ssbo[PIPE_MAX_SHADER_BUFFERS]; struct iris_state_ref ssbo_surface_state[PIPE_MAX_SHADER_BUFFERS]; }; +/** + * Virtual table for generation-specific (genxml) function calls. 
+ */ struct iris_vtable { void (*destroy_state)(struct iris_context *ice); void (*init_render_context)(struct iris_screen *screen, @@ -248,6 +275,11 @@ struct iris_vtable { struct brw_wm_prog_key *key); }; +/** + * A pool containing SAMPLER_BORDER_COLOR_STATE entries. + * + * See iris_border_color.c for more information. + */ struct iris_border_color_pool { struct iris_bo *bo; void *map; @@ -257,15 +289,27 @@ struct iris_border_color_pool { struct hash_table *ht; }; +/** + * The API context (derived from pipe_context). + * + * Most driver state is tracked here. + */ struct iris_context { struct pipe_context ctx; + /** A debug callback for KHR_debug output. */ struct pipe_debug_callback dbg; + /** Slab allocator for iris_transfer_map objects. */ struct slab_child_pool transfer_pool; struct iris_vtable vtbl; + struct blorp_context blorp; + + /** The main batch for rendering. */ + struct iris_batch render_batch; + struct { struct iris_uncompiled_shader *uncompiled[MESA_SHADER_STAGES]; struct iris_compiled_shader *prog[MESA_SHADER_STAGES]; @@ -279,14 +323,10 @@ struct iris_context { unsigned urb_size; } shaders; - struct blorp_context blorp; - - /** The main batch for rendering */ - struct iris_batch render_batch; - struct { uint64_t dirty; uint64_t dirty_for_nos[IRIS_NOS_COUNT]; + unsigned num_viewports; unsigned sample_mask; struct iris_blend_state *cso_blend; @@ -300,6 +340,7 @@ struct iris_context { struct pipe_stencil_ref stencil_ref; struct pipe_framebuffer_state framebuffer; + /** GenX-specific current state */ struct iris_genx_state *genx; struct iris_state_ref sampler_table[MESA_SHADER_STAGES]; diff --git a/src/gallium/drivers/iris/iris_draw.c b/src/gallium/drivers/iris/iris_draw.c index 1a12e4ce0d9..9c58bbff5ff 100644 --- a/src/gallium/drivers/iris/iris_draw.c +++ b/src/gallium/drivers/iris/iris_draw.c @@ -20,6 +20,13 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +/** + * @file iris_draw.c + * + * The main driver hooks for drawing and launching compute shaders. + */ + #include #include #include "pipe/p_defines.h" @@ -31,6 +38,9 @@ #include "intel/compiler/brw_compiler.h" #include "iris_context.h" +/** + * The pipe->draw_vbo() driver hook. Performs a draw on the GPU. + */ void iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { diff --git a/src/gallium/drivers/iris/iris_formats.c b/src/gallium/drivers/iris/iris_formats.c index 1bfc367232f..38b44be515c 100644 --- a/src/gallium/drivers/iris/iris_formats.c +++ b/src/gallium/drivers/iris/iris_formats.c @@ -21,6 +21,13 @@ * IN THE SOFTWARE. */ +/** + * @file iris_formats.c + * + * Converts Gallium formats (PIPE_FORMAT_*) to hardware ones (ISL_FORMAT_*). + * Provides information about which formats support what features. + */ + #include "util/bitscan.h" #include "util/macros.h" #include "util/u_format.h" @@ -391,6 +398,12 @@ iris_isl_format_for_pipe_format(enum pipe_format pf) return table[pf]; } +/** + * The pscreen->is_format_supported() driver hook. + * + * Returns true if the given format is supported for the given usage + * (PIPE_BIND_*) and sample count. + */ boolean iris_is_format_supported(struct pipe_screen *pscreen, enum pipe_format pformat, diff --git a/src/gallium/drivers/iris/iris_pipe_control.c b/src/gallium/drivers/iris/iris_pipe_control.c index 1dcb0ad3e62..db1422f0b6e 100644 --- a/src/gallium/drivers/iris/iris_pipe_control.c +++ b/src/gallium/drivers/iris/iris_pipe_control.c @@ -21,6 +21,35 @@ * IN THE SOFTWARE. */ +/** + * @file iris_pipe_control.c + * + * PIPE_CONTROL is the main flushing and synchronization primitive on Intel + * GPUs. It can invalidate caches, stall until rendering reaches various + * stages of completion, write to memory, and other things. In a way, it's + * a swiss army knife command - it has all kinds of capabilities, but some + * significant limitations as well. 
+ * + * Unfortunately, it's notoriously complicated and difficult to use. Many + * sub-commands can't be used together. Some are meant to be used at the + * top of the pipeline (invalidating caches before drawing), while some are + * meant to be used at the end (stalling or flushing after drawing). + * + * Also, there's a list of restrictions a mile long, which vary by generation. + * Do this before doing that, or suffer the consequences (usually a GPU hang). + * + * This file contains helpers for emitting them safely. You can simply call + * iris_emit_pipe_control_flush() with the desired operations (as logical + * PIPE_CONTROL_* bits), and it will take care of splitting it into multiple + * PIPE_CONTROL commands as necessary. The per-generation workarounds are + * applied in iris_emit_raw_pipe_control() in iris_state.c. + * + * This file also contains our cache tracking helpers. We have sets for + * the render cache, depth cache, and so on. If a BO is in the set, then + * it may have data in that cache. These take care of emitting flushes for + * render-to-texture, format reinterpretation issues, and other situations. + */ + #include "iris_context.h" #include "util/hash_table.h" #include "util/set.h" diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 070981ceafb..39bb53602c1 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -20,6 +20,16 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * @file iris_program.c + * + * This file contains the driver interface for compiling shaders. + * + * See iris_program_cache.c for the in-memory program cache where the + * compiled shaders are stored. 
+ */ + #include #include #include "pipe/p_defines.h" @@ -39,6 +49,15 @@ get_new_program_id(struct iris_screen *screen) return p_atomic_inc_return(&screen->program_id); } +/** + * An uncompiled, API-facing shader. This is the Gallium CSO for shaders. + * It primarily contains the NIR for the shader. + * + * Each API-facing shader can be compiled into multiple shader variants, + * based on non-orthogonal state dependencies, recorded in the shader key. + * + * See iris_compiled_shader, which represents a compiled shader variant. + */ struct iris_uncompiled_shader { nir_shader *nir; @@ -52,6 +71,14 @@ struct iris_uncompiled_shader { // XXX: need unify_interfaces() at link time... +/** + * The pipe->create_[stage]_state() driver hooks. + * + * Performs basic NIR preprocessing, records any state dependencies, and + * returns an iris_uncompiled_shader as the Gallium CSO. + * + * Actual shader compilation to assembly happens later, at first use. + */ static void * iris_create_shader_state(struct pipe_context *ctx, const struct pipe_shader_state *state) @@ -101,9 +128,16 @@ iris_create_shader_state(struct pipe_context *ctx, break; } + // XXX: precompile! + return ish; } +/** + * The pipe->delete_[stage]_state() driver hooks. + * + * Frees the iris_uncompiled_shader. + */ static void iris_delete_shader_state(struct pipe_context *ctx, void *state) { @@ -113,6 +147,12 @@ iris_delete_shader_state(struct pipe_context *ctx, void *state) free(ish); } +/** + * The pipe->bind_[stage]_state() driver hook. + * + * Binds an uncompiled shader as the current one for a particular stage. + * Updates dirty tracking to account for the shader's NOS. + */ static void bind_state(struct iris_context *ice, struct iris_uncompiled_shader *ish, @@ -124,6 +164,9 @@ bind_state(struct iris_context *ice, ice->shaders.uncompiled[stage] = ish; ice->state.dirty |= dirty_bit; + /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change + * (or that they no longer need to do so). 
+ */ for (int i = 0; i < IRIS_NOS_COUNT; i++) { if (nos & (1 << i)) ice->state.dirty_for_nos[i] |= dirty_bit; @@ -149,6 +192,7 @@ iris_bind_tes_state(struct pipe_context *ctx, void *state) { struct iris_context *ice = (struct iris_context *)ctx; + /* Enabling/disabling optional stages requires a URB reconfiguration. */ if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]) ice->state.dirty |= IRIS_DIRTY_URB; @@ -160,6 +204,7 @@ iris_bind_gs_state(struct pipe_context *ctx, void *state) { struct iris_context *ice = (struct iris_context *)ctx; + /* Enabling/disabling optional stages requires a URB reconfiguration. */ if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]) ice->state.dirty |= IRIS_DIRTY_URB; @@ -244,6 +289,11 @@ assign_common_binding_table_offsets(const struct gen_device_info *devinfo, return next_binding_table_offset; } +/** + * Associate NIR uniform variables with the prog_data->param[] mechanism + * used by the backend. Also, decide which UBOs we'd like to push in an + * ideal situation (though the backend can reduce this). + */ static void iris_setup_uniforms(const struct brw_compiler *compiler, void *mem_ctx, @@ -266,6 +316,11 @@ iris_setup_uniforms(const struct brw_compiler *compiler, brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges); } +/** + * If we still have regular uniforms as push constants after the backend + * compilation, set up a UBO range for them. This will be used to fill + * out the 3DSTATE_CONSTANT_* packets which cause the data to be pushed. + */ static void iris_setup_push_uniform_range(const struct brw_compiler *compiler, struct brw_stage_prog_data *prog_data) @@ -282,6 +337,9 @@ iris_setup_push_uniform_range(const struct brw_compiler *compiler, } } +/** + * Compile a vertex shader, and upload the assembly. 
+ */ static bool iris_compile_vs(struct iris_context *ice, struct iris_uncompiled_shader *ish, @@ -330,6 +388,11 @@ iris_compile_vs(struct iris_context *ice, return true; } +/** + * Update the current vertex shader variant. + * + * Fill out the key, look in the cache, compile and bind if needed. + */ static void iris_update_compiled_vs(struct iris_context *ice) { @@ -345,6 +408,9 @@ iris_update_compiled_vs(struct iris_context *ice) UNUSED bool success = iris_compile_vs(ice, ish, &key); } +/** + * Get the shader_info for a given stage, or NULL if the stage is disabled. + */ const struct shader_info * iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage) { @@ -388,6 +454,9 @@ get_unified_tess_slots(const struct iris_context *ice, } } +/** + * Compile a tessellation control shader, and upload the assembly. + */ static bool iris_compile_tcs(struct iris_context *ice, struct iris_uncompiled_shader *ish, @@ -427,6 +496,11 @@ iris_compile_tcs(struct iris_context *ice, return true; } +/** + * Update the current tessellation control shader variant. + * + * Fill out the key, look in the cache, compile and bind if needed. + */ static void iris_update_compiled_tcs(struct iris_context *ice) { @@ -458,6 +532,9 @@ iris_update_compiled_tcs(struct iris_context *ice) UNUSED bool success = iris_compile_tcs(ice, tcs, &key); } +/** + * Compile a tessellation evaluation shader, and upload the assembly. + */ static bool iris_compile_tes(struct iris_context *ice, struct iris_uncompiled_shader *ish, @@ -505,6 +582,11 @@ iris_compile_tes(struct iris_context *ice, return true; } +/** + * Update the current tessellation evaluation shader variant. + * + * Fill out the key, look in the cache, compile and bind if needed. 
+ */ static void iris_update_compiled_tes(struct iris_context *ice) { @@ -526,6 +608,9 @@ iris_update_compiled_tes(struct iris_context *ice) UNUSED bool success = iris_compile_tes(ice, ish, &key); } +/** + * Compile a geometry shader, and upload the assembly. + */ static bool iris_compile_gs(struct iris_context *ice, struct iris_uncompiled_shader *ish, @@ -573,7 +658,11 @@ iris_compile_gs(struct iris_context *ice, return true; } - +/** + * Update the current geometry shader variant. + * + * Fill out the key, look in the cache, compile and bind if needed. + */ static void iris_update_compiled_gs(struct iris_context *ice) { @@ -594,6 +683,9 @@ iris_update_compiled_gs(struct iris_context *ice) UNUSED bool success = iris_compile_gs(ice, ish, &key); } +/** + * Compile a fragment (pixel) shader, and upload the assembly. + */ static bool iris_compile_fs(struct iris_context *ice, struct iris_uncompiled_shader *ish, @@ -637,6 +729,11 @@ iris_compile_fs(struct iris_context *ice, return true; } +/** + * Update the current fragment shader variant. + * + * Fill out the key, look in the cache, compile and bind if needed. + */ static void iris_update_compiled_fs(struct iris_context *ice) { @@ -652,6 +749,11 @@ iris_update_compiled_fs(struct iris_context *ice) iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map); } +/** + * Get the compiled shader for the last enabled geometry stage. + * + * This stage is the one which will feed stream output and the rasterizer. + */ static struct iris_compiled_shader * last_vue_shader(struct iris_context *ice) { @@ -664,6 +766,12 @@ last_vue_shader(struct iris_context *ice) return ice->shaders.prog[MESA_SHADER_VERTEX]; } +/** + * Update the last enabled stage's VUE map. + * + * When the shader feeding the rasterizer's output interface changes, we + * need to re-emit various packets. 
+ */ static void update_last_vue_map(struct iris_context *ice, struct brw_stage_prog_data *prog_data) @@ -692,6 +800,9 @@ update_last_vue_map(struct iris_context *ice, ice->shaders.last_vue_map = &vue_prog_data->vue_map; } +/** + * Get the prog_data for a given stage, or NULL if the stage is disabled. + */ static struct brw_vue_prog_data * get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage) { @@ -701,6 +812,13 @@ get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage) return (void *) ice->shaders.prog[stage]->prog_data; } +/** + * Update the current shader variants for the given state. + * + * This should be called on every draw call to ensure that the correct + * shaders are bound. It will also flag any dirty state triggered by + * swapping out those shaders. + */ void iris_update_compiled_shaders(struct iris_context *ice) { @@ -732,6 +850,7 @@ iris_update_compiled_shaders(struct iris_context *ice) iris_update_compiled_fs(ice); // ... + /* Changing shader interfaces may require a URB configuration. */ if (!(dirty & IRIS_DIRTY_URB)) { for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { struct brw_vue_prog_data *old = old_prog_datas[i]; diff --git a/src/gallium/drivers/iris/iris_program_cache.c b/src/gallium/drivers/iris/iris_program_cache.c index 7fe4464862f..b84e3ac0b4c 100644 --- a/src/gallium/drivers/iris/iris_program_cache.c +++ b/src/gallium/drivers/iris/iris_program_cache.c @@ -20,6 +20,15 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * @file iris_program_cache.c + * + * The in-memory program cache. This is basically a hash table mapping + * API-specified shaders and a state key to a compiled variant. It also + * takes care of uploading shader assembly into a BO for use on the GPU. 
+ */ + #include #include #include "pipe/p_defines.h" diff --git a/src/gallium/drivers/iris/iris_query.c b/src/gallium/drivers/iris/iris_query.c index 2057b1b5d44..2fc06fe5195 100644 --- a/src/gallium/drivers/iris/iris_query.c +++ b/src/gallium/drivers/iris/iris_query.c @@ -20,6 +20,13 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * @file iris_query.c + * + * XXX: this file is EMPTY. it will eventually implement query objects! + */ + #include #include #include "pipe/p_defines.h" diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c index 8e50ee2c699..79dcd94566d 100644 --- a/src/gallium/drivers/iris/iris_resource.c +++ b/src/gallium/drivers/iris/iris_resource.c @@ -20,6 +20,15 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * @file iris_resource.c + * + * Resources are images, buffers, and other objects used by the GPU. + * + * XXX: explain resources + */ + #include #include #include "pipe/p_defines.h" @@ -336,7 +345,7 @@ iris_resource_from_handle(struct pipe_screen *pscreen, unreachable("invalid winsys handle type"); } if (!res->bo) - return NULL; + return NULL; uint64_t modifier = whandle->modifier; if (modifier == DRM_FORMAT_MOD_INVALID) { diff --git a/src/gallium/drivers/iris/iris_resource.h b/src/gallium/drivers/iris/iris_resource.h index 77a89f292f2..cf333cf0a24 100644 --- a/src/gallium/drivers/iris/iris_resource.h +++ b/src/gallium/drivers/iris/iris_resource.h @@ -27,6 +27,10 @@ #include "util/u_inlines.h" #include "intel/isl/isl.h" +#define IRIS_RESOURCE_FLAG_SHADER_MEMZONE (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) +#define IRIS_RESOURCE_FLAG_SURFACE_MEMZONE (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) +#define IRIS_RESOURCE_FLAG_DYNAMIC_MEMZONE (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) + /** * Resources represent a GPU buffer object or image (mipmap tree). 
* @@ -71,6 +75,9 @@ struct iris_transfer { void (*unmap)(struct iris_transfer *); }; +/** + * Unwrap a pipe_resource to get the underlying iris_bo (for convenience). + */ static inline struct iris_bo * iris_resource_bo(struct pipe_resource *p_res) { diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c index 4a464ab068e..c3bcd959ae2 100644 --- a/src/gallium/drivers/iris/iris_screen.c +++ b/src/gallium/drivers/iris/iris_screen.c @@ -20,6 +20,17 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * @file iris_screen.c + * + * Screen related driver hooks and capability lists. + * + * A program may use multiple rendering contexts (iris_context), but + * they all share a common screen (iris_screen). Global driver state + * can be stored in the screen; it may be accessed by multiple threads. + */ + #include #include #include diff --git a/src/gallium/drivers/iris/iris_screen.h b/src/gallium/drivers/iris/iris_screen.h index 4798f2255db..463b191d131 100644 --- a/src/gallium/drivers/iris/iris_screen.h +++ b/src/gallium/drivers/iris/iris_screen.h @@ -39,18 +39,28 @@ struct iris_bo; struct iris_screen { struct pipe_screen base; + /** Global slab allocator for iris_transfer_map objects */ struct slab_parent_pool transfer_pool; + /** drm device file descriptor */ int fd; + + /** PCI ID for our GPU device */ int pci_id; + /** Global program_string_id counter (see get_program_string_id()) */ unsigned program_id; struct gen_device_info devinfo; struct isl_device isl_dev; struct iris_bufmgr *bufmgr; - struct iris_bo *workaround_bo; struct brw_compiler *compiler; + + /** + * A buffer containing nothing useful, for hardware workarounds that + * require scratch writes or reads from some unimportant memory. 
+ */ + struct iris_bo *workaround_bo; }; struct pipe_screen *iris_screen_create(int fd); diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 76972307ea9..434626d2725 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -20,6 +20,57 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * @file iris_state.c + * + * ============================= GENXML CODE ============================= + * [This file is compiled once per generation.] + * ======================================================================= + * + * This is the main state upload code. + * + * Gallium uses Constant State Objects, or CSOs, for most state. Large, + * complex, or highly reusable state can be created once, and bound and + * rebound multiple times. This is modeled with the pipe->create_*_state() + * and pipe->bind_*_state() hooks. Highly dynamic or inexpensive state is + * streamed out on the fly, via pipe->set_*_state() hooks. + * + * OpenGL involves frequently mutating context state, which is mirrored in + * core Mesa by highly mutable data structures. However, most applications + * typically draw the same things over and over - from frame to frame, most + * of the same objects are still visible and need to be redrawn. So, rather + * than inventing new state all the time, applications usually mutate to swap + * between known states that we've seen before. + * + * Gallium isolates us from this mutation by tracking API state, and + * distilling it into a set of Constant State Objects, or CSOs. Large, + * complex, or typically reusable state can be created once, then reused + * multiple times. Drivers can create and store their own associated data. + * This create/bind model corresponds to the pipe->create_*_state() and + * pipe->bind_*_state() driver hooks. 
+ * + * Some state is cheap to create, or expected to be highly dynamic. Rather + * than creating and caching piles of CSOs for these, Gallium simply streams + * them out, via the pipe->set_*_state() driver hooks. + * + * To reduce draw time overhead, we try to compute as much state at create + * time as possible. Wherever possible, we translate the Gallium pipe state + * to 3DSTATE commands, and store those commands in the CSO. At draw time, + * we can simply memcpy them into a batch buffer. + * + * No hardware matches the abstraction perfectly, so some commands require + * information from multiple CSOs. In this case, we can store two copies + * of the packet (one in each CSO), and simply | together their DWords at + * draw time. Sometimes the second set is trivial (one or two fields), so + * we simply pack it at draw time. + * + * There are two main components in the file below. First, the CSO hooks + * create/bind/track state. The second are the draw-time upload functions, + * iris_upload_render_state() and iris_upload_compute_state(), which read + * the context state and emit the commands into the actual batch. + */ + #include #include @@ -112,6 +163,10 @@ __gen_combine_address(struct iris_batch *batch, void *location, #define MOCS_WB (2 << 1) +/** + * Statically assert that PIPE_* enums match the hardware packets. + * (As long as they match, we don't need to translate them.) 
+ */ UNUSED static void pipe_asserts() { #define PIPE_ASSERT(x) STATIC_ASSERT((int)x) @@ -270,20 +325,59 @@ translate_fill_mode(unsigned pipe_polymode) return map[pipe_polymode]; } +static unsigned +translate_mip_filter(enum pipe_tex_mipfilter pipe_mip) +{ + static const unsigned map[] = { + [PIPE_TEX_MIPFILTER_NEAREST] = MIPFILTER_NEAREST, + [PIPE_TEX_MIPFILTER_LINEAR] = MIPFILTER_LINEAR, + [PIPE_TEX_MIPFILTER_NONE] = MIPFILTER_NONE, + }; + return map[pipe_mip]; +} + +static uint32_t +translate_wrap(unsigned pipe_wrap) +{ + static const unsigned map[] = { + [PIPE_TEX_WRAP_REPEAT] = TCM_WRAP, + [PIPE_TEX_WRAP_CLAMP] = TCM_HALF_BORDER, + [PIPE_TEX_WRAP_CLAMP_TO_EDGE] = TCM_CLAMP, + [PIPE_TEX_WRAP_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, + [PIPE_TEX_WRAP_MIRROR_REPEAT] = TCM_MIRROR, + [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, + + /* These are unsupported. */ + [PIPE_TEX_WRAP_MIRROR_CLAMP] = -1, + [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1, + }; + return map[pipe_wrap]; +} + static struct iris_address ro_bo(struct iris_bo *bo, uint64_t offset) { - /* Not for CSOs! */ + /* CSOs must pass NULL for bo! Otherwise it will add the BO to the + * validation list at CSO creation time, instead of draw time. + */ return (struct iris_address) { .bo = bo, .offset = offset }; } static struct iris_address rw_bo(struct iris_bo *bo, uint64_t offset) { - /* Not for CSOs! */ + /* CSOs must pass NULL for bo! Otherwise it will add the BO to the + * validation list at CSO creation time, instead of draw time. + */ return (struct iris_address) { .bo = bo, .offset = offset, .write = true }; } +/** + * Allocate space for some indirect state. + * + * Return a pointer to the map (to fill it out) and a state ref (for + * referring to the state in GPU commands). + */ static void * upload_state(struct u_upload_mgr *uploader, struct iris_state_ref *ref, @@ -295,6 +389,13 @@ upload_state(struct u_upload_mgr *uploader, return p; } +/** + * Stream out temporary/short-lived state. 
+ * + * This allocates space, pins the BO, and includes the BO address in the + * returned offset (which works because all state lives in 32-bit memory + * zones). + */ static uint32_t * stream_state(struct iris_batch *batch, struct u_upload_mgr *uploader, @@ -315,6 +416,9 @@ stream_state(struct iris_batch *batch, return ptr; } +/** + * stream_state() + memcpy. + */ static uint32_t emit_state(struct iris_batch *batch, struct u_upload_mgr *uploader, @@ -333,10 +437,21 @@ emit_state(struct iris_batch *batch, return offset; } +/** + * Did field 'x' change between 'old_cso' and 'new_cso'? + * + * (If so, we may want to set some dirty flags.) + */ #define cso_changed(x) (!old_cso || (old_cso->x != new_cso->x)) #define cso_changed_memcmp(x) \ (!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0) +/** + * Upload the initial GPU state for a render context. + * + * This sets some invariant state that needs to be programmed a particular + * way, but we never actually change. + */ static void iris_init_render_context(struct iris_screen *screen, struct iris_batch *batch, @@ -347,6 +462,10 @@ iris_init_render_context(struct iris_screen *screen, /* XXX: PIPE_CONTROLs */ + /* We program STATE_BASE_ADDRESS once at context initialization time. + * Each base address points at a 4GB memory zone, and never needs to + * change. See iris_bufmgr.h for a description of the memory zones. + */ iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) { #if 0 // XXX: MOCS is stupid for this. @@ -380,10 +499,17 @@ iris_init_render_context(struct iris_screen *screen, sba.DynamicStateBufferSize = 0xfffff; } + /* 3DSTATE_DRAWING_RECTANGLE is non-pipelined, so we want to avoid + * changing it dynamically. We set it to the maximum size here, and + * instead include the render target dimensions in the viewport, so + * viewport extents clipping takes care of pruning stray geometry. 
+ */ iris_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { rect.ClippedDrawingRectangleXMax = UINT16_MAX; rect.ClippedDrawingRectangleYMax = UINT16_MAX; } + + /* Set the initial MSAA sample positions. */ iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_PATTERN), pat) { GEN_SAMPLE_POS_1X(pat._1xSample); GEN_SAMPLE_POS_2X(pat._2xSample); @@ -391,13 +517,22 @@ iris_init_render_context(struct iris_screen *screen, GEN_SAMPLE_POS_8X(pat._8xSample); GEN_SAMPLE_POS_16X(pat._16xSample); } + + /* Use the legacy AA line coverage computation. */ iris_emit_cmd(batch, GENX(3DSTATE_AA_LINE_PARAMETERS), foo); + + /* Disable chromakeying (it's for media) */ iris_emit_cmd(batch, GENX(3DSTATE_WM_CHROMAKEY), foo); + + /* We want regular rendering, not special HiZ operations. */ iris_emit_cmd(batch, GENX(3DSTATE_WM_HZ_OP), foo); - /* XXX: may need to set an offset for origin-UL framebuffers */ + + /* No polygon stippling offsets are necessary. */ + // XXX: may need to set an offset for origin-UL framebuffers iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_OFFSET), foo); - /* Just assign a static partitioning. */ + /* Set a static partitioning of the push constant area. */ + // XXX: this may be a bad idea...could starve the push ringbuffers... for (int i = 0; i <= MESA_SHADER_FRAGMENT; i++) { iris_emit_cmd(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) { alloc._3DCommandSubOpcode = 18 + i; @@ -408,12 +543,18 @@ iris_init_render_context(struct iris_screen *screen, } struct iris_vertex_buffer_state { + /** The 3DSTATE_VERTEX_BUFFERS hardware packet. */ uint32_t vertex_buffers[1 + 33 * GENX(VERTEX_BUFFER_STATE_length)]; + + /** The resource to source vertex data from. */ struct pipe_resource *resources[33]; + + /** The number of bound vertex buffers. */ unsigned num_buffers; }; struct iris_depth_buffer_state { + /* Depth/HiZ/Stencil related hardware packets. 
*/ uint32_t packets[GENX(3DSTATE_DEPTH_BUFFER_length) + GENX(3DSTATE_STENCIL_BUFFER_length) + GENX(3DSTATE_HIER_DEPTH_BUFFER_length) + @@ -421,8 +562,10 @@ struct iris_depth_buffer_state { }; /** - * State that can't be stored directly in iris_context because the data - * layout varies per generation. + * Generation-specific context state (ice->state.genx->...). + * + * Most state can go in iris_context directly, but these encode hardware + * packets which vary by generation. */ struct iris_genx_state { /** SF_CLIP_VIEWPORT */ @@ -435,21 +578,31 @@ struct iris_genx_state { uint32_t streamout[4 * GENX(3DSTATE_STREAMOUT_length)]; }; +// XXX: move this to iris_draw.c static void iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info) { } +/** + * The pipe->set_blend_color() driver hook. + * + * This corresponds to our COLOR_CALC_STATE. + */ static void iris_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) { struct iris_context *ice = (struct iris_context *) ctx; + /* Our COLOR_CALC_STATE is exactly pipe_blend_color, so just memcpy */ memcpy(&ice->state.blend_color, state, sizeof(struct pipe_blend_color)); ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE; } +/** + * Gallium CSO for blend state (see pipe_blend_state). + */ struct iris_blend_state { /** Partial 3DSTATE_PS_BLEND */ uint32_t ps_blend[GENX(3DSTATE_PS_BLEND_length)]; @@ -461,6 +614,11 @@ struct iris_blend_state { bool alpha_to_coverage; /* for shader key */ }; +/** + * The pipe->create_blend_state() driver hook. + * + * Translates a pipe_blend_state into iris_blend_state. + */ static void * iris_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) @@ -525,6 +683,11 @@ iris_create_blend_state(struct pipe_context *ctx, return cso; } +/** + * The pipe->bind_blend_state() driver hook. + * + * Bind a blending CSO and flag related dirty bits. 
+ */ static void iris_bind_blend_state(struct pipe_context *ctx, void *state) { @@ -535,14 +698,23 @@ iris_bind_blend_state(struct pipe_context *ctx, void *state) ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_BLEND]; } +/** + * Gallium CSO for depth, stencil, and alpha testing state. + */ struct iris_depth_stencil_alpha_state { - /** Partial 3DSTATE_WM_DEPTH_STENCIL */ + /** Partial 3DSTATE_WM_DEPTH_STENCIL. */ uint32_t wmds[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; - /** Outbound to BLEND_STATE, 3DSTATE_PS_BLEND, COLOR_CALC_STATE */ + /** Outbound to BLEND_STATE, 3DSTATE_PS_BLEND, COLOR_CALC_STATE. */ struct pipe_alpha_state alpha; }; +/** + * The pipe->create_depth_stencil_alpha_state() driver hook. + * + * We encode most of 3DSTATE_WM_DEPTH_STENCIL, and just save off the alpha + * testing state since we need pieces of it in a variety of places. + */ static void * iris_create_zsa_state(struct pipe_context *ctx, const struct pipe_depth_stencil_alpha_state *state) @@ -586,6 +758,11 @@ iris_create_zsa_state(struct pipe_context *ctx, return cso; } +/** + * The pipe->bind_depth_stencil_alpha_state() driver hook. + * + * Bind a depth/stencil/alpha CSO and flag related dirty bits. + */ static void iris_bind_zsa_state(struct pipe_context *ctx, void *state) { @@ -610,6 +787,9 @@ iris_bind_zsa_state(struct pipe_context *ctx, void *state) ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_DEPTH_STENCIL_ALPHA]; } +/** + * Gallium CSO for rasterizer state. + */ struct iris_rasterizer_state { uint32_t sf[GENX(3DSTATE_SF_length)]; uint32_t clip[GENX(3DSTATE_CLIP_length)]; @@ -634,6 +814,9 @@ struct iris_rasterizer_state { uint16_t sprite_coord_enable; }; +/** + * The pipe->create_rasterizer_state() driver hook. 
+ */ static void * iris_create_rasterizer_state(struct pipe_context *ctx, const struct pipe_rasterizer_state *state) @@ -654,6 +837,9 @@ iris_create_rasterizer_state(struct pipe_context *ctx, } #endif + // XXX: it may make more sense just to store the pipe_rasterizer_state, + // we're copying a lot of booleans here. But we don't need all of them... + cso->multisample = state->multisample; cso->force_persample_interp = state->force_persample_interp; cso->clip_halfz = state->clip_halfz; @@ -760,6 +946,11 @@ iris_create_rasterizer_state(struct pipe_context *ctx, return cso; } +/** + * The pipe->bind_rasterizer_state() driver hook. + * + * Bind a rasterizer CSO and flag related dirty bits. + */ static void iris_bind_rasterizer_state(struct pipe_context *ctx, void *state) { @@ -795,26 +986,10 @@ iris_bind_rasterizer_state(struct pipe_context *ctx, void *state) ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_RASTERIZER]; } -static uint32_t -translate_wrap(unsigned pipe_wrap) -{ - static const unsigned map[] = { - [PIPE_TEX_WRAP_REPEAT] = TCM_WRAP, - [PIPE_TEX_WRAP_CLAMP] = TCM_HALF_BORDER, - [PIPE_TEX_WRAP_CLAMP_TO_EDGE] = TCM_CLAMP, - [PIPE_TEX_WRAP_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, - [PIPE_TEX_WRAP_MIRROR_REPEAT] = TCM_MIRROR, - [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, - - /* These are unsupported. */ - [PIPE_TEX_WRAP_MIRROR_CLAMP] = -1, - [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1, - }; - return map[pipe_wrap]; -} - /** * Return true if the given wrap mode requires the border color to exist. + * + * (We can skip uploading it if the sampler isn't going to use it.) 
*/ static bool wrap_mode_needs_border_color(unsigned wrap_mode) @@ -822,18 +997,11 @@ { return wrap_mode == TCM_CLAMP_BORDER || wrap_mode == TCM_HALF_BORDER; } -static unsigned -translate_mip_filter(enum pipe_tex_mipfilter pipe_mip) -{ - static const unsigned map[] = { - [PIPE_TEX_MIPFILTER_NEAREST] = MIPFILTER_NEAREST, - [PIPE_TEX_MIPFILTER_LINEAR] = MIPFILTER_LINEAR, - [PIPE_TEX_MIPFILTER_NONE] = MIPFILTER_NONE, - }; - return map[pipe_mip]; -} - +/** + * Gallium CSO for sampler state. + */ struct iris_sampler_state { + // XXX: do we need this struct pipe_sampler_state base; bool needs_border_color; @@ -841,6 +1009,14 @@ uint32_t sampler_state[GENX(SAMPLER_STATE_length)]; }; +/** + * The pipe->create_sampler_state() driver hook. + * + * We fill out SAMPLER_STATE (except for the border color pointer), and + * store that on the CPU. It doesn't make sense to upload it to a GPU + * buffer object yet, because 3DSTATE_SAMPLER_STATE_POINTERS requires + * all bound sampler states to be in contiguous memory. + */ static void * iris_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) @@ -916,6 +1092,21 @@ return cso; } +/** + * The pipe->bind_sampler_states() driver hook. + * + * Now that we know all the sampler states, we upload them all into a + * contiguous area of GPU memory, for 3DSTATE_SAMPLER_STATE_POINTERS_*. + * We also fill out the border color state pointers at this point. + * + * We could defer this work to draw time, but we assume that binding + * will be less frequent than drawing. + */ +// XXX: this may be a bad idea, need to make sure that st/mesa calls us +// XXX: with the complete set of shaders. If it makes multiple calls to +// XXX: things one at a time, we could waste a lot of time assembling things. 
+// XXX: it doesn't even BUY us anything to do it here, because we only flag +// XXX: IRIS_DIRTY_SAMPLER_STATE when this is called... static void iris_bind_sampler_states(struct pipe_context *ctx, enum pipe_shader_type p_stage, @@ -984,7 +1175,16 @@ ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage; } +/** + * Gallium CSO for sampler views (texture views). + * + * In addition to the normal pipe_resource, this adds an ISL view + * which may reinterpret the format or restrict levels/layers. + * + * These can also be linear texture buffers. + */ struct iris_sampler_view { + // XXX: just store the resource, not the rest of this struct pipe_sampler_view pipe; struct isl_view view; @@ -993,8 +1193,8 @@ }; /** - * Convert an swizzle enumeration (i.e. PIPE_SWIZZLE_X) to one of the Gen7.5+ - * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are + * Convert a swizzle enumeration (i.e. PIPE_SWIZZLE_X) to one of the HW's + * "Shader Channel Select" enumerations (i.e. SCS_RED). The mappings are * * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE * 0 1 2 3 4 5 @@ -1002,8 +1202,6 @@ * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE * * which is simply adding 4 then modding by 8 (or anding with 7). - * - * We then may need to apply workarounds for textureGather hardware bugs. */ static enum isl_channel_select pipe_swizzle_to_isl_channel(enum pipe_swizzle swizzle) @@ -1011,6 +1209,9 @@ { return (swizzle + 4) & 7; } +/** + * The pipe->create_sampler_view() driver hook. + */ static struct pipe_sampler_view * iris_create_sampler_view(struct pipe_context *ctx, struct pipe_resource *tex, @@ -1052,6 +1253,7 @@ (itex->surf.usage & ISL_SURF_USAGE_CUBE_BIT), }; + /* Fill out SURFACE_STATE for this view. 
*/ if (tmpl->target != PIPE_BUFFER) { isv->view.base_level = tmpl->u.tex.first_level; isv->view.levels = tmpl->u.tex.last_level - tmpl->u.tex.first_level + 1; @@ -1084,6 +1286,22 @@ iris_create_sampler_view(struct pipe_context *ctx, return &isv->pipe; } +static void +iris_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *state) +{ + struct iris_sampler_view *isv = (void *) state; + pipe_resource_reference(&state->texture, NULL); + pipe_resource_reference(&isv->surface_state.res, NULL); + free(isv); +} + +/** + * The pipe->create_surface() driver hook. + * + * In Gallium nomenclature, "surfaces" are a view of a resource that + * can be bound as a render target or depth/stencil buffer. + */ static struct pipe_surface * iris_create_surface(struct pipe_context *ctx, struct pipe_resource *tex, @@ -1140,7 +1358,7 @@ iris_create_surface(struct pipe_context *ctx, .usage = usage, }; - /* Bail early for depth/stencil */ + /* Bail early for depth/stencil - we don't want SURFACE_STATE for them. */ if (res->surf.usage & (ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT)) return psurf; @@ -1164,6 +1382,9 @@ iris_create_surface(struct pipe_context *ctx, return psurf; } +/** + * The pipe->set_sampler_views() driver hook. + */ static void iris_set_sampler_views(struct pipe_context *ctx, enum pipe_shader_type p_stage, @@ -1188,12 +1409,25 @@ iris_set_sampler_views(struct pipe_context *ctx, ice->state.dirty |= (IRIS_DIRTY_BINDINGS_VS << stage); } +static void +iris_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf) +{ + struct iris_surface *surf = (void *) p_surf; + pipe_resource_reference(&p_surf->texture, NULL); + pipe_resource_reference(&surf->surface_state.res, NULL); + free(surf); +} + +// XXX: actually implement user clip planes static void iris_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) { } +/** + * The pipe->set_polygon_stipple() driver hook. 
+ */ static void iris_set_polygon_stipple(struct pipe_context *ctx, const struct pipe_poly_stipple *state) @@ -1203,15 +1437,27 @@ ice->state.dirty |= IRIS_DIRTY_POLYGON_STIPPLE; } +/** + * The pipe->set_sample_mask() driver hook. + */ static void iris_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) { struct iris_context *ice = (struct iris_context *) ctx; + /* We only support 16x MSAA, so we have 16 bits of sample mask. + * st/mesa may pass us 0xffffffff though, meaning "enable all samples". + */ ice->state.sample_mask = sample_mask & 0xffff; ice->state.dirty |= IRIS_DIRTY_SAMPLE_MASK; } +/** + * The pipe->set_scissor_states() driver hook. + * + * This corresponds to our SCISSOR_RECT state structures. It's an + * exact match, so we just store them, and memcpy them out later. + */ static void iris_set_scissor_states(struct pipe_context *ctx, unsigned start_slot, @@ -1227,6 +1473,11 @@ ice->state.dirty |= IRIS_DIRTY_SCISSOR_RECT; } +/** + * The pipe->set_stencil_ref() driver hook. + * + * This is added to 3DSTATE_WM_DEPTH_STENCIL dynamically at draw time. + */ static void iris_set_stencil_ref(struct pipe_context *ctx, const struct pipe_stencil_ref *state) @@ -1324,6 +1575,13 @@ } #endif +/** + * The pipe->set_viewport_states() driver hook. + * + * This corresponds to our SF_CLIP_VIEWPORT states. We can't calculate + * the guardband yet, as we need the framebuffer dimensions, but we can + * at least fill out the rest. + */ static void iris_set_viewport_states(struct pipe_context *ctx, unsigned start_slot, @@ -1369,6 +1627,12 @@ ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT; } +/** + * The pipe->set_framebuffer_state() driver hook. + * + * Sets the current draw FBO, including color render targets, depth, + * and stencil buffers. 
+ */ static void iris_set_framebuffer_state(struct pipe_context *ctx, const struct pipe_framebuffer_state *state) @@ -1462,6 +1726,12 @@ iris_set_framebuffer_state(struct pipe_context *ctx, ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_FRAMEBUFFER]; } +/** + * The pipe->set_constant_buffer() driver hook. + * + * This uploads any constant data in user buffers, and references + * any UBO resources containing constant data. + */ static void iris_set_constant_buffer(struct pipe_context *ctx, enum pipe_shader_type p_stage, unsigned index, @@ -1513,6 +1783,12 @@ iris_set_constant_buffer(struct pipe_context *ctx, ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << stage; } +/** + * The pipe->set_shader_buffers() driver hook. + * + * This binds SSBOs and ABOs. Unfortunately, we need to stream out + * SURFACE_STATE here, as the buffer offset may change each time. + */ static void iris_set_shader_buffers(struct pipe_context *ctx, enum pipe_shader_type p_stage, @@ -1562,26 +1838,6 @@ iris_set_shader_buffers(struct pipe_context *ctx, ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << stage; } -static void -iris_sampler_view_destroy(struct pipe_context *ctx, - struct pipe_sampler_view *state) -{ - struct iris_sampler_view *isv = (void *) state; - pipe_resource_reference(&state->texture, NULL); - pipe_resource_reference(&isv->surface_state.res, NULL); - free(isv); -} - - -static void -iris_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf) -{ - struct iris_surface *surf = (void *) p_surf; - pipe_resource_reference(&p_surf->texture, NULL); - pipe_resource_reference(&surf->surface_state.res, NULL); - free(surf); -} - static void iris_delete_state(struct pipe_context *ctx, void *state) { @@ -1595,6 +1851,11 @@ iris_free_vertex_buffers(struct iris_vertex_buffer_state *cso) pipe_resource_reference(&cso->resources[i], NULL); } +/** + * The pipe->set_vertex_buffers() driver hook. + * + * This translates pipe_vertex_buffer to our 3DSTATE_VERTEX_BUFFERS packet. 
+ */ static void iris_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count, @@ -1646,12 +1907,21 @@ iris_set_vertex_buffers(struct pipe_context *ctx, ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS; } +/** + * Gallium CSO for vertex elements. + */ struct iris_vertex_element_state { uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)]; uint32_t vf_instancing[33 * GENX(3DSTATE_VF_INSTANCING_length)]; unsigned count; }; +/** + * The pipe->create_vertex_elements() driver hook. + * + * This translates pipe_vertex_element to our 3DSTATE_VERTEX_ELEMENTS + * and 3DSTATE_VF_INSTANCING commands. SGVs are handled at draw time. + */ static void * iris_create_vertex_elements(struct pipe_context *ctx, unsigned count, @@ -1728,6 +1998,9 @@ iris_create_vertex_elements(struct pipe_context *ctx, return cso; } +/** + * The pipe->bind_vertex_elements_state() driver hook. + */ static void iris_bind_vertex_elements_state(struct pipe_context *ctx, void *state) { @@ -1735,6 +2008,9 @@ iris_bind_vertex_elements_state(struct pipe_context *ctx, void *state) struct iris_vertex_element_state *old_cso = ice->state.cso_vertex_elements; struct iris_vertex_element_state *new_cso = state; + /* 3DSTATE_VF_SGVs overrides the last VE, so if the count is changing, + * we need to re-emit it to ensure we're overriding the right one. + */ if (new_cso && cso_changed(count)) ice->state.dirty |= IRIS_DIRTY_VF_SGVS; @@ -1746,17 +2022,30 @@ static void * iris_create_compute_state(struct pipe_context *ctx, const struct pipe_compute_state *state) { + // XXX: actually do something return malloc(1); } +/** + * Gallium CSO for stream output (transform feedback) targets. + */ struct iris_stream_output_target { struct pipe_stream_output_target base; uint32_t so_buffer[GENX(3DSTATE_SO_BUFFER_length)]; + /** Storage holding the offset where we're writing in the buffer */ struct iris_state_ref offset; }; +/** + * The pipe->create_stream_output_target() driver hook. 
+ * + * "Target" here refers to a destination buffer. We translate this into + * a 3DSTATE_SO_BUFFER packet. We can handle most fields, but don't yet + * know which buffer this represents, or whether we ought to zero the + * write-offsets, or append. Those are handled in the set() hook. + */ static struct pipe_stream_output_target * iris_create_stream_output_target(struct pipe_context *ctx, struct pipe_resource *res, @@ -1805,6 +2094,13 @@ iris_stream_output_target_destroy(struct pipe_context *ctx, free(cso); } +/** + * The pipe->set_stream_output_targets() driver hook. + * + * At this point, we know which targets are bound to a particular index, + * and also whether we want to append or start over. We can finish the + * 3DSTATE_SO_BUFFER packets we started earlier. + */ static void iris_set_stream_output_targets(struct pipe_context *ctx, unsigned num_targets, @@ -1860,6 +2156,18 @@ iris_set_stream_output_targets(struct pipe_context *ctx, ice->state.dirty |= IRIS_DIRTY_SO_BUFFERS; } +/** + * An iris-vtable helper for encoding the 3DSTATE_SO_DECL_LIST and + * 3DSTATE_STREAMOUT packets. + * + * 3DSTATE_SO_DECL_LIST is a list of shader outputs we want the streamout + * hardware to record. We can create it entirely based on the shader, with + * no dynamic state dependencies. + * + * 3DSTATE_STREAMOUT is an annoying mix of shader-based information and + * state-based settings. We capture the shader-related ones here, and merge + * the rest in at draw time. + */ static uint32_t * iris_create_so_decl_list(const struct pipe_stream_output_info *info, const struct brw_vue_map *vue_map) @@ -2202,8 +2510,14 @@ iris_emit_sbe(struct iris_batch *batch, const struct iris_context *ice) static void iris_bind_compute_state(struct pipe_context *ctx, void *state) { + // XXX: do something } +/* ------------------------------------------------------------------- */ + +/** + * Set sampler-related program key fields based on the current state. 
+ */ static void iris_populate_sampler_key(const struct iris_context *ice, struct brw_sampler_prog_key_data *key) @@ -2213,6 +2527,9 @@ iris_populate_sampler_key(const struct iris_context *ice, } } +/** + * Populate VS program key fields based on the current state. + */ static void iris_populate_vs_key(const struct iris_context *ice, struct brw_vs_prog_key *key) @@ -2220,6 +2537,9 @@ iris_populate_vs_key(const struct iris_context *ice, iris_populate_sampler_key(ice, &key->tex); } +/** + * Populate TCS program key fields based on the current state. + */ static void iris_populate_tcs_key(const struct iris_context *ice, struct brw_tcs_prog_key *key) @@ -2227,6 +2547,9 @@ iris_populate_tcs_key(const struct iris_context *ice, iris_populate_sampler_key(ice, &key->tex); } +/** + * Populate TES program key fields based on the current state. + */ static void iris_populate_tes_key(const struct iris_context *ice, struct brw_tes_prog_key *key) @@ -2234,6 +2557,9 @@ iris_populate_tes_key(const struct iris_context *ice, iris_populate_sampler_key(ice, &key->tex); } +/** + * Populate GS program key fields based on the current state. + */ static void iris_populate_gs_key(const struct iris_context *ice, struct brw_gs_prog_key *key) @@ -2241,6 +2567,9 @@ iris_populate_gs_key(const struct iris_context *ice, iris_populate_sampler_key(ice, &key->tex); } +/** + * Populate FS program key fields based on the current state. + */ static void iris_populate_fs_key(const struct iris_context *ice, struct brw_wm_prog_key *key) @@ -2303,6 +2632,9 @@ KSP(const struct iris_compiled_shader *shader) pkt.StatisticsEnable = true; \ pkt.Enable = true; +/** + * Encode most of 3DSTATE_VS based on the compiled shader. + */ static void iris_store_vs_state(const struct gen_device_info *devinfo, struct iris_compiled_shader *shader) @@ -2319,6 +2651,9 @@ iris_store_vs_state(const struct gen_device_info *devinfo, } } +/** + * Encode most of 3DSTATE_HS based on the compiled shader. 
+ */ static void iris_store_tcs_state(const struct gen_device_info *devinfo, struct iris_compiled_shader *shader) @@ -2336,6 +2671,9 @@ iris_store_tcs_state(const struct gen_device_info *devinfo, } } +/** + * Encode 3DSTATE_TE and most of 3DSTATE_DS based on the compiled shader. + */ static void iris_store_tes_state(const struct gen_device_info *devinfo, struct iris_compiled_shader *shader) @@ -2370,6 +2708,9 @@ iris_store_tes_state(const struct gen_device_info *devinfo, } +/** + * Encode most of 3DSTATE_GS based on the compiled shader. + */ static void iris_store_gs_state(const struct gen_device_info *devinfo, struct iris_compiled_shader *shader) @@ -2414,6 +2755,9 @@ iris_store_gs_state(const struct gen_device_info *devinfo, } } +/** + * Encode most of 3DSTATE_PS and 3DSTATE_PS_EXTRA based on the shader. + */ static void iris_store_fs_state(const struct gen_device_info *devinfo, struct iris_compiled_shader *shader) @@ -2495,6 +2839,11 @@ iris_store_fs_state(const struct gen_device_info *devinfo, } } +/** + * Compute the size of the derived data (shader command packets). + * + * This must match the data written by the iris_store_xs_state() functions. + */ static unsigned iris_derived_program_state_size(enum iris_program_cache_id cache_id) { @@ -2514,6 +2863,12 @@ iris_derived_program_state_size(enum iris_program_cache_id cache_id) return sizeof(uint32_t) * dwords[cache_id]; } +/** + * Create any state packets corresponding to the given shader stage + * (i.e. 3DSTATE_VS) and save them as "derived data" in the shader variant. + * This means that we can look up a program in the in-memory cache and + * get most of the state packet without having to reconstruct it. 
+ */
 static void
 iris_store_derived_program_state(const struct gen_device_info *devinfo,
                                  enum iris_program_cache_id cache_id,
@@ -2543,6 +2898,13 @@ iris_store_derived_program_state(const struct gen_device_info *devinfo,
    }
 }
 
+/* ------------------------------------------------------------------- */
+
+/**
+ * Configure the URB.
+ *
+ * XXX: write a real comment.
+ */
 static void
 iris_upload_urb_config(struct iris_context *ice, struct iris_batch *batch)
 {
@@ -2660,6 +3022,13 @@ use_ssbo(struct iris_batch *batch, struct iris_context *ice,
    return surf_state->offset;
 }
 
+/**
+ * Populate the binding table for a given shader stage.
+ *
+ * This fills out the table of pointers to surfaces required by the shader,
+ * and also adds those buffers to the validation list so the kernel can make
+ * them resident before running our batch.
+ */
 static void
 iris_populate_binding_table(struct iris_context *ice,
                             struct iris_batch *batch,
@@ -2673,13 +3042,6 @@ iris_populate_binding_table(struct iris_context *ice,
    const struct shader_info *info = iris_get_shader_info(ice, stage);
    struct iris_shader_state *shs = &ice->shaders.state[stage];
 
-   // Surfaces:
-   // - pull constants
-   // - ubos/ssbos/abos
-   // - images
-   // - textures
-   // - render targets - write and read
-
    //struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
 
    uint32_t *bt_map = binder->map + binder->bt_offset[stage];
    int s = 0;
@@ -2729,13 +3091,9 @@ iris_populate_binding_table(struct iris_context *ice,
 
 #if 0
    // XXX: not implemented yet
-   assert(prog_data->binding_table.pull_constants_start == 0xd0d0d0d0);
-   assert(prog_data->binding_table.ubo_start == 0xd0d0d0d0);
-   assert(prog_data->binding_table.ssbo_start == 0xd0d0d0d0);
    assert(prog_data->binding_table.image_start == 0xd0d0d0d0);
-   assert(prog_data->binding_table.shader_time_start == 0xd0d0d0d0);
-   //assert(prog_data->binding_table.plane_start[1] == 0xd0d0d0d0);
-   //assert(prog_data->binding_table.plane_start[2] == 0xd0d0d0d0);
+ 
assert(prog_data->binding_table.plane_start[1] == 0xd0d0d0d0); + assert(prog_data->binding_table.plane_start[2] == 0xd0d0d0d0); #endif } @@ -2750,6 +3108,7 @@ iris_use_optional_res(struct iris_batch *batch, } } +/* ------------------------------------------------------------------- */ /** * Pin any BOs which were installed by a previous batch, and restored @@ -3435,6 +3794,8 @@ iris_destroy_state(struct iris_context *ice) pipe_resource_reference(&ice->state.last_res.blend, NULL); } +/* ------------------------------------------------------------------- */ + static unsigned flags_to_post_sync_op(uint32_t flags) { @@ -3480,6 +3841,9 @@ get_post_sync_flags(enum pipe_control_flags flags) * * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming * Restrictions for PIPE_CONTROL. + * + * You should not use this function directly. Use the helpers in + * iris_pipe_control.c instead, which may split the pipe control further. */ static void iris_emit_raw_pipe_control(struct iris_batch *batch, uint32_t flags,