src/gallium/drivers/iris/iris_batch.h

   1 /*
   2  * Copyright © 2017 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #ifndef IRIS_BATCH_DOT_H
  25 #define IRIS_BATCH_DOT_H
  26
  27 #include <stdint.h>
  28 #include <stdbool.h>
  29 #include <string.h>
  30
  31 #include "util/u_dynarray.h"
  32
  33 #include "drm-uapi/i915_drm.h"
  34 #include "common/gen_decoder.h"
  35
  36 #include "iris_fence.h"
  37 #include "iris_fine_fence.h"
  38
  39 struct iris_context;
  40
  41 /* Upper bound on batchbuffer size: the kernel assumes less than 256kB. */
  42 #define MAX_BATCH_SIZE (256 * 1024)
  43
  44 /* Bytes kept free for terminating a batch: 12 for MI_BATCH_BUFFER_START
  45  * when chaining (MI_BATCH_BUFFER_END alone would need only 4), plus
  46  * another 24 for the seqno write (using PIPE_CONTROL): 12 + 24 = 36.
  47  */
  48 #define BATCH_RESERVED 36
  49
  50 /* Target batch size (64kB less BATCH_RESERVED) - flush approximately here. */
  51 #define BATCH_SZ (64 * 1024 - BATCH_RESERVED)
  52
  53 enum iris_batch_name { /* value stored in iris_batch::name */
  54    IRIS_BATCH_RENDER,
  55    IRIS_BATCH_COMPUTE,
  56 };
  57
  58 #define IRIS_BATCH_COUNT 2 /* number of enumerators in iris_batch_name */
  59
  60 struct iris_batch {
  61    struct iris_screen *screen;
  62    struct pipe_debug_callback *dbg;
  63    struct pipe_device_reset_callback *reset;
  64
  65    /** Which batch this is (IRIS_BATCH_RENDER or IRIS_BATCH_COMPUTE). */
  66    enum iris_batch_name name;
  67
  68    /** Current batchbuffer being queued up. */
  69    struct iris_bo *bo;
  70    void *map;      /* base pointer: iris_batch_bytes_used() is map_next - map */
  71    void *map_next; /* returned, then advanced, by iris_get_command_space() */
  72
  73    /** Size of the primary batch being submitted to execbuf (in bytes). */
  74    unsigned primary_batch_size;
  75
  76    /** Total size of all chained batches (in bytes). */
  77    unsigned total_chained_batch_size;
  78
  79    /** Last Surface State Base Address set in this hardware context. */
  80    uint64_t last_surface_base_address;
  81
  82    uint32_t hw_ctx_id;
  83
  84    /** The validation list */
  85    struct drm_i915_gem_exec_object2 *validation_list;
  86    struct iris_bo **exec_bos;
  87    int exec_count;      /* presumably entries in use - TODO confirm */
  88    int exec_array_size; /* presumably allocated capacity - TODO confirm */
  89
  90    /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (aka first
  91     * instruction is a MI_BATCH_BUFFER_END).
  92     */
  93    bool noop_enabled;
  94
  95    /**
  96     * A list of iris_syncobjs associated with this batch.
  97     *
  98     * The first list entry will always be a signalling sync-point, indicating
  99     * that this batch has completed.  The others are likely to be sync-points
 100     * to wait on before executing the batch.
 101     */
 102    struct util_dynarray syncobjs;
 103
 104    /** A list of drm_i915_exec_fences to have execbuf signal or wait on */
 105    struct util_dynarray exec_fences;
 106
 107    /** The amount of aperture space (in bytes) used by all exec_bos */
 108    int aperture_space;
 109
 110    struct {
 111       /** Uploader to use for sequence numbers */
 112       struct u_upload_mgr *uploader;
 113
 114       /** GPU buffer and CPU map where our seqnos will be written. */
 115       struct iris_state_ref ref;
 116       uint32_t *map;
 117
 118       /** The sequence number to write the next time we add a fence. */
 119       uint32_t next;
 120    } fine_fences;
 121
 122    /** A seqno (and syncobj) for the last batch that was submitted. */
 123    struct iris_fine_fence *last_fence;
 124
 125    /** Other batches we might need to flush to use a BO (all but this one) */
 126    struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1];
 127
 128    struct {
 129       /**
 130        * Set of BOs (the comment said struct brw_bo *; presumably struct
 131        * iris_bo * in this driver - confirm) rendered to within this batch that
 132        * need flushing before use from a non-coherent cache domain (i.e. sampler).
 133        */
 134       struct hash_table *render;
 135    } cache;
 136
 137    struct gen_batch_decode_ctx decoder;
 138    struct hash_table_u64 *state_sizes;
 139
 140    /**
 141     * Matrix representation of the cache coherency status of the GPU at the
 142     * current end point of the batch.  For every i and j,
 143     * coherent_seqnos[i][j] denotes the seqno of the most recent flush of
 144     * cache domain j visible to cache domain i (which obviously implies that
 145     * coherent_seqnos[i][i] is the most recent flush of cache domain i).  This
 146     * can be used to efficiently determine whether synchronization is
 147     * necessary before accessing data from cache domain i if it was previously
 148     * accessed from another cache domain j.
 149     */
 150    uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS];
 151
 152    /**
 153     * Sequence number used to track the completion of any subsequent memory
 154     * operations in the batch until the next sync boundary.
 155     */
 156    uint64_t next_seqno;
 157
 158    /** Have we emitted any draw calls to this batch? */
 159    bool contains_draw;
 160
 161    /** Have we emitted any draw calls with next_seqno? */
 162    bool contains_draw_with_next_seqno;
 163
 164    /**
 165     * Number of times iris_batch_sync_region_start() has been called without a
 166     * matching iris_batch_sync_region_end() on this batch.
 167     */
 168    uint32_t sync_region_depth;
 169
 170    uint32_t last_aux_map_state;
 171 };
 172
 173 void iris_init_batch(struct iris_context *ice,
 174                      enum iris_batch_name name,
 175                      int priority);
 176 void iris_chain_to_new_batch(struct iris_batch *batch);
 177 void iris_batch_free(struct iris_batch *batch);
 178 void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate);
 179 /* iris_batch_flush() forwards the caller's __FILE__ and __LINE__. */
 180 void _iris_batch_flush(struct iris_batch *batch, const char *file, int line);
 181 #define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__)
 182
 183 bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);
 184
 185 bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable);
 186 /* RELOC_WRITE is an alias for the kernel's EXEC_OBJECT_WRITE flag. */
 187 #define RELOC_WRITE EXEC_OBJECT_WRITE
 188
 189 void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo,
 190                         bool writable, enum iris_domain access);
 191
 192 enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch);
 193 /** Bytes from batch->map to batch->map_next (char *, not GNU void * math). */
 194 static inline unsigned
 195 iris_batch_bytes_used(struct iris_batch *batch)
 196 {
 197    return (unsigned) ((char *) batch->map_next - (char *) batch->map);
 198 }
 199
 200 /**
 201  * Make sure the current command buffer can take \p size more bytes.
 202  * If the bytes already used plus \p size would reach BATCH_SZ, this
 203  * creates a secondary batch buffer and emits a jump from the primary
 204  * batch to the start of the secondary (iris_chain_to_new_batch()).
 205  * Most callers want iris_get_command_space() instead.
 206  */
 207 static inline void
 208 iris_require_command_space(struct iris_batch *batch, unsigned size)
 209 {
 210    const unsigned used = iris_batch_bytes_used(batch);
 211
 212    if (used + size < BATCH_SZ)
 213       return;
 214    iris_chain_to_new_batch(batch);
 215 }
 216
 217 /**
 218  * Reserve \p bytes in the current command buffer and return a pointer to
 219  * the start of that space so the caller can write commands there.
 220  * This should be called whenever emitting commands.
 221  */
 222 static inline void *
 223 iris_get_command_space(struct iris_batch *batch, unsigned bytes)
 224 {
 225    iris_require_command_space(batch, bytes);
 226    char *cmds = batch->map_next;
 227    /* Step through char *: arithmetic on void * is only a GNU extension. */
 228    batch->map_next = cmds + bytes;
 229    return cmds;
 230 }
 231
 232 /**
 233  * Emit GPU commands: copy \p size bytes from \p data into the batch.
 234  */
 235 static inline void
 236 iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
 237 {
 238    /* Space is reserved first; the commands are then copied into it. */
 239    memcpy(iris_get_command_space(batch, size), data, size);
 240 }
 241
 242 /**
 243  * Return the batch's signalling syncobj without taking a reference.
 244  */
 245 static inline struct iris_syncobj *
 246 iris_batch_get_signal_syncobj(struct iris_batch *batch)
 247 {
 248    /* batch->syncobjs holds pointers; the signalling one is stored first. */
 249    struct iris_syncobj **list =
 250       (struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs);
 251    return *list;
 252 }
 253
 254
 255 /**
 256  * Take a reference to the batch's signalling syncobj in \p out_syncobj.
 257  *
 258  * Callers can use this to wait for the current batch under construction
 259  * to complete (after flushing it).
 260  */
 261 static inline void
 262 iris_batch_reference_signal_syncobj(struct iris_batch *batch,
 263                                    struct iris_syncobj **out_syncobj)
 264 {
 265    iris_syncobj_reference(batch->screen, out_syncobj,
 266                           iris_batch_get_signal_syncobj(batch));
 267 }
 268
 269 /**
 270  * Remember the size of a piece of state, keyed by its offset from the
 271  * base, for INTEL_DEBUG=bat printing.  Does nothing when \p ht is NULL.
 272  */
 273 static inline void
 274 iris_record_state_size(struct hash_table_u64 *ht,
 275                        uint32_t offset_from_base, uint32_t size)
 276 {
 277    if (!ht)
 278       return;
 279    void *encoded_size = (void *)(uintptr_t) size;
 280    _mesa_hash_table_u64_insert(ht, offset_from_base, encoded_size);
 281 }
 282
 283 /**
 284  * Open a region of the batch with a stable synchronization sequence
 285  * number.  Any buffer object accessed by the batch buffer only needs to be
 286  * marked once (e.g. via iris_bo_bump_seqno()) within a region delimited by
 287  * iris_batch_sync_region_start() and iris_batch_sync_region_end().
 288  */
 289 static inline void
 290 iris_batch_sync_region_start(struct iris_batch *batch)
 291 {
 292    batch->sync_region_depth += 1;
 293 }
 294
 295 /**
 296  * Close a region of the batch with a stable synchronization sequence
 297  * number.  Call exactly once for each earlier call to
 298  * iris_batch_sync_region_start(); asserts that a region is open.
 299  */
 300 static inline void
 301 iris_batch_sync_region_end(struct iris_batch *batch)
 302 {
 303    assert(batch->sync_region_depth != 0);
 304    batch->sync_region_depth -= 1;
 305 }
 306
 307 /**
 308  * Begin a new synchronization section at the current point of the batch.
 309  * Does nothing while an iris_batch_sync_region_start() region is open.
 310  */
 311 static inline void
 312 iris_batch_sync_boundary(struct iris_batch *batch)
 313 {
 314    if (batch->sync_region_depth != 0)
 315       return;
 316    batch->contains_draw_with_next_seqno = false;
 317    batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
 318    assert(batch->next_seqno > 0);
 319 }
 320
 321 /**
 322  * Record a flush of caching domain \p access in the batch's cache
 323  * coherency status: its own (diagonal) entry becomes next_seqno - 1.
 324  */
 325 static inline void
 326 iris_batch_mark_flush_sync(struct iris_batch *batch, enum iris_domain access)
 327 {
 328    uint64_t *const own_flush = &batch->coherent_seqnos[access][access];
 329    *own_flush = batch->next_seqno - 1;
 330 }
 331
 332 /**
 333  * Record an invalidation of caching domain \p access: all prior flushes
 334  * of other caches (their diagonal entries) become visible to \p access.
 335  */
 336 static inline void
 337 iris_batch_mark_invalidate_sync(struct iris_batch *batch,
 338                                 enum iris_domain access)
 339 {
 340    uint64_t *const seen_by_access = batch->coherent_seqnos[access];
 341    for (unsigned d = 0; d < NUM_IRIS_DOMAINS; d++)
 342       seen_by_access[d] = batch->coherent_seqnos[d][d];
 343 }
 344
 345 /**
 346  * Record a reset: the kernel's heavyweight flushing at batch buffer boundaries
 347  * makes all previously accessed data visible to every caching domain.
 348  */
 349 static inline void
 350 iris_batch_mark_reset_sync(struct iris_batch *batch)
 351 {
 352    const uint64_t reset_seqno = batch->next_seqno - 1;
 353    for (unsigned row = 0; row < NUM_IRIS_DOMAINS; row++)
 354       for (unsigned col = 0; col < NUM_IRIS_DOMAINS; col++)
 355          batch->coherent_seqnos[row][col] = reset_seqno;
 356 }
 357
 358 #endif