2 * Copyright © 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 #include <sys/types.h>
31 #include "c11/threads.h"
32 #include "util/macros.h"
33 #include "util/u_atomic.h"
34 #include "util/list.h"
35 #include "pipe/p_defines.h"
38 struct gen_device_info
;
39 struct pipe_debug_callback
;
42 * Memory zones. When allocating a buffer, you can request that it is
43 * placed into a specific region of the virtual address space (PPGTT).
45 * Most buffers can go anywhere (IRIS_MEMZONE_OTHER). Some buffers are
46 * accessed via an offset from a base address. STATE_BASE_ADDRESS has
47 * a maximum 4GB size for each region, so we need to restrict those
48 * buffers to be within 4GB of the base. Each memory zone corresponds
49 * to a particular base address.
51 * We lay out the virtual address space as follows:
53 * - [0, 4K): Nothing (empty page for null address)
54 * - [4K, 4G): Shaders (Instruction Base Address)
55 * - [4G, 8G): Surfaces & Binders (Surface State Base Address, Bindless ...)
56 * - [8G, 12G): Dynamic (Dynamic State Base Address)
57 * - [12G, *): Other (everything else in the full 48-bit VMA)
59 * A special buffer for border color lives at the start of the dynamic state
60 * memory zone. This unfortunately has to be handled specially because the
61 * SAMPLER_STATE "Indirect State Pointer" field is only a 24-bit pointer.
63 * Each GL context uses a separate GEM context, which technically gives them
64 * each a separate VMA. However, we assign addresses globally, so buffers will
65 * have the same address in all GEM contexts. This lets us have a single BO
66 * field for the address, which is easy and cheap.
68 enum iris_memory_zone
{
75 IRIS_MEMZONE_BORDER_COLOR_POOL
,
78 /* Intentionally exclude single buffer "zones" */
79 #define IRIS_MEMZONE_COUNT (IRIS_MEMZONE_OTHER + 1)
81 #define IRIS_BINDER_SIZE (64 * 1024)
82 #define IRIS_MAX_BINDERS 100
84 #define IRIS_MEMZONE_SHADER_START (0ull * (1ull << 32))
85 #define IRIS_MEMZONE_BINDER_START (1ull * (1ull << 32))
86 #define IRIS_MEMZONE_SURFACE_START (IRIS_MEMZONE_BINDER_START + IRIS_MAX_BINDERS * IRIS_BINDER_SIZE)
87 #define IRIS_MEMZONE_DYNAMIC_START (2ull * (1ull << 32))
88 #define IRIS_MEMZONE_OTHER_START (3ull * (1ull << 32))
90 #define IRIS_BORDER_COLOR_POOL_ADDRESS IRIS_MEMZONE_DYNAMIC_START
91 #define IRIS_BORDER_COLOR_POOL_SIZE (64 * 1024)
94 * Classification of the various incoherent caches of the GPU into a number of
98 /** Render color cache. */
99 IRIS_DOMAIN_RENDER_WRITE
= 0,
100 /** (Hi)Z/stencil cache. */
101 IRIS_DOMAIN_DEPTH_WRITE
,
102 /** Any other read-write cache. */
103 IRIS_DOMAIN_OTHER_WRITE
,
104 /** Any other read-only cache. */
105 IRIS_DOMAIN_OTHER_READ
,
106 /** Number of caching domains. */
108 /** Not a real cache, used to opt out of the cache tracking mechanism. */
109 IRIS_DOMAIN_NONE
= NUM_IRIS_DOMAINS
113 * Whether a caching domain is guaranteed not to write any data to memory.
116 iris_domain_is_read_only(enum iris_domain access
)
118 return access
== IRIS_DOMAIN_OTHER_READ
;
123 * Size in bytes of the buffer object.
125 * The size may be larger than the size originally requested for the
126 * allocation, such as being aligned to page size.
130 /** Buffer manager context associated with this buffer object */
131 struct iris_bufmgr
*bufmgr
;
133 /** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
136 /** The GEM handle for this buffer object. */
140 * Virtual address of the buffer inside the PPGTT (Per-Process Graphics
141 * Translation Table).
143 * Although each hardware context has its own VMA, we assign BO's to the
144 * same address in all contexts, for simplicity.
149 * If non-zero, then this bo has an aux-map translation to this address.
151 uint64_t aux_map_address
;
154 * The validation list index for this buffer, or -1 when not in a batch.
155 * Note that a single buffer may be in multiple batches (contexts), and
156 * this is a global field, which refers to the last batch using the BO.
157 * It should not be considered authoritative, but can be used to avoid a
158 * linear walk of the validation list in the common case by guessing that
159 * exec_bos[bo->index] == bo and confirming whether that's the case.
161 * XXX: this is not ideal now that we have more than one batch per context,
162 * XXX: as the index will flop back and forth between the render index and
163 * XXX: compute index...
173 * Kernel-assigned global name for this object
175 * List contains both flink named and prime fd'd objects
177 unsigned global_name
;
180 * Current tiling mode
182 uint32_t tiling_mode
;
187 /** Mapped address for the buffer, saved across map/unmap cycles */
189 /** GTT virtual address for the buffer, saved across map/unmap cycles */
191 /** WC CPU address for the buffer, saved across map/unmap cycles */
195 struct list_head head
;
197 /** List of GEM handle exports of this buffer (bo_export) */
198 struct list_head exports
;
201 * Synchronization sequence number of most recent access of this BO from
202 * each caching domain.
204 * Although this is a global field, use in multiple contexts should be
205 * safe, see iris_emit_buffer_barrier_for() for details.
207 * Also align it to 64 bits. This will make atomic operations faster on 32
210 uint64_t last_seqnos
[NUM_IRIS_DOMAINS
] __attribute__ ((aligned (8)));
213 * Boolean of whether the GPU is definitely not accessing the buffer.
215 * This is only valid when reusable, since non-reusable
216 * buffers are those that have been shared with other
217 * processes, so we don't know their state.
222 * Boolean of whether this buffer can be re-used
227 * Boolean of whether this buffer has been shared with an external client.
232 * Boolean of whether this buffer is cache coherent
237 * Boolean of whether this buffer points into user memory
242 #define BO_ALLOC_ZEROED (1<<0)
243 #define BO_ALLOC_COHERENT (1<<1)
246 * Allocate a buffer object.
248 * Buffer objects are not necessarily initially mapped into CPU virtual
249 * address space or graphics device aperture. They must be mapped
250 * using iris_bo_map() to be used by the CPU.
252 struct iris_bo
*iris_bo_alloc(struct iris_bufmgr
*bufmgr
,
255 enum iris_memory_zone memzone
);
258 * Allocate a tiled buffer object.
260 * Alignment for tiled objects is set automatically; the 'flags'
261 * argument provides a hint about how the object will be used initially.
263 * Valid tiling formats are:
268 struct iris_bo
*iris_bo_alloc_tiled(struct iris_bufmgr
*bufmgr
,
272 enum iris_memory_zone memzone
,
273 uint32_t tiling_mode
,
278 iris_bo_create_userptr(struct iris_bufmgr
*bufmgr
, const char *name
,
279 void *ptr
, size_t size
,
280 enum iris_memory_zone memzone
);
282 /** Takes a reference on a buffer object */
284 iris_bo_reference(struct iris_bo
*bo
)
286 p_atomic_inc(&bo
->refcount
);
290 * Releases a reference on a buffer object, freeing the data if
291 * no references remain.
293 void iris_bo_unreference(struct iris_bo
*bo
);
295 #define MAP_READ PIPE_TRANSFER_READ
296 #define MAP_WRITE PIPE_TRANSFER_WRITE
297 #define MAP_ASYNC PIPE_TRANSFER_UNSYNCHRONIZED
298 #define MAP_PERSISTENT PIPE_TRANSFER_PERSISTENT
299 #define MAP_COHERENT PIPE_TRANSFER_COHERENT
301 #define MAP_INTERNAL_MASK (0xffu << 24)
302 #define MAP_RAW (0x01 << 24)
304 #define MAP_FLAGS (MAP_READ | MAP_WRITE | MAP_ASYNC | \
305 MAP_PERSISTENT | MAP_COHERENT | MAP_INTERNAL_MASK)
308 * Maps the buffer into userspace.
310 * This function will block waiting for any existing execution on the
311 * buffer to complete, first. The resulting mapping is returned.
313 MUST_CHECK
void *iris_bo_map(struct pipe_debug_callback
*dbg
,
314 struct iris_bo
*bo
, unsigned flags
);
/**
 * Nominally releases the userspace mapping of the buffer object.
 *
 * As written here this is a no-op: \p bo is not touched and the function
 * always reports success by returning 0.
 */
static inline int
iris_bo_unmap(struct iris_bo *bo)
{
   (void) bo;   /* intentionally unused */

   return 0;
}
323 * Waits for rendering to an object by the GPU to have completed.
325 * This is not required for any access to the BO by bo_map,
326 * bo_subdata, etc. It is merely a way for the driver to implement
329 void iris_bo_wait_rendering(struct iris_bo
*bo
);
333 * Unref a buffer manager instance.
335 void iris_bufmgr_unref(struct iris_bufmgr
*bufmgr
);
338 * Create a visible name for a buffer which can be used by other apps
340 * \param bo Buffer to create a name for
341 * \param name Returned name
343 int iris_bo_flink(struct iris_bo
*bo
, uint32_t *name
);
346 * Make a BO externally accessible.
348 * \param bo Buffer to make external
350 void iris_bo_make_external(struct iris_bo
*bo
);
353 * Returns 1 if mapping the buffer for write could cause the process
354 * to block, due to the object being active in the GPU.
356 int iris_bo_busy(struct iris_bo
*bo
);
359 * Specify the volatility of the buffer.
360 * \param bo Buffer whose volatility is being specified
361 * \param madv The purgeable status
363 * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
364 * reclaimed under memory pressure. If you subsequently require the buffer,
365 * then you must pass I915_MADV_WILLNEED to mark the buffer as required.
367 * Returns 1 if the buffer was retained, or 0 if it was discarded whilst
368 * marked as I915_MADV_DONTNEED.
370 int iris_bo_madvise(struct iris_bo
*bo
, int madv
);
372 /* drm_bacon_bufmgr_gem.c */
373 struct iris_bufmgr
*iris_bufmgr_get_for_fd(struct gen_device_info
*devinfo
, int fd
,
375 int iris_bufmgr_get_fd(struct iris_bufmgr
*bufmgr
);
377 struct iris_bo
*iris_bo_gem_create_from_name(struct iris_bufmgr
*bufmgr
,
381 void* iris_bufmgr_get_aux_map_context(struct iris_bufmgr
*bufmgr
);
383 int iris_bo_wait(struct iris_bo
*bo
, int64_t timeout_ns
);
385 uint32_t iris_create_hw_context(struct iris_bufmgr
*bufmgr
);
386 uint32_t iris_clone_hw_context(struct iris_bufmgr
*bufmgr
, uint32_t ctx_id
);
388 #define IRIS_CONTEXT_LOW_PRIORITY ((I915_CONTEXT_MIN_USER_PRIORITY-1)/2)
389 #define IRIS_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
390 #define IRIS_CONTEXT_HIGH_PRIORITY ((I915_CONTEXT_MAX_USER_PRIORITY+1)/2)
392 int iris_hw_context_set_priority(struct iris_bufmgr
*bufmgr
,
393 uint32_t ctx_id
, int priority
);
395 void iris_destroy_hw_context(struct iris_bufmgr
*bufmgr
, uint32_t ctx_id
);
397 int iris_bo_export_dmabuf(struct iris_bo
*bo
, int *prime_fd
);
398 struct iris_bo
*iris_bo_import_dmabuf(struct iris_bufmgr
*bufmgr
, int prime_fd
,
402 * Exports a bo as a GEM handle into a given DRM file descriptor
403 * \param bo Buffer to export
404 * \param drm_fd File descriptor where the new handle is created
405 * \param out_handle Pointer to store the new handle
407 * Returns 0 if the buffer was successfully exported, a non zero error code
410 int iris_bo_export_gem_handle_for_device(struct iris_bo
*bo
, int drm_fd
,
411 uint32_t *out_handle
);
413 uint32_t iris_bo_export_gem_handle(struct iris_bo
*bo
);
415 int iris_reg_read(struct iris_bufmgr
*bufmgr
, uint32_t offset
, uint64_t *out
);
417 int drm_ioctl(int fd
, unsigned long request
, void *arg
);
420 * Returns the BO's address relative to the appropriate base address.
422 * All of our base addresses are programmed to the start of a 4GB region,
423 * so simply returning the bottom 32 bits of the BO address will give us
424 * the offset from whatever base address corresponds to that memory region.
426 static inline uint32_t
427 iris_bo_offset_from_base_address(struct iris_bo
*bo
)
429 /* This only works for buffers in the memory zones corresponding to a
430 * base address - the top, unbounded memory zone doesn't have a base.
432 assert(bo
->gtt_offset
< IRIS_MEMZONE_OTHER_START
);
433 return bo
->gtt_offset
;
437 * Track access of a BO from the specified caching domain and sequence number.
439 * Can be used without locking. Only the most recent access (i.e. highest
443 iris_bo_bump_seqno(struct iris_bo
*bo
, uint64_t seqno
,
444 enum iris_domain type
)
446 uint64_t *const last_seqno
= &bo
->last_seqnos
[type
];
447 uint64_t tmp
, prev_seqno
= p_atomic_read(last_seqno
);
449 while (prev_seqno
< seqno
&&
450 prev_seqno
!= (tmp
= p_atomic_cmpxchg(last_seqno
, prev_seqno
, seqno
)))
454 enum iris_memory_zone
iris_memzone_for_address(uint64_t address
);
456 #endif /* IRIS_BUFMGR_H */