1 /**************************************************************************
3 * Copyright 2018-2019 Alyssa Rosenzweig
4 * Copyright 2018-2019 Collabora, Ltd.
5 * Copyright © 2015 Intel Corporation
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
16 * The above copyright notice and this permission notice (including the
17 * next paragraph) shall be included in all copies or substantial portions
18 * of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
23 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
24 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 **************************************************************************/
34 #include "renderonly/renderonly.h"
35 #include "util/u_dynarray.h"
36 #include "util/bitset.h"
37 #include "util/list.h"
38 #include "util/sparse_array.h"
40 #include <midgard_pack.h>
43 #define PAN_MAX_CONST_BUFFERS 16
45 /* Transient slab size. This balances fragmentation against cache locality
46 * and ease of bookkeeping */
48 #define TRANSIENT_SLAB_PAGES (16) /* 64kb */
49 #define TRANSIENT_SLAB_SIZE (4096 * TRANSIENT_SLAB_PAGES)
51 /* Maximum number of transient slabs so we don't need dynamic arrays. Most
52 * interesting Mali boards are 4GB RAM max, so if the entire RAM was filled
53 * with transient slabs, you could never exceed (4GB / TRANSIENT_SLAB_SIZE)
54 * allocations anyway. By capping, we can use a fixed-size bitset for tracking
55 * free slabs, eliminating quite a bit of complexity. We can pack the free
56 * state of 8 slabs into a single byte, so for 64kb transient slabs the bitset
57 * occupies a cheap 8kb of memory */
59 #define MAX_TRANSIENT_SLABS (1024*1024 / TRANSIENT_SLAB_PAGES)
61 /* How many power-of-two levels in the BO cache do we want? 2^12
62 * minimum chosen as it is the page size that all allocations are
63 * rounded to */
65 #define MIN_BO_CACHE_BUCKET (12) /* 2^12 = 4KB */
66 #define MAX_BO_CACHE_BUCKET (22) /* 2^22 = 4MB */
68 /* Fencepost problem, hence the off-by-one */
69 #define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1)
71 /* Cache for blit shaders. Defined here so they can be cached with the device */
80 #define PAN_BLIT_NUM_TARGETS (12)
82 struct pan_blit_shaders
{
83 struct panfrost_bo
*bo
;
84 mali_ptr loads
[PAN_BLIT_NUM_TARGETS
][PAN_BLIT_NUM_TYPES
][2];
87 struct panfrost_device
{
93 /* Properties of the GPU in use */
96 unsigned thread_tls_alloc
;
99 /* Bitmask of supported compressed texture formats */
100 uint32_t compressed_formats
;
102 /* debug flags, see pan_util.h for how to interpret them */
105 drmVersionPtr kernel_version
;
107 struct renderonly
*ro
;
109 pthread_mutex_t bo_map_lock
;
110 struct util_sparse_array bo_map
;
113 pthread_mutex_t lock
;
115 /* List containing all cached BOs sorted in LRU (Least
116 * Recently Used) order. This allows us to quickly evict BOs
117 * that are more than 1 second old. */
119 struct list_head lru
;
121 /* The BO cache is a set of buckets with power-of-two sizes
122 * ranging from 2^MIN_BO_CACHE_BUCKET (4096, the page size) to
123 * 2^MAX_BO_CACHE_BUCKET (4MB), NR_BO_CACHE_BUCKETS buckets in total.
124 * Each bucket is a linked list of free panfrost_bo objects. */
126 struct list_head buckets
[NR_BO_CACHE_BUCKETS
];
129 struct pan_blit_shaders blit_shaders
;
131 /* Tiler heap shared across all tiler jobs, allocated against the
132 * device since there's only a single tiler. Since this is invisible to
133 * the CPU, it's okay for multiple contexts to reference it
134 * simultaneously; by keeping on the device struct, we eliminate a
135 * costly per-context allocation. */
137 struct panfrost_bo
*tiler_heap
;
141 panfrost_open_device(void *memctx
, int fd
, struct panfrost_device
*dev
);
144 panfrost_close_device(struct panfrost_device
*dev
);
147 panfrost_supports_compressed_format(struct panfrost_device
*dev
, unsigned fmt
);
149 static inline struct panfrost_bo
*
150 pan_lookup_bo(struct panfrost_device
*dev
, uint32_t gem_handle
)
152 return util_sparse_array_get(&dev
->bo_map
, gem_handle
);