/*
 * Copyright © 2016-2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <unistd.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "aub_mem.h"
#ifndef HAVE_MEMFD_CREATE
#include <sys/syscall.h>

static inline int
memfd_create(const char *name, unsigned int flags)
{
   return syscall(SYS_memfd_create, name, flags);
}
#endif
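/* Bookkeeping for the memory image reconstructed from an AUB trace:
 * bo_map entries record every mapping handed out to the decoder, while
 * ggtt_entry and phys_mem nodes live in red-black trees keyed by virtual
 * and physical page address respectively.
 */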
struct bo_map {
   struct list_head link;
   struct gen_batch_decode_bo bo;
   bool unmap_after_use;
};

struct ggtt_entry {
   struct rb_node node;
   uint64_t virt_addr;
   uint64_t phys_addr;
};

struct phys_mem {
   struct rb_node node;
   uint64_t fd_offset;
   uint64_t phys_addr;
   uint8_t *data;
   const uint8_t *aub_data;
};
static void
add_gtt_bo_map(struct aub_mem *mem, struct gen_batch_decode_bo bo, bool unmap_after_use)
{
   struct bo_map *m = calloc(1, sizeof(*m));

   m->bo = bo;
   m->unmap_after_use = unmap_after_use;
   list_add(&m->link, &mem->maps);
}
void
aub_mem_clear_bo_maps(struct aub_mem *mem)
{
   list_for_each_entry_safe(struct bo_map, i, &mem->maps, link) {
      if (i->unmap_after_use)
         munmap((void *)i->bo.map, i->bo.size);
      list_del(&i->link);
      free(i);
   }
}
static inline struct ggtt_entry *
ggtt_entry_next(struct ggtt_entry *entry)
{
   if (!entry)
      return NULL;
   struct rb_node *node = rb_node_next(&entry->node);
   if (!node)
      return NULL;
   return rb_node_data(struct ggtt_entry, node, node);
}
static inline int
cmp_uint64(uint64_t a, uint64_t b)
{
   if (a < b)
      return -1;
   if (a > b)
      return 1;
   return 0;
}
static inline int
cmp_ggtt_entry(const struct rb_node *node, const void *addr)
{
   struct ggtt_entry *entry = rb_node_data(struct ggtt_entry, node, node);
   return cmp_uint64(entry->virt_addr, *(const uint64_t *)addr);
}
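/* Find the GGTT entry for virt_addr, creating it if it does not exist
 * yet.  The sloppy search returns a neighboring node when there is no
 * exact match, which then serves as the insertion point.
 */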
static struct ggtt_entry *
ensure_ggtt_entry(struct aub_mem *mem, uint64_t virt_addr)
{
   struct rb_node *node = rb_tree_search_sloppy(&mem->ggtt, &virt_addr,
                                                cmp_ggtt_entry);
   int cmp = 0;
   if (!node || (cmp = cmp_ggtt_entry(node, &virt_addr))) {
      struct ggtt_entry *new_entry = calloc(1, sizeof(*new_entry));
      new_entry->virt_addr = virt_addr;
      rb_tree_insert_at(&mem->ggtt, node, &new_entry->node, cmp > 0);
      node = &new_entry->node;
   }

   return rb_node_data(struct ggtt_entry, node, node);
}
static struct ggtt_entry *
search_ggtt_entry(struct aub_mem *mem, uint64_t virt_addr)
{
   virt_addr &= ~0xfff;

   struct rb_node *node = rb_tree_search(&mem->ggtt, &virt_addr, cmp_ggtt_entry);

   if (!node)
      return NULL;

   return rb_node_data(struct ggtt_entry, node, node);
}
static inline int
cmp_phys_mem(const struct rb_node *node, const void *addr)
{
   struct phys_mem *mem = rb_node_data(struct phys_mem, node, node);
   return cmp_uint64(mem->phys_addr, *(uint64_t *)addr);
}
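/* Look up the 4KiB page of reconstructed physical memory at phys_addr,
 * allocating it on first use.  Each page is backed by a 4KiB slice of
 * the memfd so the same slice can later be re-mapped (MAP_FIXED) into
 * contiguous buffer views.
 */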
static struct phys_mem *
ensure_phys_mem(struct aub_mem *mem, uint64_t phys_addr)
{
   struct rb_node *node = rb_tree_search_sloppy(&mem->mem, &phys_addr, cmp_phys_mem);
   int cmp = 0;
   if (!node || (cmp = cmp_phys_mem(node, &phys_addr))) {
      struct phys_mem *new_mem = calloc(1, sizeof(*new_mem));
      new_mem->phys_addr = phys_addr;
      new_mem->fd_offset = mem->mem_fd_len;

      MAYBE_UNUSED int ftruncate_res = ftruncate(mem->mem_fd, mem->mem_fd_len += 4096);
      assert(ftruncate_res == 0);

      new_mem->data = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                           mem->mem_fd, new_mem->fd_offset);
      assert(new_mem->data != MAP_FAILED);

      rb_tree_insert_at(&mem->mem, node, &new_mem->node, cmp > 0);
      node = &new_mem->node;
   }

   return rb_node_data(struct phys_mem, node, node);
}
static struct phys_mem *
search_phys_mem(struct aub_mem *mem, uint64_t phys_addr)
{
   phys_addr &= ~0xfff;

   struct rb_node *node = rb_tree_search(&mem->mem, &phys_addr, cmp_phys_mem);

   if (!node)
      return NULL;

   return rb_node_data(struct phys_mem, node, node);
}
void
aub_mem_local_write(void *_mem, uint64_t address,
                    const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   struct gen_batch_decode_bo bo = {
      .map = data,
      .addr = address,
      .size = size,
   };
   add_gtt_bo_map(mem, bo, false);
}
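/* Record a write to the GGTT page table itself.  Each 8-byte entry maps
 * one 4KiB page, so the virtual address covered by the first entry is
 * (table offset / 8) << 12, and each subsequent entry advances it by one
 * page.
 */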
void
aub_mem_ggtt_entry_write(void *_mem, uint64_t address,
                         const void *_data, uint32_t _size)
{
   struct aub_mem *mem = _mem;
   uint64_t virt_addr = (address / sizeof(uint64_t)) << 12;
   const uint64_t *data = _data;
   size_t size = _size / sizeof(*data);
   for (const uint64_t *entry = data;
        entry < data + size;
        entry++, virt_addr += 4096) {
      struct ggtt_entry *pt = ensure_ggtt_entry(mem, virt_addr);
      pt->phys_addr = *entry;
   }
}
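/* Copy data into reconstructed physical memory, splitting the write at
 * 4KiB page boundaries.  aub_data records where each page's contents sit
 * in the raw AUB buffer so the original trace bytes can be retrieved
 * later.
 */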
void
aub_mem_phys_write(void *_mem, uint64_t phys_address,
                   const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   uint32_t to_write = size;
   for (uint64_t page = phys_address & ~0xfff; page < phys_address + size; page += 4096) {
      struct phys_mem *pmem = ensure_phys_mem(mem, page);
      uint64_t offset = MAX2(page, phys_address) - page;
      uint32_t size_this_page = MIN2(to_write, 4096 - offset);
      to_write -= size_this_page;
      memcpy(pmem->data + offset, data, size_this_page);
      pmem->aub_data = (const uint8_t *)data - offset;
      data = (const uint8_t *)data + size_this_page;
   }
}
void
aub_mem_ggtt_write(void *_mem, uint64_t virt_address,
                   const void *data, uint32_t size)
{
   struct aub_mem *mem = _mem;
   uint32_t to_write = size;
   for (uint64_t page = virt_address & ~0xfff; page < virt_address + size; page += 4096) {
      struct ggtt_entry *entry = search_ggtt_entry(mem, page);
      assert(entry && (entry->phys_addr & 0x1));

      uint64_t offset = MAX2(page, virt_address) - page;
      uint32_t size_this_page = MIN2(to_write, 4096 - offset);
      to_write -= size_this_page;

      uint64_t phys_page = entry->phys_addr & ~0xfff; /* Clear the validity bits. */
      aub_mem_phys_write(mem, phys_page + offset, data, size_this_page);
      data = (const uint8_t *)data + size_this_page;
   }
}
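/* Build a buffer view for a GGTT address: first check the mappings we
 * have already handed out, then gather the maximal run of contiguous
 * GGTT entries around the address, reserve an anonymous mapping of that
 * size, and MAP_FIXED each backing page from the memfd over it.
 */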
struct gen_batch_decode_bo
aub_mem_get_ggtt_bo(void *_mem, uint64_t address)
{
   struct aub_mem *mem = _mem;
   struct gen_batch_decode_bo bo = {0};

   list_for_each_entry(struct bo_map, i, &mem->maps, link)
      if (i->bo.addr <= address && i->bo.addr + i->bo.size > address)
         return i->bo;

   address &= ~0xfff;

   struct ggtt_entry *start =
      (struct ggtt_entry *)rb_tree_search_sloppy(&mem->ggtt, &address,
                                                 cmp_ggtt_entry);
   if (start && start->virt_addr < address)
      start = ggtt_entry_next(start);
   if (!start)
      return bo;

   struct ggtt_entry *last = start;
   for (struct ggtt_entry *i = ggtt_entry_next(last);
        i && last->virt_addr + 4096 == i->virt_addr;
        last = i, i = ggtt_entry_next(last))
      ;

   bo.addr = MIN2(address, start->virt_addr);
   bo.size = last->virt_addr - bo.addr + 4096;
   bo.map = mmap(NULL, bo.size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
   assert(bo.map != MAP_FAILED);

   for (struct ggtt_entry *i = start;
        i;
        i = i == last ? NULL : ggtt_entry_next(i)) {
      uint64_t phys_addr = i->phys_addr & ~0xfff;
      struct phys_mem *phys_mem = search_phys_mem(mem, phys_addr);

      if (!phys_mem)
         continue;

      uint32_t map_offset = i->virt_addr - address;
      void *res = mmap((uint8_t *)bo.map + map_offset, 4096, PROT_READ,
                       MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset);
      assert(res != MAP_FAILED);
   }

   add_gtt_bo_map(mem, bo, true);

   return bo;
}
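/* Walk the 4-level PPGTT: 9 bits of the virtual address index each
 * level, starting at bit 39 (PML4).  Bit 0 of an entry is its present
 * bit; the walk bails out with NULL on any missing table or non-present
 * entry.
 */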
static struct phys_mem *
ppgtt_walk(struct aub_mem *mem, uint64_t pml4, uint64_t address)
{
   uint64_t shift = 39;
   uint64_t addr = pml4;
   for (int level = 4; level > 0; level--) {
      struct phys_mem *table = search_phys_mem(mem, addr);
      if (!table)
         return NULL;
      int index = (address >> shift) & 0x1ff;
      uint64_t entry = ((uint64_t *)table->data)[index];
      if (!(entry & 1))
         return NULL;
      addr = entry & ~0xfff;
      shift -= 9;
   }
   return search_phys_mem(mem, addr);
}
static bool
ppgtt_mapped(struct aub_mem *mem, uint64_t pml4, uint64_t address)
{
   return ppgtt_walk(mem, pml4, address) != NULL;
}
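/* Build a buffer view for a PPGTT address.  As with the GGTT path, the
 * backing pages are stitched together with MAP_FIXED mappings of the
 * memfd slices found by walking the page tables.
 */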
struct gen_batch_decode_bo
aub_mem_get_ppgtt_bo(void *_mem, uint64_t address)
{
   struct aub_mem *mem = _mem;
   struct gen_batch_decode_bo bo = {0};

   address &= ~0xfff;

   if (!ppgtt_mapped(mem, mem->pml4, address))
      return bo;

   /* Map everything until the first gap since we don't know how much the
    * decoder actually needs.
    */
   uint64_t end = address;
   while (ppgtt_mapped(mem, mem->pml4, end))
      end += 4096;

   bo.addr = address;
   bo.size = end - address;
   bo.map = mmap(NULL, bo.size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
   assert(bo.map != MAP_FAILED);

   for (uint64_t page = address; page < end; page += 4096) {
      struct phys_mem *phys_mem = ppgtt_walk(mem, mem->pml4, page);

      void *res = mmap((uint8_t *)bo.map + (page - bo.addr), 4096, PROT_READ,
                       MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset);
      assert(res != MAP_FAILED);
   }

   add_gtt_bo_map(mem, bo, true);

   return bo;
}
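/* Initialize the tracker.  All reconstructed physical memory lives in a
 * single memfd that grows one 4KiB page at a time; an fd of -1 marks the
 * tracker as uninitialized.
 */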
bool
aub_mem_init(struct aub_mem *mem)
{
   memset(mem, 0, sizeof(*mem));

   list_inithead(&mem->maps);

   mem->mem_fd = memfd_create("phys memory", 0);

   return mem->mem_fd != -1;
}
void
aub_mem_fini(struct aub_mem *mem)
{
   if (mem->mem_fd == -1)
      return;

   aub_mem_clear_bo_maps(mem);

   rb_tree_foreach_safe(struct ggtt_entry, entry, &mem->ggtt, node) {
      rb_tree_remove(&mem->ggtt, &entry->node);
      free(entry);
   }
   rb_tree_foreach_safe(struct phys_mem, entry, &mem->mem, node) {
      rb_tree_remove(&mem->mem, &entry->node);
      munmap(entry->data, 4096); /* drop the page's private memfd mapping */
      free(entry);
   }

   close(mem->mem_fd);
   mem->mem_fd = -1;
}
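/* The remaining getters each return a single-page view of memory: the
 * reconstructed contents at a physical address, at a PPGTT virtual
 * address, or the page's original bytes from the AUB file.
 */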
struct gen_batch_decode_bo
aub_mem_get_phys_addr_data(struct aub_mem *mem, uint64_t phys_addr)
{
   struct phys_mem *page = search_phys_mem(mem, phys_addr);
   return page ?
      (struct gen_batch_decode_bo) { .map = page->data, .addr = page->phys_addr, .size = 4096 } :
      (struct gen_batch_decode_bo) {0};
}
struct gen_batch_decode_bo
aub_mem_get_ppgtt_addr_data(struct aub_mem *mem, uint64_t virt_addr)
{
   struct phys_mem *page = ppgtt_walk(mem, mem->pml4, virt_addr);
   return page ?
      (struct gen_batch_decode_bo) { .map = page->data, .addr = virt_addr & ~((1ULL << 12) - 1), .size = 4096 } :
      (struct gen_batch_decode_bo) {0};
}
struct gen_batch_decode_bo
aub_mem_get_ppgtt_addr_aub_data(struct aub_mem *mem, uint64_t virt_addr)
{
   struct phys_mem *page = ppgtt_walk(mem, mem->pml4, virt_addr);
   return page ?
      (struct gen_batch_decode_bo) { .map = page->aub_data, .addr = virt_addr & ~((1ULL << 12) - 1), .size = 4096 } :
      (struct gen_batch_decode_bo) {0};
}