2 * Copyright (c) 2018 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
25 * The aux map provides a multi-level lookup of the main surface address which
26 * ends up providing information about the auxiliary surface data, including
27 * the address where the auxiliary data resides.
 * The 48-bit VMA (GPU) address of the main surface is split to do the address
 * lookup:
32 * 48 bit address of main surface
33 * +--------+--------+--------+------+
34 * | 47:36 | 35:24 | 23:16 | 15:0 |
35 * | L3-idx | L2-idx | L1-idx | ... |
36 * +--------+--------+--------+------+
38 * The GFX_AUX_TABLE_BASE_ADDR points to a buffer. The L3 Table Entry is
39 * located by indexing into this buffer as a uint64_t array using the L3-idx
40 * value. The 64-bit L3 entry is defined as:
42 * +-------+-------------+------+---+
43 * | 63:48 | 47:15 | 14:1 | 0 |
44 * | ... | L2-tbl-addr | ... | V |
45 * +-------+-------------+------+---+
47 * If the `V` (valid) bit is set, then the L2-tbl-addr gives the address for
48 * the level-2 table entries, with the lower address bits filled with zero.
49 * The L2 Table Entry is located by indexing into this buffer as a uint64_t
50 * array using the L2-idx value. The 64-bit L2 entry is similar to the L3
51 * entry, except with 2 additional address bits:
53 * +-------+-------------+------+---+
54 * | 63:48 | 47:13 | 12:1 | 0 |
55 * | ... | L1-tbl-addr | ... | V |
56 * +-------+-------------+------+---+
58 * If the `V` bit is set, then the L1-tbl-addr gives the address for the
59 * level-1 table entries, with the lower address bits filled with zero. The L1
60 * Table Entry is located by indexing into this buffer as a uint64_t array
61 * using the L1-idx value. The 64-bit L1 entry is defined as:
63 * +--------+------+-------+-------+-------+---------------+-----+---+
64 * | 63:58 | 57 | 56:54 | 53:52 | 51:48 | 47:8 | 7:1 | 0 |
65 * | Format | Y/Cr | Depth | TM | ... | aux-data-addr | ... | V |
66 * +--------+------+-------+-------+-------+---------------+-----+---+
69 * - Format: See `get_format_encoding`
70 * - Y/Cr: 0=not-Y/Cr, 1=Y/Cr
71 * - (bit) Depth: See `get_bpp_encoding`
72 * - TM (Tile-mode): 0=Ys, 1=Y, 2=rsvd, 3=rsvd
73 * - aux-data-addr: VMA/GPU address for the aux-data
#include <inttypes.h>
#include <pthread.h>

#include "gen_aux_map.h"

#include "dev/gen_device_info.h"

#include "drm-uapi/i915_drm.h"
#include "util/list.h"
#include "util/ralloc.h"
#include "util/u_atomic.h"
#include "main/macros.h"
/* Compile-time switch: when true, table updates are logged to stderr. */
static const bool aux_map_debug = false;
96 struct aux_map_buffer
{
97 struct list_head link
;
98 struct gen_buffer
*buffer
;
101 struct gen_aux_map_context
{
103 pthread_mutex_t mutex
;
104 struct gen_mapped_pinned_buffer_alloc
*buffer_alloc
;
105 uint32_t num_buffers
;
106 struct list_head buffers
;
107 uint64_t level3_base_addr
;
108 uint64_t *level3_map
;
109 uint32_t tail_offset
, tail_remaining
;
114 add_buffer(struct gen_aux_map_context
*ctx
)
116 struct aux_map_buffer
*buf
= ralloc(ctx
, struct aux_map_buffer
);
120 const uint32_t size
= 0x100000;
121 buf
->buffer
= ctx
->buffer_alloc
->alloc(ctx
->driver_ctx
, size
);
127 assert(buf
->buffer
->map
!= NULL
);
129 list_addtail(&buf
->link
, &ctx
->buffers
);
130 ctx
->tail_offset
= 0;
131 ctx
->tail_remaining
= size
;
132 p_atomic_inc(&ctx
->num_buffers
);
138 advance_current_pos(struct gen_aux_map_context
*ctx
, uint32_t size
)
140 assert(ctx
->tail_remaining
>= size
);
141 ctx
->tail_remaining
-= size
;
142 ctx
->tail_offset
+= size
;
146 align_and_verify_space(struct gen_aux_map_context
*ctx
, uint32_t size
,
149 if (ctx
->tail_remaining
< size
)
152 struct aux_map_buffer
*tail
=
153 list_last_entry(&ctx
->buffers
, struct aux_map_buffer
, link
);
154 uint64_t gpu
= tail
->buffer
->gpu
+ ctx
->tail_offset
;
155 uint64_t aligned
= align64(gpu
, align
);
157 if ((aligned
- gpu
) + size
> ctx
->tail_remaining
) {
160 if (aligned
- gpu
> 0)
161 advance_current_pos(ctx
, aligned
- gpu
);
167 get_current_pos(struct gen_aux_map_context
*ctx
, uint64_t *gpu
, uint64_t **map
)
169 assert(!list_is_empty(&ctx
->buffers
));
170 struct aux_map_buffer
*tail
=
171 list_last_entry(&ctx
->buffers
, struct aux_map_buffer
, link
);
173 *gpu
= tail
->buffer
->gpu
+ ctx
->tail_offset
;
175 *map
= (uint64_t*)((uint8_t*)tail
->buffer
->map
+ ctx
->tail_offset
);
179 add_sub_table(struct gen_aux_map_context
*ctx
, uint32_t size
,
180 uint32_t align
, uint64_t *gpu
, uint64_t **map
)
182 if (!align_and_verify_space(ctx
, size
, align
)) {
183 if (!add_buffer(ctx
))
185 UNUSED
bool aligned
= align_and_verify_space(ctx
, size
, align
);
188 get_current_pos(ctx
, gpu
, map
);
189 memset(*map
, 0, size
);
190 advance_current_pos(ctx
, size
);
195 gen_aux_map_get_state_num(struct gen_aux_map_context
*ctx
)
197 return p_atomic_read(&ctx
->state_num
);
200 struct gen_aux_map_context
*
201 gen_aux_map_init(void *driver_ctx
,
202 struct gen_mapped_pinned_buffer_alloc
*buffer_alloc
,
203 const struct gen_device_info
*devinfo
)
205 struct gen_aux_map_context
*ctx
;
206 if (devinfo
->gen
< 12)
209 ctx
= ralloc(NULL
, struct gen_aux_map_context
);
213 if (pthread_mutex_init(&ctx
->mutex
, NULL
))
216 ctx
->driver_ctx
= driver_ctx
;
217 ctx
->buffer_alloc
= buffer_alloc
;
218 ctx
->num_buffers
= 0;
219 list_inithead(&ctx
->buffers
);
220 ctx
->tail_offset
= 0;
221 ctx
->tail_remaining
= 0;
224 if (add_sub_table(ctx
, 32 * 1024, 32 * 1024, &ctx
->level3_base_addr
,
227 fprintf(stderr
, "AUX-MAP L3: 0x%"PRIx64
", map=%p\n",
228 ctx
->level3_base_addr
, ctx
->level3_map
);
229 p_atomic_inc(&ctx
->state_num
);
238 gen_aux_map_finish(struct gen_aux_map_context
*ctx
)
243 pthread_mutex_destroy(&ctx
->mutex
);
244 list_for_each_entry_safe(struct aux_map_buffer
, buf
, &ctx
->buffers
, link
) {
245 ctx
->buffer_alloc
->free(ctx
->driver_ctx
, buf
->buffer
);
246 list_del(&buf
->link
);
247 p_atomic_dec(&ctx
->num_buffers
);
255 gen_aux_map_get_base(struct gen_aux_map_context
*ctx
)
258 * This get initialized in gen_aux_map_init, and never changes, so there is
259 * no need to lock the mutex.
261 return ctx
->level3_base_addr
;
264 static struct aux_map_buffer
*
265 find_buffer(struct gen_aux_map_context
*ctx
, uint64_t addr
)
267 list_for_each_entry(struct aux_map_buffer
, buf
, &ctx
->buffers
, link
) {
268 if (buf
->buffer
->gpu
<= addr
&& buf
->buffer
->gpu_end
> addr
) {
276 get_u64_entry_ptr(struct gen_aux_map_context
*ctx
, uint64_t addr
)
278 struct aux_map_buffer
*buf
= find_buffer(ctx
, addr
);
280 uintptr_t map_offset
= addr
- buf
->buffer
->gpu
;
281 return (uint64_t*)((uint8_t*)buf
->buffer
->map
+ map_offset
);
/**
 * Encode bits-per-pixel into the 3-bit "Depth" field of an L1 entry
 * (bits 56:54 — see the entry layout in the file header).
 *
 * NOTE(review): the switch body was reconstructed from the upstream
 * hardware encoding table — confirm against the original source.
 */
static uint8_t
get_bpp_encoding(uint16_t bpp)
{
   switch (bpp) {
   case 16:  return 0;
   case 10:  return 1;
   case 12:  return 2;
   case 8:   return 4;
   case 32:  return 5;
   case 64:  return 6;
   case 128: return 7;
   default:
      unreachable("Unsupported bpp!");
      return 0;
   }
}
/* L1-entry TM field (bits 53:52) = 1, i.e. legacy-Y tile mode (see header). */
#define GEN_AUX_MAP_ENTRY_Y_TILED_BIT (0x1ull << 52)
304 gen_aux_map_format_bits_for_isl_surf(const struct isl_surf
*isl_surf
)
306 const struct isl_format_layout
*fmtl
=
307 isl_format_get_layout(isl_surf
->format
);
309 uint16_t bpp
= fmtl
->bpb
;
310 assert(fmtl
->bw
== 1 && fmtl
->bh
== 1 && fmtl
->bd
== 1);
312 fprintf(stderr
, "AUX-MAP entry %s, bpp=%d\n",
313 isl_format_get_name(isl_surf
->format
), bpp
);
315 assert(isl_tiling_is_any_y(isl_surf
->tiling
));
317 uint64_t format_bits
=
318 ((uint64_t)isl_format_get_aux_map_encoding(isl_surf
->format
) << 58) |
319 ((uint64_t)get_bpp_encoding(bpp
) << 54) |
320 GEN_AUX_MAP_ENTRY_Y_TILED_BIT
;
322 assert((format_bits
& GEN_AUX_MAP_FORMAT_BITS_MASK
) == format_bits
);
328 get_aux_entry(struct gen_aux_map_context
*ctx
, uint64_t address
,
329 uint32_t *l1_index_out
, uint64_t *l1_entry_addr_out
,
330 uint64_t **l1_entry_map_out
)
332 uint32_t l3_index
= (address
>> 36) & 0xfff;
333 uint64_t *l3_entry
= &ctx
->level3_map
[l3_index
];
336 if ((*l3_entry
& GEN_AUX_MAP_ENTRY_VALID_BIT
) == 0) {
338 if (add_sub_table(ctx
, 32 * 1024, 32 * 1024, &l2_gpu
, &l2_map
)) {
340 fprintf(stderr
, "AUX-MAP L3[0x%x]: 0x%"PRIx64
", map=%p\n",
341 l3_index
, l2_gpu
, l2_map
);
343 unreachable("Failed to add L2 Aux-Map Page Table!");
345 *l3_entry
= (l2_gpu
& 0xffffffff8000ULL
) | 1;
347 uint64_t l2_addr
= gen_canonical_address(*l3_entry
& ~0x7fffULL
);
348 l2_map
= get_u64_entry_ptr(ctx
, l2_addr
);
350 uint32_t l2_index
= (address
>> 24) & 0xfff;
351 uint64_t *l2_entry
= &l2_map
[l2_index
];
353 uint64_t l1_addr
, *l1_map
;
354 if ((*l2_entry
& GEN_AUX_MAP_ENTRY_VALID_BIT
) == 0) {
355 if (add_sub_table(ctx
, 8 * 1024, 8 * 1024, &l1_addr
, &l1_map
)) {
357 fprintf(stderr
, "AUX-MAP L2[0x%x]: 0x%"PRIx64
", map=%p\n",
358 l2_index
, l1_addr
, l1_map
);
360 unreachable("Failed to add L1 Aux-Map Page Table!");
362 *l2_entry
= (l1_addr
& 0xffffffffe000ULL
) | 1;
364 l1_addr
= gen_canonical_address(*l2_entry
& ~0x1fffULL
);
365 l1_map
= get_u64_entry_ptr(ctx
, l1_addr
);
367 uint32_t l1_index
= (address
>> 16) & 0xff;
369 *l1_index_out
= l1_index
;
370 if (l1_entry_addr_out
)
371 *l1_entry_addr_out
= l1_addr
+ l1_index
* sizeof(*l1_map
);
372 if (l1_entry_map_out
)
373 *l1_entry_map_out
= &l1_map
[l1_index
];
377 add_mapping(struct gen_aux_map_context
*ctx
, uint64_t address
,
378 uint64_t aux_address
, uint64_t format_bits
,
382 fprintf(stderr
, "AUX-MAP 0x%"PRIx64
" => 0x%"PRIx64
"\n", address
,
387 get_aux_entry(ctx
, address
, &l1_index
, NULL
, &l1_entry
);
389 const uint64_t l1_data
=
390 (aux_address
& GEN_AUX_MAP_ADDRESS_MASK
) |
392 GEN_AUX_MAP_ENTRY_VALID_BIT
;
394 const uint64_t current_l1_data
= *l1_entry
;
395 if ((current_l1_data
& GEN_AUX_MAP_ENTRY_VALID_BIT
) == 0) {
396 assert((aux_address
& 0xffULL
) == 0);
398 fprintf(stderr
, "AUX-MAP L1[0x%x] 0x%"PRIx64
" -> 0x%"PRIx64
"\n",
399 l1_index
, current_l1_data
, l1_data
);
401 * We use non-zero bits in 63:1 to indicate the entry had been filled
402 * previously. If these bits are non-zero and they don't exactly match
403 * what we want to program into the entry, then we must force the
404 * aux-map tables to be flushed.
406 if (current_l1_data
!= 0 && \
407 (current_l1_data
| GEN_AUX_MAP_ENTRY_VALID_BIT
) != l1_data
)
408 *state_changed
= true;
412 fprintf(stderr
, "AUX-MAP L1[0x%x] is already marked valid!\n",
414 assert(*l1_entry
== l1_data
);
419 gen_aux_map_get_entry(struct gen_aux_map_context
*ctx
,
421 uint64_t *entry_address
)
423 pthread_mutex_lock(&ctx
->mutex
);
424 uint64_t *l1_entry_map
;
425 get_aux_entry(ctx
, address
, NULL
, entry_address
, &l1_entry_map
);
426 pthread_mutex_unlock(&ctx
->mutex
);
432 gen_aux_map_add_mapping(struct gen_aux_map_context
*ctx
, uint64_t address
,
433 uint64_t aux_address
, uint64_t main_size_B
,
434 uint64_t format_bits
)
436 bool state_changed
= false;
437 pthread_mutex_lock(&ctx
->mutex
);
438 uint64_t map_addr
= address
;
439 uint64_t dest_aux_addr
= aux_address
;
440 assert(align64(address
, GEN_AUX_MAP_MAIN_PAGE_SIZE
) == address
);
441 assert(align64(aux_address
, GEN_AUX_MAP_AUX_PAGE_SIZE
) == aux_address
);
442 while (map_addr
- address
< main_size_B
) {
443 add_mapping(ctx
, map_addr
, dest_aux_addr
, format_bits
, &state_changed
);
444 map_addr
+= GEN_AUX_MAP_MAIN_PAGE_SIZE
;
445 dest_aux_addr
+= GEN_AUX_MAP_AUX_PAGE_SIZE
;
447 pthread_mutex_unlock(&ctx
->mutex
);
449 p_atomic_inc(&ctx
->state_num
);
453 gen_aux_map_add_image(struct gen_aux_map_context
*ctx
,
454 const struct isl_surf
*isl_surf
, uint64_t address
,
455 uint64_t aux_address
)
457 gen_aux_map_add_mapping(ctx
, address
, aux_address
, isl_surf
->size_B
,
458 gen_aux_map_format_bits_for_isl_surf(isl_surf
));
462 * We mark the leaf entry as invalid, but we don't attempt to cleanup the
463 * other levels of translation mappings. Since we attempt to re-use VMA
464 * ranges, hopefully this will not lead to unbounded growth of the translation
468 remove_mapping(struct gen_aux_map_context
*ctx
, uint64_t address
,
471 uint32_t l3_index
= (address
>> 36) & 0xfff;
472 uint64_t *l3_entry
= &ctx
->level3_map
[l3_index
];
475 if ((*l3_entry
& GEN_AUX_MAP_ENTRY_VALID_BIT
) == 0) {
478 uint64_t l2_addr
= gen_canonical_address(*l3_entry
& ~0x7fffULL
);
479 l2_map
= get_u64_entry_ptr(ctx
, l2_addr
);
481 uint32_t l2_index
= (address
>> 24) & 0xfff;
482 uint64_t *l2_entry
= &l2_map
[l2_index
];
485 if ((*l2_entry
& GEN_AUX_MAP_ENTRY_VALID_BIT
) == 0) {
488 uint64_t l1_addr
= gen_canonical_address(*l2_entry
& ~0x1fffULL
);
489 l1_map
= get_u64_entry_ptr(ctx
, l1_addr
);
491 uint32_t l1_index
= (address
>> 16) & 0xff;
492 uint64_t *l1_entry
= &l1_map
[l1_index
];
494 const uint64_t current_l1_data
= *l1_entry
;
495 const uint64_t l1_data
= current_l1_data
& ~1ull;
497 if ((current_l1_data
& GEN_AUX_MAP_ENTRY_VALID_BIT
) == 0) {
501 fprintf(stderr
, "AUX-MAP [0x%x][0x%x][0x%x] L1 entry removed!\n",
502 l3_index
, l2_index
, l1_index
);
504 * We use non-zero bits in 63:1 to indicate the entry had been filled
505 * previously. In the unlikely event that these are all zero, we force a
506 * flush of the aux-map tables.
508 if (unlikely(l1_data
== 0))
509 *state_changed
= true;
515 gen_aux_map_unmap_range(struct gen_aux_map_context
*ctx
, uint64_t address
,
518 bool state_changed
= false;
519 pthread_mutex_lock(&ctx
->mutex
);
521 fprintf(stderr
, "AUX-MAP remove 0x%"PRIx64
"-0x%"PRIx64
"\n", address
,
524 uint64_t map_addr
= address
;
525 assert(align64(address
, GEN_AUX_MAP_MAIN_PAGE_SIZE
) == address
);
526 while (map_addr
- address
< size
) {
527 remove_mapping(ctx
, map_addr
, &state_changed
);
528 map_addr
+= 64 * 1024;
530 pthread_mutex_unlock(&ctx
->mutex
);
532 p_atomic_inc(&ctx
->state_num
);
536 gen_aux_map_get_num_buffers(struct gen_aux_map_context
*ctx
)
538 return p_atomic_read(&ctx
->num_buffers
);
542 gen_aux_map_fill_bos(struct gen_aux_map_context
*ctx
, void **driver_bos
,
545 assert(p_atomic_read(&ctx
->num_buffers
) >= max_bos
);
547 list_for_each_entry(struct aux_map_buffer
, buf
, &ctx
->buffers
, link
) {
550 driver_bos
[i
++] = buf
->buffer
->driver_bo
;