/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <pthread.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>

#include "util/u_memory.h"
#include "ac_debug.h"
#include "amdgpu_id.h"
#include "radv_radeon_winsys.h"
#include "radv_amdgpu_cs.h"
#include "radv_amdgpu_bo.h"
#include "sid.h"
enum {
	VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024
};
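/* A command stream is backed either by a kernel-visible IB buffer (when
 * use_ib_bos is set) or by a plain malloc'ed dword array that gets copied
 * into a GTT buffer at submit time.  Referenced buffers are deduplicated
 * through a small hash table keyed on the BO handle. */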
struct radv_amdgpu_cs {
	struct radeon_winsys_cs base;
	struct radv_amdgpu_winsys *ws;

	struct amdgpu_cs_ib_info ib;

	struct radeon_winsys_bo *ib_buffer;
	uint8_t *ib_mapped;
	unsigned max_num_buffers;
	unsigned num_buffers;
	amdgpu_bo_handle *handles;
	uint8_t *priorities;

	struct radeon_winsys_bo **old_ib_buffers;
	unsigned num_old_ib_buffers;
	unsigned max_num_old_ib_buffers;
	unsigned *ib_size_ptr;
	bool failed;
	bool is_chained;

	int buffer_hash_table[1024];
	unsigned hw_ip;

	unsigned num_virtual_buffers;
	unsigned max_num_virtual_buffers;
	struct radeon_winsys_bo **virtual_buffers;
	uint8_t *virtual_buffer_priorities;
	int *virtual_buffer_hash_table;
};
static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_winsys_cs *base)
{
	return (struct radv_amdgpu_cs *)base;
}
struct radv_amdgpu_sem_info {
	struct radeon_winsys_sem **wait_sems;
	unsigned wait_sem_count;
	struct radeon_winsys_sem **signal_sems;
	unsigned signal_sem_count;
};
static int ring_to_hw_ip(enum ring_type ring)
{
	switch (ring) {
	case RING_GFX:
		return AMDGPU_HW_IP_GFX;
	case RING_DMA:
		return AMDGPU_HW_IP_DMA;
	case RING_COMPUTE:
		return AMDGPU_HW_IP_COMPUTE;
	default:
		unreachable("unsupported ring");
	}
}
static void radv_amdgpu_wait_sems(struct radv_amdgpu_ctx *ctx,
				  uint32_t ip_type,
				  uint32_t ring,
				  struct radv_amdgpu_sem_info *sem_info);
static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx,
				   uint32_t ip_type,
				   uint32_t ring,
				   struct radv_amdgpu_sem_info *sem_info);
static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
				 struct amdgpu_cs_request *request,
				 struct radv_amdgpu_sem_info *sem_info);
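/* Fill a winsys fence from a submitted request.  user_ptr points at the
 * per-ring slot of the CPU-visible fence BO that the kernel bumps to the
 * retired sequence number, so most fence waits can be answered without an
 * ioctl (see radv_amdgpu_fence_wait below). */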
static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
					 struct radv_amdgpu_fence *fence,
					 struct amdgpu_cs_request *req)
{
	fence->fence.context = ctx->ctx;
	fence->fence.ip_type = req->ip_type;
	fence->fence.ip_instance = req->ip_instance;
	fence->fence.ring = req->ring;
	fence->fence.fence = req->seq_no;
	/* fence_map is a uint64_t array with one slot per ring; index it
	 * directly so the slot matches the byte offset handed to the kernel
	 * in radv_set_cs_fence(). */
	fence->user_ptr = (volatile uint64_t*)(ctx->fence_map +
					       (req->ip_type * MAX_RINGS_PER_TYPE + req->ring));
}
static struct radeon_winsys_fence *radv_amdgpu_create_fence()
{
	struct radv_amdgpu_fence *fence = calloc(1, sizeof(struct radv_amdgpu_fence));
	return (struct radeon_winsys_fence*)fence;
}
static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence)
{
	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
	free(fence);
}
static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
				   struct radeon_winsys_fence *_fence,
				   bool absolute,
				   uint64_t timeout)
{
	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
	unsigned flags = absolute ? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE : 0;
	int r;
	uint32_t expired = 0;

	if (fence->user_ptr) {
		if (*fence->user_ptr >= fence->fence.fence)
			return true;
		if (!absolute && !timeout)
			return false;
	}

	/* Now use the libdrm query. */
	r = amdgpu_cs_query_fence_status(&fence->fence,
					 timeout,
					 flags,
					 &expired);

	if (r) {
		fprintf(stderr, "amdgpu: radv_amdgpu_cs_query_fence_status failed.\n");
		return false;
	}

	return expired != 0;
}
static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);

	if (cs->ib_buffer)
		cs->ws->base.buffer_destroy(cs->ib_buffer);
	else
		free(cs->base.buf);

	for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
		cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

	free(cs->old_ib_buffers);
	free(cs->virtual_buffers);
	free(cs->virtual_buffer_priorities);
	free(cs->virtual_buffer_hash_table);
	free(cs->handles);
	free(cs->priorities);
	free(cs);
}
static boolean radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
				   enum ring_type ring_type)
{
	for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
		cs->buffer_hash_table[i] = -1;

	cs->hw_ip = ring_to_hw_ip(ring_type);
	return true;
}
static struct radeon_winsys_cs *
radv_amdgpu_cs_create(struct radeon_winsys *ws,
		      enum ring_type ring_type)
{
	struct radv_amdgpu_cs *cs;
	uint32_t ib_size = 20 * 1024 * 4;
	cs = calloc(1, sizeof(struct radv_amdgpu_cs));
	if (!cs)
		return NULL;

	cs->ws = radv_amdgpu_winsys(ws);
	radv_amdgpu_init_cs(cs, ring_type);

	if (cs->ws->use_ib_bos) {
		cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
						  RADEON_DOMAIN_GTT,
						  RADEON_FLAG_CPU_ACCESS);
		if (!cs->ib_buffer) {
			free(cs);
			return NULL;
		}

		cs->ib_mapped = ws->buffer_map(cs->ib_buffer);
		if (!cs->ib_mapped) {
			ws->buffer_destroy(cs->ib_buffer);
			free(cs);
			return NULL;
		}

		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
		cs->base.buf = (uint32_t *)cs->ib_mapped;
		cs->base.max_dw = ib_size / 4 - 4;
		cs->ib_size_ptr = &cs->ib.size;
		cs->ib.size = 0;

		ws->cs_add_buffer(&cs->base, cs->ib_buffer, 8);
	} else {
		cs->base.buf = malloc(16384);
		cs->base.max_dw = 4096;
		if (!cs->base.buf) {
			free(cs);
			return NULL;
		}
	}

	return &cs->base;
}
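/* Grow the command stream so at least min_size more dwords fit.  Without
 * IB BOs the malloc'ed array is reallocated, up to the kernel's per-IB
 * limit.  With IB BOs the current IB is padded to the alignment the CP
 * requires, a fresh buffer is allocated, and a chain packet is written so
 * the hardware follows the old IB into the new one.  The trailing four
 * dwords then look like:
 *
 *   PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)
 *   va_lo
 *   va_hi
 *   S_3F2_CHAIN(1) | S_3F2_VALID(1) | size_dw   <- patched via ib_size_ptr
 */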
static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);

	if (cs->failed) {
		cs->base.cdw = 0;
		return;
	}

	if (!cs->ws->use_ib_bos) {
		const uint64_t limit_dws = 0xffff8;
		uint64_t ib_dws = MAX2(cs->base.cdw + min_size,
				       MIN2(cs->base.max_dw * 2, limit_dws));

		/* The total ib size cannot exceed limit_dws dwords. */
		if (ib_dws > limit_dws) {
			cs->failed = true;
			cs->base.cdw = 0;
			return;
		}

		uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
		if (new_buf) {
			cs->base.buf = new_buf;
			cs->base.max_dw = ib_dws;
		} else {
			cs->failed = true;
			cs->base.cdw = 0;
		}
		return;
	}

	uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);

	/* max that fits in the chain size field. */
	ib_size = MIN2(ib_size, 0xfffff);

	while (!cs->base.cdw || (cs->base.cdw & 7) != 4)
		cs->base.buf[cs->base.cdw++] = 0xffff1000;

	*cs->ib_size_ptr |= cs->base.cdw + 4;

	if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) {
		cs->max_num_old_ib_buffers = MAX2(1, cs->max_num_old_ib_buffers * 2);
		cs->old_ib_buffers = realloc(cs->old_ib_buffers,
					     cs->max_num_old_ib_buffers * sizeof(void*));
	}

	cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer;

	cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0,
						   RADEON_DOMAIN_GTT,
						   RADEON_FLAG_CPU_ACCESS);

	if (!cs->ib_buffer) {
		cs->base.cdw = 0;
		cs->failed = true;
		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
	}

	cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer);
	if (!cs->ib_mapped) {
		cs->ws->base.buffer_destroy(cs->ib_buffer);
		cs->base.cdw = 0;
		cs->failed = true;
		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
	}

	cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

	cs->base.buf[cs->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va >> 32;
	cs->ib_size_ptr = cs->base.buf + cs->base.cdw;
	cs->base.buf[cs->base.cdw++] = S_3F2_CHAIN(1) | S_3F2_VALID(1);

	cs->base.buf = (uint32_t *)cs->ib_mapped;
	cs->base.cdw = 0;
	cs->base.max_dw = ib_size / 4 - 4;
}
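/* Pad the IB to the 8-dword boundary the hardware expects and patch the
 * final size into the location recorded in ib_size_ptr. */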
static bool radv_amdgpu_cs_finalize(struct radeon_winsys_cs *_cs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);

	if (cs->ws->use_ib_bos) {
		while (!cs->base.cdw || (cs->base.cdw & 7) != 0)
			cs->base.buf[cs->base.cdw++] = 0xffff1000;

		*cs->ib_size_ptr |= cs->base.cdw;

		cs->is_chained = false;
	}

	return !cs->failed;
}
static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
	cs->base.cdw = 0;
	cs->failed = false;

	for (unsigned i = 0; i < cs->num_buffers; ++i) {
		unsigned hash = ((uintptr_t)cs->handles[i] >> 6) &
		                (ARRAY_SIZE(cs->buffer_hash_table) - 1);
		cs->buffer_hash_table[hash] = -1;
	}

	for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
		unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
		cs->virtual_buffer_hash_table[hash] = -1;
	}

	cs->num_buffers = 0;
	cs->num_virtual_buffers = 0;

	if (cs->ws->use_ib_bos) {
		cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

		for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
			cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

		cs->num_old_ib_buffers = 0;
		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
		cs->ib_size_ptr = &cs->ib.size;
		cs->ib.size = 0;
	}
}
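/* The buffer hash table maps (handle >> 6) to an index into the handles
 * array; on a miss we fall back to a linear scan and refresh the cached
 * table entry. */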
static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs,
				      amdgpu_bo_handle bo)
{
	unsigned hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	int index = cs->buffer_hash_table[hash];

	if (index == -1)
		return -1;

	if (cs->handles[index] == bo)
		return index;

	for (unsigned i = 0; i < cs->num_buffers; ++i) {
		if (cs->handles[i] == bo) {
			cs->buffer_hash_table[hash] = i;
			return i;
		}
	}

	return -1;
}
static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
					       amdgpu_bo_handle bo,
					       uint8_t priority)
{
	unsigned hash;
	int index = radv_amdgpu_cs_find_buffer(cs, bo);

	if (index != -1) {
		cs->priorities[index] = MAX2(cs->priorities[index], priority);
		return;
	}

	if (cs->num_buffers == cs->max_num_buffers) {
		unsigned new_count = MAX2(1, cs->max_num_buffers * 2);
		cs->handles = realloc(cs->handles, new_count * sizeof(amdgpu_bo_handle));
		cs->priorities = realloc(cs->priorities, new_count * sizeof(uint8_t));
		cs->max_num_buffers = new_count;
	}

	cs->handles[cs->num_buffers] = bo;
	cs->priorities[cs->num_buffers] = priority;

	hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	cs->buffer_hash_table[hash] = cs->num_buffers;

	++cs->num_buffers;
}
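/* Virtual (sparse) buffers cannot be handed to the kernel directly; they
 * are tracked separately and expanded into the physical BOs that back
 * them when the BO list is built at submission. */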
static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_winsys_cs *_cs,
					      struct radeon_winsys_bo *bo,
					      uint8_t priority)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
	unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);

	if (!cs->virtual_buffer_hash_table) {
		cs->virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
		for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
			cs->virtual_buffer_hash_table[i] = -1;
	}

	if (cs->virtual_buffer_hash_table[hash] >= 0) {
		int idx = cs->virtual_buffer_hash_table[hash];
		if (cs->virtual_buffers[idx] == bo) {
			cs->virtual_buffer_priorities[idx] = MAX2(cs->virtual_buffer_priorities[idx], priority);
			return;
		}
		for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
			if (cs->virtual_buffers[i] == bo) {
				cs->virtual_buffer_priorities[i] = MAX2(cs->virtual_buffer_priorities[i], priority);
				cs->virtual_buffer_hash_table[hash] = i;
				return;
			}
		}
	}

	if (cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
		cs->max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
		cs->virtual_buffers = realloc(cs->virtual_buffers, sizeof(struct radeon_winsys_bo*) * cs->max_num_virtual_buffers);
		cs->virtual_buffer_priorities = realloc(cs->virtual_buffer_priorities, sizeof(uint8_t) * cs->max_num_virtual_buffers);
	}

	cs->virtual_buffers[cs->num_virtual_buffers] = bo;
	cs->virtual_buffer_priorities[cs->num_virtual_buffers] = priority;

	cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
	++cs->num_virtual_buffers;
}
static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
				      struct radeon_winsys_bo *_bo,
				      uint8_t priority)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

	if (bo->is_virtual) {
		radv_amdgpu_cs_add_virtual_buffer(_cs, _bo, priority);
		return;
	}

	radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
}
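/* Execute a secondary command stream from a primary: with IB BOs the child
 * IB is invoked through an INDIRECT_BUFFER packet, otherwise its dwords
 * are copied inline into the parent. */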
static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
					     struct radeon_winsys_cs *_child)
{
	struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
	struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);

	for (unsigned i = 0; i < child->num_buffers; ++i) {
		radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i],
						   child->priorities[i]);
	}

	for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
		radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i],
					  child->virtual_buffer_priorities[i]);
	}

	if (parent->ws->use_ib_bos) {
		if (parent->base.cdw + 4 > parent->base.max_dw)
			radv_amdgpu_cs_grow(&parent->base, 4);

		parent->base.buf[parent->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address;
		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address >> 32;
		parent->base.buf[parent->base.cdw++] = child->ib.size;
	} else {
		if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
			radv_amdgpu_cs_grow(&parent->base, child->base.cdw);

		memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
		parent->base.cdw += child->base.cdw;
	}
}
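/* Build the amdgpu BO list for a submission.  Three paths: every BO in the
 * winsys (debug_all_bos), a single-CS fast path that reuses the CS's own
 * arrays, and a general path that merges and deduplicates the buffers of
 * all command streams, the optional extra BO/CS, and the backings of any
 * virtual buffers. */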
static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
				      struct radeon_winsys_cs **cs_array,
				      unsigned count,
				      struct radv_amdgpu_winsys_bo *extra_bo,
				      struct radeon_winsys_cs *extra_cs,
				      amdgpu_bo_list_handle *bo_list)
{
	int r;

	if (ws->debug_all_bos) {
		struct radv_amdgpu_winsys_bo *bo;
		amdgpu_bo_handle *handles;
		unsigned num = 0;

		pthread_mutex_lock(&ws->global_bo_list_lock);

		handles = malloc(sizeof(handles[0]) * ws->num_buffers);
		if (!handles) {
			pthread_mutex_unlock(&ws->global_bo_list_lock);
			return -ENOMEM;
		}

		LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) {
			assert(num < ws->num_buffers);
			handles[num++] = bo->bo;
		}

		r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
					  handles, NULL, bo_list);
		free(handles);
		pthread_mutex_unlock(&ws->global_bo_list_lock);
	} else if (count == 1 && !extra_bo && !extra_cs &&
	           !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
		struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
		r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
					  cs->priorities, bo_list);
	} else {
		unsigned total_buffer_count = !!extra_bo;
		unsigned unique_bo_count = !!extra_bo;
		for (unsigned i = 0; i < count; ++i) {
			struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
			total_buffer_count += cs->num_buffers;
			for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
				total_buffer_count += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
		}

		if (extra_cs)
			total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers;

		amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count);
		uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count);
		if (!handles || !priorities) {
			free(handles);
			free(priorities);
			return -ENOMEM;
		}

		if (extra_bo) {
			handles[0] = extra_bo->bo;
			priorities[0] = 8;
		}

		for (unsigned i = 0; i < count + !!extra_cs; ++i) {
			struct radv_amdgpu_cs *cs;

			if (i == count)
				cs = (struct radv_amdgpu_cs*)extra_cs;
			else
				cs = (struct radv_amdgpu_cs*)cs_array[i];

			if (!cs->num_buffers)
				continue;

			if (unique_bo_count == 0) {
				memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle));
				memcpy(priorities, cs->priorities, cs->num_buffers * sizeof(uint8_t));
				unique_bo_count = cs->num_buffers;
				continue;
			}
			int unique_bo_so_far = unique_bo_count;
			for (unsigned j = 0; j < cs->num_buffers; ++j) {
				bool found = false;
				for (unsigned k = 0; k < unique_bo_so_far; ++k) {
					if (handles[k] == cs->handles[j]) {
						found = true;
						priorities[k] = MAX2(priorities[k],
								     cs->priorities[j]);
						break;
					}
				}
				if (!found) {
					handles[unique_bo_count] = cs->handles[j];
					priorities[unique_bo_count] = cs->priorities[j];
					++unique_bo_count;
				}
			}
			for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
				struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
				for (unsigned k = 0; k < virtual_bo->bo_count; ++k) {
					struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
					bool found = false;
					for (unsigned m = 0; m < unique_bo_count; ++m) {
						if (handles[m] == bo->bo) {
							found = true;
							priorities[m] = MAX2(priorities[m],
									     cs->virtual_buffer_priorities[j]);
							break;
						}
					}
					if (!found) {
						handles[unique_bo_count] = bo->bo;
						priorities[unique_bo_count] = cs->virtual_buffer_priorities[j];
						++unique_bo_count;
					}
				}
			}
		}
		r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
					  priorities, bo_list);

		free(handles);
		free(priorities);
	}

	return r;
}
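/* Describe the user fence slot (BO handle plus byte offset) that the
 * kernel writes when the submission retires. */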
static struct amdgpu_cs_fence_info radv_set_cs_fence(struct radv_amdgpu_ctx *ctx, int ip_type, int ring)
{
	struct amdgpu_cs_fence_info ret = {0};
	if (ctx->fence_map) {
		ret.handle = radv_amdgpu_winsys_bo(ctx->fence_bo)->bo;
		ret.offset = (ip_type * MAX_RINGS_PER_TYPE + ring) * sizeof(uint64_t);
	}
	return ret;
}
static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
				    struct amdgpu_cs_request *request)
{
	radv_amdgpu_request_to_fence(ctx,
				     &ctx->last_submission[request->ip_type][request->ring],
				     request);
}
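/* Chained path: link the IBs together with chain packets so an arbitrary
 * number of command streams can go out as one submission, sidestepping the
 * kernel's IB-count limit.  Requires patchable (can_patch) streams. */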
static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
						int queue_idx,
						struct radv_amdgpu_sem_info *sem_info,
						struct radeon_winsys_cs **cs_array,
						unsigned cs_count,
						struct radeon_winsys_cs *initial_preamble_cs,
						struct radeon_winsys_cs *continue_preamble_cs,
						struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request = {0};
	struct amdgpu_cs_ib_info ibs[2];

	for (unsigned i = cs_count; i--;) {
		struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);

		if (cs->is_chained) {
			*cs->ib_size_ptr -= 4;
			cs->is_chained = false;
		}

		if (i + 1 < cs_count) {
			struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]);
			assert(cs->base.cdw + 4 <= cs->base.max_dw);

			cs->is_chained = true;
			*cs->ib_size_ptr += 4;

			cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
			cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address;
			cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32;
			cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size;
		}
	}

	r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs, &bo_list);
	if (r) {
		fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
		return r;
	}

	request.ip_type = cs0->hw_ip;
	request.ring = queue_idx;
	request.number_of_ibs = 1;
	request.ibs = &cs0->ib;
	request.resources = bo_list;
	request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);

	if (initial_preamble_cs) {
		request.ibs = ibs;
		request.number_of_ibs = 2;
		ibs[1] = cs0->ib;
		ibs[0] = ((struct radv_amdgpu_cs*)initial_preamble_cs)->ib;
	}

	r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
	if (r) {
		if (r == -ENOMEM)
			fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
		else
			fprintf(stderr, "amdgpu: The CS has been rejected, "
					"see dmesg for more information.\n");
	}

	amdgpu_bo_list_destroy(bo_list);

	if (fence)
		radv_amdgpu_request_to_fence(ctx, fence, &request);

	radv_assign_last_submit(ctx, &request);

	return r;
}
static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
						 int queue_idx,
						 struct radv_amdgpu_sem_info *sem_info,
						 struct radeon_winsys_cs **cs_array,
						 unsigned cs_count,
						 struct radeon_winsys_cs *initial_preamble_cs,
						 struct radeon_winsys_cs *continue_preamble_cs,
						 struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request;

	assert(cs_count);

	for (unsigned i = 0; i < cs_count;) {
		struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
		struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
		struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
		unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs,
				    cs_count - i);

		memset(&request, 0, sizeof(request));

		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL,
					       preamble_cs, &bo_list);
		if (r) {
			fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
			return r;
		}

		request.ip_type = cs0->hw_ip;
		request.ring = queue_idx;
		request.resources = bo_list;
		request.number_of_ibs = cnt + !!preamble_cs;
		request.ibs = ibs;
		request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);

		if (preamble_cs)
			ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;

		for (unsigned j = 0; j < cnt; ++j) {
			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
			ibs[j + !!preamble_cs] = cs->ib;

			if (cs->is_chained) {
				*cs->ib_size_ptr -= 4;
				cs->is_chained = false;
			}
		}

		r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
		if (r) {
			if (r == -ENOMEM)
				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
			else
				fprintf(stderr, "amdgpu: The CS has been rejected, "
						"see dmesg for more information.\n");
		}

		amdgpu_bo_list_destroy(bo_list);

		if (r)
			return r;

		i += cnt;
	}

	if (fence)
		radv_amdgpu_request_to_fence(ctx, fence, &request);

	radv_assign_last_submit(ctx, &request);

	return 0;
}
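/* Sysmem path (no IB BOs): copy the command streams into a GTT bounce
 * buffer, pad it to the required alignment, and submit from there.  SI
 * needs a different pad dword (0x80000000) than later chips (0xffff1000). */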
static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
					       int queue_idx,
					       struct radv_amdgpu_sem_info *sem_info,
					       struct radeon_winsys_cs **cs_array,
					       unsigned cs_count,
					       struct radeon_winsys_cs *initial_preamble_cs,
					       struct radeon_winsys_cs *continue_preamble_cs,
					       struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
	struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request;
	uint32_t pad_word = 0xffff1000U;

	if (radv_amdgpu_winsys(ws)->info.chip_class == SI)
		pad_word = 0x80000000;

	assert(cs_count);

	for (unsigned i = 0; i < cs_count;) {
		struct amdgpu_cs_ib_info ib = {0};
		struct radeon_winsys_bo *bo = NULL;
		struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
		uint32_t *ptr;
		unsigned cnt = 0;
		unsigned size = 0;

		if (preamble_cs)
			size += preamble_cs->cdw;

		while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
			size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
			++cnt;
		}

		assert(cnt);

		bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
		ptr = ws->buffer_map(bo);

		if (preamble_cs) {
			memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
			ptr += preamble_cs->cdw;
		}

		for (unsigned j = 0; j < cnt; ++j) {
			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
			memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
			ptr += cs->base.cdw;
		}

		while (!size || (size & 7)) {
			*ptr++ = pad_word;
			++size;
		}

		memset(&request, 0, sizeof(request));

		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
					       (struct radv_amdgpu_winsys_bo*)bo,
					       preamble_cs, &bo_list);
		if (r) {
			fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
			return r;
		}

		ib.size = size;
		ib.ib_mc_address = ws->buffer_get_va(bo);

		request.ip_type = cs0->hw_ip;
		request.ring = queue_idx;
		request.resources = bo_list;
		request.number_of_ibs = 1;
		request.ibs = &ib;
		request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);

		r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
		if (r) {
			if (r == -ENOMEM)
				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
			else
				fprintf(stderr, "amdgpu: The CS has been rejected, "
						"see dmesg for more information.\n");
		}

		amdgpu_bo_list_destroy(bo_list);

		ws->buffer_destroy(bo);
		if (r)
			return r;

		i += cnt;
	}

	if (fence)
		radv_amdgpu_request_to_fence(ctx, fence, &request);

	radv_assign_last_submit(ctx, &request);

	return 0;
}
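/* Top-level submit entry point: record the semaphores to wait on/signal,
 * then pick the sysmem copy path, the chained path, or the fallback path
 * that submits AMDGPU_CS_MAX_IBS_PER_SUBMIT IBs at a time. */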
static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
					int queue_idx,
					struct radeon_winsys_cs **cs_array,
					unsigned cs_count,
					struct radeon_winsys_cs *initial_preamble_cs,
					struct radeon_winsys_cs *continue_preamble_cs,
					struct radeon_winsys_sem **wait_sem,
					unsigned wait_sem_count,
					struct radeon_winsys_sem **signal_sem,
					unsigned signal_sem_count,
					bool can_patch,
					struct radeon_winsys_fence *_fence)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	int ret;
	struct radv_amdgpu_sem_info sem_info = {0};

	sem_info.wait_sems = wait_sem;
	sem_info.wait_sem_count = wait_sem_count;
	sem_info.signal_sems = signal_sem;
	sem_info.signal_sem_count = signal_sem_count;

	radv_amdgpu_wait_sems(ctx, cs->hw_ip, queue_idx, &sem_info);

	if (!cs->ws->use_ib_bos) {
		ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, &sem_info, cs_array,
							  cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) {
		ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, &sem_info, cs_array,
							   cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
	} else {
		ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, &sem_info, cs_array,
							    cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
	}

	radv_amdgpu_signal_sems(ctx, cs->hw_ip, queue_idx, &sem_info);
	return ret;
}
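/* Translate a GPU VA that lands in the current or an older IB buffer back
 * to a CPU pointer; used as callback by the IB parser when dumping. */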
static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
{
	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
	void *ret = NULL;

	if (!cs->ib_buffer)
		return NULL;
	for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
		struct radv_amdgpu_winsys_bo *bo;

		bo = (struct radv_amdgpu_winsys_bo*)
		     (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
		if (addr >= bo->va && addr - bo->va < bo->size) {
			if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
				return (char *)ret + (addr - bo->va);
		}
	}
	return ret;
}
static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
				       FILE* file,
				       uint32_t trace_id)
{
	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
	void *ib = cs->base.buf;
	int num_dw = cs->base.cdw;

	if (cs->ws->use_ib_bos) {
		ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
		num_dw = cs->ib.size;
	}

	ac_parse_ib(file, ib, num_dw, trace_id, "main IB", cs->ws->info.chip_class,
		    radv_amdgpu_winsys_get_cpu_addr, cs);
}
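/* A winsys context wraps an amdgpu context handle plus a small GTT BO
 * whose uint64_t slots hold the last retired sequence number for each
 * (IP type, ring) pair. */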
static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
	int r;

	if (!ctx)
		return NULL;
	r = amdgpu_cs_ctx_create(ws->dev, &ctx->ctx);
	if (r) {
		fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create failed. (%i)\n", r);
		goto error_create;
	}
	ctx->ws = ws;

	assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
	ctx->fence_bo = ws->base.buffer_create(&ws->base, 4096, 8,
					       RADEON_DOMAIN_GTT,
					       RADEON_FLAG_CPU_ACCESS);
	if (ctx->fence_bo)
		ctx->fence_map = (uint64_t*)ws->base.buffer_map(ctx->fence_bo);
	if (ctx->fence_map)
		memset(ctx->fence_map, 0, 4096);
	return (struct radeon_winsys_ctx *)ctx;
error_create:
	FREE(ctx);
	return NULL;
}
static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
	ctx->ws->base.buffer_destroy(ctx->fence_bo);
	amdgpu_cs_ctx_free(ctx->ctx);
	FREE(ctx);
}
static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
				      enum ring_type ring_type, int ring_index)
{
	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
	int ip_type = ring_to_hw_ip(ring_type);

	if (ctx->last_submission[ip_type][ring_index].fence.fence) {
		uint32_t expired;
		int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index].fence,
						       1000000000ull, 0, &expired);

		if (ret || !expired)
			return false;
	}

	return true;
}
static struct radeon_winsys_sem *radv_amdgpu_create_sem(struct radeon_winsys *_ws)
{
	int ret;
	amdgpu_semaphore_handle sem;

	ret = amdgpu_cs_create_semaphore(&sem);
	if (ret)
		return NULL;
	return (struct radeon_winsys_sem *)sem;
}
static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
{
	amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)_sem;
	amdgpu_cs_destroy_semaphore(sem);
}
static void radv_amdgpu_wait_sems(struct radv_amdgpu_ctx *ctx,
				  uint32_t ip_type,
				  uint32_t ring,
				  struct radv_amdgpu_sem_info *sem_info)
{
	for (unsigned i = 0; i < sem_info->wait_sem_count; i++) {
		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)sem_info->wait_sems[i];
		amdgpu_cs_wait_semaphore(ctx->ctx, ip_type, 0, ring,
					 sem);
	}
}
static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx,
				   uint32_t ip_type,
				   uint32_t ring,
				   struct radv_amdgpu_sem_info *sem_info)
{
	for (unsigned i = 0; i < sem_info->signal_sem_count; i++) {
		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)sem_info->signal_sems[i];
		amdgpu_cs_signal_semaphore(ctx->ctx, ip_type, 0, ring,
					   sem);
	}
	return 0;
}
static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
				 struct amdgpu_cs_request *request,
				 struct radv_amdgpu_sem_info *sem_info)
{
	return amdgpu_cs_submit(ctx->ctx, 0, request, 1);
}
void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
{
	ws->base.ctx_create = radv_amdgpu_ctx_create;
	ws->base.ctx_destroy = radv_amdgpu_ctx_destroy;
	ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle;
	ws->base.cs_create = radv_amdgpu_cs_create;
	ws->base.cs_destroy = radv_amdgpu_cs_destroy;
	ws->base.cs_grow = radv_amdgpu_cs_grow;
	ws->base.cs_finalize = radv_amdgpu_cs_finalize;
	ws->base.cs_reset = radv_amdgpu_cs_reset;
	ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
	ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
	ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
	ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
	ws->base.create_fence = radv_amdgpu_create_fence;
	ws->base.destroy_fence = radv_amdgpu_destroy_fence;
	ws->base.create_sem = radv_amdgpu_create_sem;
	ws->base.destroy_sem = radv_amdgpu_destroy_sem;
	ws->base.fence_wait = radv_amdgpu_fence_wait;
}