2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27 #include "drm-uapi/amdgpu_drm.h"
32 #include "util/u_memory.h"
34 #include "radv_radeon_winsys.h"
35 #include "radv_amdgpu_cs.h"
36 #include "radv_amdgpu_bo.h"
41 VIRTUAL_BUFFER_HASH_TABLE_SIZE
= 1024
/* Command-stream recording state layered on top of amdgpu.
 * NOTE(review): this extraction dropped several member declarations —
 * fields referenced elsewhere in this file (hw_ip, num_buffers,
 * ib_mapped, is_chained, failed, ...) are not visible here; confirm
 * against the original source before relying on this definition. */
44 struct radv_amdgpu_cs
{
/* Generic command buffer this struct specializes; placed first so
 * radv_amdgpu_cs() below can downcast with a plain pointer cast. */
45 struct radeon_cmdbuf base
;
46 struct radv_amdgpu_winsys
*ws
;
/* Kernel-visible descriptor of the IB currently being recorded. */
48 struct amdgpu_cs_ib_info ib
;
/* Backing buffer object for the current IB. */
50 struct radeon_winsys_bo
*ib_buffer
;
52 unsigned max_num_buffers
;
/* BO list entries handed to amdgpu_bo_list_create_raw() at submit. */
54 struct drm_amdgpu_bo_list_entry
*handles
;
/* IBs retired by radv_amdgpu_cs_grow(); destroyed on reset/destroy. */
56 struct radeon_winsys_bo
**old_ib_buffers
;
57 unsigned num_old_ib_buffers
;
58 unsigned max_num_old_ib_buffers
;
/* Points at the dword-size field that gets patched when the current
 * IB is finalized or chained to a new one. */
59 unsigned *ib_size_ptr
;
/* Maps bo_handle -> index into handles[]; -1 marks an empty slot
 * (see radv_amdgpu_init_cs / radv_amdgpu_cs_find_buffer). */
63 int buffer_hash_table
[1024];
66 unsigned num_virtual_buffers
;
67 unsigned max_num_virtual_buffers
;
68 struct radeon_winsys_bo
**virtual_buffers
;
69 int *virtual_buffer_hash_table
;
71 /* For chips that don't support chaining. */
72 struct radeon_cmdbuf
*old_cs_buffers
;
73 unsigned num_old_cs_buffers
;
/* Downcast a generic winsys command buffer to the amdgpu wrapper.
 * Valid because struct radv_amdgpu_cs embeds struct radeon_cmdbuf as
 * its first member. */
static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_cmdbuf *base)
{
	return (struct radv_amdgpu_cs *)base;
}
/* Translate a winsys ring type into the amdgpu kernel HW IP block id
 * used for submissions.
 * NOTE(review): the switch statement and its case labels were lost in
 * extraction; only the three return statements and the unreachable()
 * default remain visible — confirm the labels against the original. */
82 static int ring_to_hw_ip(enum ring_type ring
)
86 return AMDGPU_HW_IP_GFX
;
88 return AMDGPU_HW_IP_DMA
;
90 return AMDGPU_HW_IP_COMPUTE
;
/* Any other ring type is a programming error. */
92 unreachable("unsupported ring");
/* In-memory description of one command submission, mirrored into the
 * kernel submit ioctl by radv_amdgpu_cs_submit().
 * NOTE(review): several member declarations were lost in extraction —
 * the fields documented by the surviving comments (flags, ip_type,
 * ring, resources, seq_no) have no visible declaration here; confirm
 * against the original source. */
96 struct radv_amdgpu_cs_request
{
97 /** Specify flags with additional information */
100 /** Specify HW IP block type to which to send the IB. */
103 /** IP instance index if there are several IPs of the same type. */
104 unsigned ip_instance
;
107 * Specify ring index of the IP. We could have several rings
108 * in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1.
113 * List handle with resources used by this request. This is a raw
114 * bo list handle used by the kernel.
119 * Number of dependencies this Command submission needs to
120 * wait for before starting execution.
122 uint32_t number_of_dependencies
;
125 * Array of dependencies which need to be met before
126 * execution can start.
128 struct amdgpu_cs_fence
*dependencies
;
130 /** Number of IBs to submit in the field ibs. */
131 uint32_t number_of_ibs
;
134 * IBs to submit. Those IBs will be submit together as single entity
136 struct amdgpu_cs_ib_info
*ibs
;
139 * The returned sequence number for the command submission
144 * The fence information
146 struct amdgpu_cs_fence_info fence_info
;
150 static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx
*ctx
,
153 struct radv_winsys_sem_info
*sem_info
);
154 static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx
*ctx
,
155 struct radv_amdgpu_cs_request
*request
,
156 struct radv_winsys_sem_info
*sem_info
);
158 static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx
*ctx
,
159 struct radv_amdgpu_fence
*fence
,
160 struct radv_amdgpu_cs_request
*req
)
162 fence
->fence
.context
= ctx
->ctx
;
163 fence
->fence
.ip_type
= req
->ip_type
;
164 fence
->fence
.ip_instance
= req
->ip_instance
;
165 fence
->fence
.ring
= req
->ring
;
166 fence
->fence
.fence
= req
->seq_no
;
167 fence
->user_ptr
= (volatile uint64_t*)(ctx
->fence_map
+ req
->ip_type
* MAX_RINGS_PER_TYPE
+ req
->ring
);
170 static struct radeon_winsys_fence
*radv_amdgpu_create_fence()
172 struct radv_amdgpu_fence
*fence
= calloc(1, sizeof(struct radv_amdgpu_fence
));
173 fence
->fence
.fence
= UINT64_MAX
;
174 return (struct radeon_winsys_fence
*)fence
;
/* Destroy a fence created by radv_amdgpu_create_fence().
 * NOTE(review): the function body was lost in extraction — only the
 * cast of the opaque handle remains; presumably it frees `fence`,
 * confirm against the original source. */
177 static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence
*_fence
)
179 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
183 static void radv_amdgpu_reset_fence(struct radeon_winsys_fence
*_fence
)
185 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
186 fence
->fence
.fence
= UINT64_MAX
;
189 static void radv_amdgpu_signal_fence(struct radeon_winsys_fence
*_fence
)
191 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
192 fence
->fence
.fence
= 0;
195 static bool radv_amdgpu_is_fence_waitable(struct radeon_winsys_fence
*_fence
)
197 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
198 return fence
->fence
.fence
< UINT64_MAX
;
201 static bool radv_amdgpu_fence_wait(struct radeon_winsys
*_ws
,
202 struct radeon_winsys_fence
*_fence
,
206 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
207 unsigned flags
= absolute
? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE
: 0;
209 uint32_t expired
= 0;
211 /* Special casing 0 and UINT64_MAX so that they work without user_ptr/fence.ctx */
212 if (fence
->fence
.fence
== UINT64_MAX
)
215 if (fence
->fence
.fence
== 0)
218 if (fence
->user_ptr
) {
219 if (*fence
->user_ptr
>= fence
->fence
.fence
)
221 if (!absolute
&& !timeout
)
225 /* Now use the libdrm query. */
226 r
= amdgpu_cs_query_fence_status(&fence
->fence
,
232 fprintf(stderr
, "amdgpu: radv_amdgpu_cs_query_fence_status failed.\n");
243 static bool radv_amdgpu_fences_wait(struct radeon_winsys
*_ws
,
244 struct radeon_winsys_fence
*const *_fences
,
245 uint32_t fence_count
,
249 struct amdgpu_cs_fence
*fences
= malloc(sizeof(struct amdgpu_cs_fence
) * fence_count
);
251 uint32_t expired
= 0, first
= 0;
256 for (uint32_t i
= 0; i
< fence_count
; ++i
)
257 fences
[i
] = ((struct radv_amdgpu_fence
*)_fences
[i
])->fence
;
259 /* Now use the libdrm query. */
260 r
= amdgpu_cs_wait_fences(fences
, fence_count
, wait_all
,
261 timeout
, &expired
, &first
);
265 fprintf(stderr
, "amdgpu: amdgpu_cs_wait_fences failed.\n");
275 static void radv_amdgpu_cs_destroy(struct radeon_cmdbuf
*rcs
)
277 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(rcs
);
280 cs
->ws
->base
.buffer_destroy(cs
->ib_buffer
);
284 for (unsigned i
= 0; i
< cs
->num_old_ib_buffers
; ++i
)
285 cs
->ws
->base
.buffer_destroy(cs
->old_ib_buffers
[i
]);
287 for (unsigned i
= 0; i
< cs
->num_old_cs_buffers
; ++i
) {
288 struct radeon_cmdbuf
*rcs
= &cs
->old_cs_buffers
[i
];
292 free(cs
->old_cs_buffers
);
293 free(cs
->old_ib_buffers
);
294 free(cs
->virtual_buffers
);
295 free(cs
->virtual_buffer_hash_table
);
300 static void radv_amdgpu_init_cs(struct radv_amdgpu_cs
*cs
,
301 enum ring_type ring_type
)
303 for (int i
= 0; i
< ARRAY_SIZE(cs
->buffer_hash_table
); ++i
)
304 cs
->buffer_hash_table
[i
] = -1;
306 cs
->hw_ip
= ring_to_hw_ip(ring_type
);
309 static struct radeon_cmdbuf
*
310 radv_amdgpu_cs_create(struct radeon_winsys
*ws
,
311 enum ring_type ring_type
)
313 struct radv_amdgpu_cs
*cs
;
314 uint32_t ib_size
= 20 * 1024 * 4;
315 cs
= calloc(1, sizeof(struct radv_amdgpu_cs
));
319 cs
->ws
= radv_amdgpu_winsys(ws
);
320 radv_amdgpu_init_cs(cs
, ring_type
);
322 if (cs
->ws
->use_ib_bos
) {
323 cs
->ib_buffer
= ws
->buffer_create(ws
, ib_size
, 0,
325 RADEON_FLAG_CPU_ACCESS
|
326 RADEON_FLAG_NO_INTERPROCESS_SHARING
|
327 RADEON_FLAG_READ_ONLY
,
328 RADV_BO_PRIORITY_CS
);
329 if (!cs
->ib_buffer
) {
334 cs
->ib_mapped
= ws
->buffer_map(cs
->ib_buffer
);
335 if (!cs
->ib_mapped
) {
336 ws
->buffer_destroy(cs
->ib_buffer
);
341 cs
->ib
.ib_mc_address
= radv_amdgpu_winsys_bo(cs
->ib_buffer
)->base
.va
;
342 cs
->base
.buf
= (uint32_t *)cs
->ib_mapped
;
343 cs
->base
.max_dw
= ib_size
/ 4 - 4;
344 cs
->ib_size_ptr
= &cs
->ib
.size
;
347 ws
->cs_add_buffer(&cs
->base
, cs
->ib_buffer
);
349 cs
->base
.buf
= malloc(16384);
350 cs
->base
.max_dw
= 4096;
360 static void radv_amdgpu_cs_grow(struct radeon_cmdbuf
*_cs
, size_t min_size
)
362 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
369 if (!cs
->ws
->use_ib_bos
) {
370 const uint64_t limit_dws
= 0xffff8;
371 uint64_t ib_dws
= MAX2(cs
->base
.cdw
+ min_size
,
372 MIN2(cs
->base
.max_dw
* 2, limit_dws
));
374 /* The total ib size cannot exceed limit_dws dwords. */
375 if (ib_dws
> limit_dws
)
377 /* The maximum size in dwords has been reached,
378 * try to allocate a new one.
381 realloc(cs
->old_cs_buffers
,
382 (cs
->num_old_cs_buffers
+ 1) * sizeof(*cs
->old_cs_buffers
));
383 if (!cs
->old_cs_buffers
) {
389 /* Store the current one for submitting it later. */
390 cs
->old_cs_buffers
[cs
->num_old_cs_buffers
].cdw
= cs
->base
.cdw
;
391 cs
->old_cs_buffers
[cs
->num_old_cs_buffers
].max_dw
= cs
->base
.max_dw
;
392 cs
->old_cs_buffers
[cs
->num_old_cs_buffers
].buf
= cs
->base
.buf
;
393 cs
->num_old_cs_buffers
++;
395 /* Reset the cs, it will be re-allocated below. */
399 /* Re-compute the number of dwords to allocate. */
400 ib_dws
= MAX2(cs
->base
.cdw
+ min_size
,
401 MIN2(cs
->base
.max_dw
* 2, limit_dws
));
402 if (ib_dws
> limit_dws
) {
403 fprintf(stderr
, "amdgpu: Too high number of "
404 "dwords to allocate\n");
410 uint32_t *new_buf
= realloc(cs
->base
.buf
, ib_dws
* 4);
412 cs
->base
.buf
= new_buf
;
413 cs
->base
.max_dw
= ib_dws
;
421 uint64_t ib_size
= MAX2(min_size
* 4 + 16, cs
->base
.max_dw
* 4 * 2);
423 /* max that fits in the chain size field. */
424 ib_size
= MIN2(ib_size
, 0xfffff);
426 while (!cs
->base
.cdw
|| (cs
->base
.cdw
& 7) != 4)
427 radeon_emit(&cs
->base
, 0xffff1000);
429 *cs
->ib_size_ptr
|= cs
->base
.cdw
+ 4;
431 if (cs
->num_old_ib_buffers
== cs
->max_num_old_ib_buffers
) {
432 cs
->max_num_old_ib_buffers
= MAX2(1, cs
->max_num_old_ib_buffers
* 2);
433 cs
->old_ib_buffers
= realloc(cs
->old_ib_buffers
,
434 cs
->max_num_old_ib_buffers
* sizeof(void*));
437 cs
->old_ib_buffers
[cs
->num_old_ib_buffers
++] = cs
->ib_buffer
;
439 cs
->ib_buffer
= cs
->ws
->base
.buffer_create(&cs
->ws
->base
, ib_size
, 0,
441 RADEON_FLAG_CPU_ACCESS
|
442 RADEON_FLAG_NO_INTERPROCESS_SHARING
|
443 RADEON_FLAG_READ_ONLY
,
444 RADV_BO_PRIORITY_CS
);
446 if (!cs
->ib_buffer
) {
449 cs
->ib_buffer
= cs
->old_ib_buffers
[--cs
->num_old_ib_buffers
];
452 cs
->ib_mapped
= cs
->ws
->base
.buffer_map(cs
->ib_buffer
);
453 if (!cs
->ib_mapped
) {
454 cs
->ws
->base
.buffer_destroy(cs
->ib_buffer
);
457 cs
->ib_buffer
= cs
->old_ib_buffers
[--cs
->num_old_ib_buffers
];
460 cs
->ws
->base
.cs_add_buffer(&cs
->base
, cs
->ib_buffer
);
462 radeon_emit(&cs
->base
, PKT3(PKT3_INDIRECT_BUFFER_CIK
, 2, 0));
463 radeon_emit(&cs
->base
, radv_amdgpu_winsys_bo(cs
->ib_buffer
)->base
.va
);
464 radeon_emit(&cs
->base
, radv_amdgpu_winsys_bo(cs
->ib_buffer
)->base
.va
>> 32);
465 radeon_emit(&cs
->base
, S_3F2_CHAIN(1) | S_3F2_VALID(1));
467 cs
->ib_size_ptr
= cs
->base
.buf
+ cs
->base
.cdw
- 1;
469 cs
->base
.buf
= (uint32_t *)cs
->ib_mapped
;
471 cs
->base
.max_dw
= ib_size
/ 4 - 4;
475 static bool radv_amdgpu_cs_finalize(struct radeon_cmdbuf
*_cs
)
477 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
479 if (cs
->ws
->use_ib_bos
) {
480 while (!cs
->base
.cdw
|| (cs
->base
.cdw
& 7) != 0)
481 radeon_emit(&cs
->base
, 0xffff1000);
483 *cs
->ib_size_ptr
|= cs
->base
.cdw
;
485 cs
->is_chained
= false;
491 static void radv_amdgpu_cs_reset(struct radeon_cmdbuf
*_cs
)
493 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
497 for (unsigned i
= 0; i
< cs
->num_buffers
; ++i
) {
498 unsigned hash
= cs
->handles
[i
].bo_handle
&
499 (ARRAY_SIZE(cs
->buffer_hash_table
) - 1);
500 cs
->buffer_hash_table
[hash
] = -1;
503 for (unsigned i
= 0; i
< cs
->num_virtual_buffers
; ++i
) {
504 unsigned hash
= ((uintptr_t)cs
->virtual_buffers
[i
] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE
- 1);
505 cs
->virtual_buffer_hash_table
[hash
] = -1;
509 cs
->num_virtual_buffers
= 0;
511 if (cs
->ws
->use_ib_bos
) {
512 cs
->ws
->base
.cs_add_buffer(&cs
->base
, cs
->ib_buffer
);
514 for (unsigned i
= 0; i
< cs
->num_old_ib_buffers
; ++i
)
515 cs
->ws
->base
.buffer_destroy(cs
->old_ib_buffers
[i
]);
517 cs
->num_old_ib_buffers
= 0;
518 cs
->ib
.ib_mc_address
= radv_amdgpu_winsys_bo(cs
->ib_buffer
)->base
.va
;
519 cs
->ib_size_ptr
= &cs
->ib
.size
;
522 for (unsigned i
= 0; i
< cs
->num_old_cs_buffers
; ++i
) {
523 struct radeon_cmdbuf
*rcs
= &cs
->old_cs_buffers
[i
];
527 free(cs
->old_cs_buffers
);
528 cs
->old_cs_buffers
= NULL
;
529 cs
->num_old_cs_buffers
= 0;
533 static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs
*cs
,
536 unsigned hash
= bo
& (ARRAY_SIZE(cs
->buffer_hash_table
) - 1);
537 int index
= cs
->buffer_hash_table
[hash
];
542 if (cs
->handles
[index
].bo_handle
== bo
)
545 for (unsigned i
= 0; i
< cs
->num_buffers
; ++i
) {
546 if (cs
->handles
[i
].bo_handle
== bo
) {
547 cs
->buffer_hash_table
[hash
] = i
;
555 static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs
*cs
,
556 uint32_t bo
, uint8_t priority
)
559 int index
= radv_amdgpu_cs_find_buffer(cs
, bo
);
561 if (index
!= -1 || cs
->failed
)
564 if (cs
->num_buffers
== cs
->max_num_buffers
) {
565 unsigned new_count
= MAX2(1, cs
->max_num_buffers
* 2);
566 struct drm_amdgpu_bo_list_entry
*new_entries
=
567 realloc(cs
->handles
, new_count
* sizeof(struct drm_amdgpu_bo_list_entry
));
569 cs
->max_num_buffers
= new_count
;
570 cs
->handles
= new_entries
;
577 cs
->handles
[cs
->num_buffers
].bo_handle
= bo
;
578 cs
->handles
[cs
->num_buffers
].bo_priority
= priority
;
580 hash
= bo
& (ARRAY_SIZE(cs
->buffer_hash_table
) - 1);
581 cs
->buffer_hash_table
[hash
] = cs
->num_buffers
;
586 static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_cmdbuf
*_cs
,
587 struct radeon_winsys_bo
*bo
)
589 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
590 unsigned hash
= ((uintptr_t)bo
>> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE
- 1);
593 if (!cs
->virtual_buffer_hash_table
) {
594 cs
->virtual_buffer_hash_table
= malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE
* sizeof(int));
595 for (int i
= 0; i
< VIRTUAL_BUFFER_HASH_TABLE_SIZE
; ++i
)
596 cs
->virtual_buffer_hash_table
[i
] = -1;
599 if (cs
->virtual_buffer_hash_table
[hash
] >= 0) {
600 int idx
= cs
->virtual_buffer_hash_table
[hash
];
601 if (cs
->virtual_buffers
[idx
] == bo
) {
604 for (unsigned i
= 0; i
< cs
->num_virtual_buffers
; ++i
) {
605 if (cs
->virtual_buffers
[i
] == bo
) {
606 cs
->virtual_buffer_hash_table
[hash
] = i
;
612 if(cs
->max_num_virtual_buffers
<= cs
->num_virtual_buffers
) {
613 cs
->max_num_virtual_buffers
= MAX2(2, cs
->max_num_virtual_buffers
* 2);
614 cs
->virtual_buffers
= realloc(cs
->virtual_buffers
, sizeof(struct radv_amdgpu_virtual_virtual_buffer
*) * cs
->max_num_virtual_buffers
);
617 cs
->virtual_buffers
[cs
->num_virtual_buffers
] = bo
;
619 cs
->virtual_buffer_hash_table
[hash
] = cs
->num_virtual_buffers
;
620 ++cs
->num_virtual_buffers
;
624 static void radv_amdgpu_cs_add_buffer(struct radeon_cmdbuf
*_cs
,
625 struct radeon_winsys_bo
*_bo
)
627 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
628 struct radv_amdgpu_winsys_bo
*bo
= radv_amdgpu_winsys_bo(_bo
);
630 if (bo
->is_virtual
) {
631 radv_amdgpu_cs_add_virtual_buffer(_cs
, _bo
);
635 if (bo
->base
.is_local
)
638 radv_amdgpu_cs_add_buffer_internal(cs
, bo
->bo_handle
, bo
->priority
);
641 static void radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf
*_parent
,
642 struct radeon_cmdbuf
*_child
)
644 struct radv_amdgpu_cs
*parent
= radv_amdgpu_cs(_parent
);
645 struct radv_amdgpu_cs
*child
= radv_amdgpu_cs(_child
);
647 for (unsigned i
= 0; i
< child
->num_buffers
; ++i
) {
648 radv_amdgpu_cs_add_buffer_internal(parent
,
649 child
->handles
[i
].bo_handle
,
650 child
->handles
[i
].bo_priority
);
653 for (unsigned i
= 0; i
< child
->num_virtual_buffers
; ++i
) {
654 radv_amdgpu_cs_add_buffer(&parent
->base
, child
->virtual_buffers
[i
]);
657 if (parent
->ws
->use_ib_bos
) {
658 if (parent
->base
.cdw
+ 4 > parent
->base
.max_dw
)
659 radv_amdgpu_cs_grow(&parent
->base
, 4);
661 radeon_emit(&parent
->base
, PKT3(PKT3_INDIRECT_BUFFER_CIK
, 2, 0));
662 radeon_emit(&parent
->base
, child
->ib
.ib_mc_address
);
663 radeon_emit(&parent
->base
, child
->ib
.ib_mc_address
>> 32);
664 radeon_emit(&parent
->base
, child
->ib
.size
);
666 if (parent
->base
.cdw
+ child
->base
.cdw
> parent
->base
.max_dw
)
667 radv_amdgpu_cs_grow(&parent
->base
, child
->base
.cdw
);
669 memcpy(parent
->base
.buf
+ parent
->base
.cdw
, child
->base
.buf
, 4 * child
->base
.cdw
);
670 parent
->base
.cdw
+= child
->base
.cdw
;
674 static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys
*ws
,
675 struct radeon_cmdbuf
**cs_array
,
677 struct radv_amdgpu_winsys_bo
**extra_bo_array
,
678 unsigned num_extra_bo
,
679 struct radeon_cmdbuf
*extra_cs
,
680 const struct radv_winsys_bo_list
*radv_bo_list
,
685 if (ws
->debug_all_bos
) {
686 struct radv_amdgpu_winsys_bo
*bo
;
687 struct drm_amdgpu_bo_list_entry
*handles
;
690 pthread_mutex_lock(&ws
->global_bo_list_lock
);
692 handles
= malloc(sizeof(handles
[0]) * ws
->num_buffers
);
694 pthread_mutex_unlock(&ws
->global_bo_list_lock
);
698 LIST_FOR_EACH_ENTRY(bo
, &ws
->global_bo_list
, global_list_item
) {
699 assert(num
< ws
->num_buffers
);
700 handles
[num
].bo_handle
= bo
->bo_handle
;
701 handles
[num
].bo_priority
= bo
->priority
;
705 r
= amdgpu_bo_list_create_raw(ws
->dev
, ws
->num_buffers
,
708 pthread_mutex_unlock(&ws
->global_bo_list_lock
);
709 } else if (count
== 1 && !num_extra_bo
&& !extra_cs
&& !radv_bo_list
&&
710 !radv_amdgpu_cs(cs_array
[0])->num_virtual_buffers
) {
711 struct radv_amdgpu_cs
*cs
= (struct radv_amdgpu_cs
*)cs_array
[0];
712 if (cs
->num_buffers
== 0) {
716 r
= amdgpu_bo_list_create_raw(ws
->dev
, cs
->num_buffers
, cs
->handles
,
719 unsigned total_buffer_count
= num_extra_bo
;
720 unsigned unique_bo_count
= num_extra_bo
;
721 for (unsigned i
= 0; i
< count
; ++i
) {
722 struct radv_amdgpu_cs
*cs
= (struct radv_amdgpu_cs
*)cs_array
[i
];
723 total_buffer_count
+= cs
->num_buffers
;
724 for (unsigned j
= 0; j
< cs
->num_virtual_buffers
; ++j
)
725 total_buffer_count
+= radv_amdgpu_winsys_bo(cs
->virtual_buffers
[j
])->bo_count
;
729 total_buffer_count
+= ((struct radv_amdgpu_cs
*)extra_cs
)->num_buffers
;
733 total_buffer_count
+= radv_bo_list
->count
;
736 if (total_buffer_count
== 0) {
740 struct drm_amdgpu_bo_list_entry
*handles
= malloc(sizeof(struct drm_amdgpu_bo_list_entry
) * total_buffer_count
);
744 for (unsigned i
= 0; i
< num_extra_bo
; i
++) {
745 handles
[i
].bo_handle
= extra_bo_array
[i
]->bo_handle
;
746 handles
[i
].bo_priority
= extra_bo_array
[i
]->priority
;
749 for (unsigned i
= 0; i
< count
+ !!extra_cs
; ++i
) {
750 struct radv_amdgpu_cs
*cs
;
753 cs
= (struct radv_amdgpu_cs
*)extra_cs
;
755 cs
= (struct radv_amdgpu_cs
*)cs_array
[i
];
757 if (!cs
->num_buffers
)
760 if (unique_bo_count
== 0 && !cs
->num_virtual_buffers
) {
761 memcpy(handles
, cs
->handles
, cs
->num_buffers
* sizeof(struct drm_amdgpu_bo_list_entry
));
762 unique_bo_count
= cs
->num_buffers
;
765 int unique_bo_so_far
= unique_bo_count
;
766 for (unsigned j
= 0; j
< cs
->num_buffers
; ++j
) {
768 for (unsigned k
= 0; k
< unique_bo_so_far
; ++k
) {
769 if (handles
[k
].bo_handle
== cs
->handles
[j
].bo_handle
) {
775 handles
[unique_bo_count
] = cs
->handles
[j
];
779 for (unsigned j
= 0; j
< cs
->num_virtual_buffers
; ++j
) {
780 struct radv_amdgpu_winsys_bo
*virtual_bo
= radv_amdgpu_winsys_bo(cs
->virtual_buffers
[j
]);
781 for(unsigned k
= 0; k
< virtual_bo
->bo_count
; ++k
) {
782 struct radv_amdgpu_winsys_bo
*bo
= virtual_bo
->bos
[k
];
784 for (unsigned m
= 0; m
< unique_bo_count
; ++m
) {
785 if (handles
[m
].bo_handle
== bo
->bo_handle
) {
791 handles
[unique_bo_count
].bo_handle
= bo
->bo_handle
;
792 handles
[unique_bo_count
].bo_priority
= bo
->priority
;
800 unsigned unique_bo_so_far
= unique_bo_count
;
801 for (unsigned i
= 0; i
< radv_bo_list
->count
; ++i
) {
802 struct radv_amdgpu_winsys_bo
*bo
= radv_amdgpu_winsys_bo(radv_bo_list
->bos
[i
]);
804 for (unsigned j
= 0; j
< unique_bo_so_far
; ++j
) {
805 if (bo
->bo_handle
== handles
[j
].bo_handle
) {
811 handles
[unique_bo_count
].bo_handle
= bo
->bo_handle
;
812 handles
[unique_bo_count
].bo_priority
= bo
->priority
;
818 if (unique_bo_count
> 0) {
819 r
= amdgpu_bo_list_create_raw(ws
->dev
, unique_bo_count
, handles
,
/* Build the fence_info attached to a submission: when the context has
 * a CPU-visible fence map, point the kernel at the per-(ip,ring)
 * uint64_t slot inside fence_bo so it can write the completed
 * sequence number there.  The offset layout matches what
 * radv_amdgpu_request_to_fence reads through fence->user_ptr.
 * NOTE(review): the closing lines (presumably "return ret;" and the
 * brace) were lost in extraction. */
831 static struct amdgpu_cs_fence_info
radv_set_cs_fence(struct radv_amdgpu_ctx
*ctx
, int ip_type
, int ring
)
833 struct amdgpu_cs_fence_info ret
= {0};
834 if (ctx
->fence_map
) {
835 ret
.handle
= radv_amdgpu_winsys_bo(ctx
->fence_bo
)->bo
;
/* One slot per ring, rings grouped by IP type. */
836 ret
.offset
= (ip_type
* MAX_RINGS_PER_TYPE
+ ring
) * sizeof(uint64_t);
/* Record this request as the most recent submission on its
 * (ip_type, ring) pair, so the context can later wait for it.
 * NOTE(review): the final argument line of the call (presumably the
 * request itself) and the closing brace were lost in extraction. */
841 static void radv_assign_last_submit(struct radv_amdgpu_ctx
*ctx
,
842 struct radv_amdgpu_cs_request
*request
)
844 radv_amdgpu_request_to_fence(ctx
,
845 &ctx
->last_submission
[request
->ip_type
][request
->ring
],
849 static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx
*_ctx
,
851 struct radv_winsys_sem_info
*sem_info
,
852 const struct radv_winsys_bo_list
*radv_bo_list
,
853 struct radeon_cmdbuf
**cs_array
,
855 struct radeon_cmdbuf
*initial_preamble_cs
,
856 struct radeon_cmdbuf
*continue_preamble_cs
,
857 struct radeon_winsys_fence
*_fence
)
860 struct radv_amdgpu_ctx
*ctx
= radv_amdgpu_ctx(_ctx
);
861 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
862 struct radv_amdgpu_cs
*cs0
= radv_amdgpu_cs(cs_array
[0]);
864 struct radv_amdgpu_cs_request request
= {0};
865 struct amdgpu_cs_ib_info ibs
[2];
866 unsigned number_of_ibs
= 1;
868 for (unsigned i
= cs_count
; i
--;) {
869 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[i
]);
871 if (cs
->is_chained
) {
872 *cs
->ib_size_ptr
-= 4;
873 cs
->is_chained
= false;
876 if (i
+ 1 < cs_count
) {
877 struct radv_amdgpu_cs
*next
= radv_amdgpu_cs(cs_array
[i
+ 1]);
878 assert(cs
->base
.cdw
+ 4 <= cs
->base
.max_dw
);
880 cs
->is_chained
= true;
881 *cs
->ib_size_ptr
+= 4;
883 cs
->base
.buf
[cs
->base
.cdw
+ 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK
, 2, 0);
884 cs
->base
.buf
[cs
->base
.cdw
+ 1] = next
->ib
.ib_mc_address
;
885 cs
->base
.buf
[cs
->base
.cdw
+ 2] = next
->ib
.ib_mc_address
>> 32;
886 cs
->base
.buf
[cs
->base
.cdw
+ 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next
->ib
.size
;
890 /* Create a buffer object list. */
891 r
= radv_amdgpu_create_bo_list(cs0
->ws
, cs_array
, cs_count
, NULL
, 0,
892 initial_preamble_cs
, radv_bo_list
,
895 fprintf(stderr
, "amdgpu: buffer list creation failed for the "
896 "chained submission(%d)\n", r
);
900 /* Configure the CS request. */
901 if (initial_preamble_cs
) {
902 ibs
[0] = radv_amdgpu_cs(initial_preamble_cs
)->ib
;
909 request
.ip_type
= cs0
->hw_ip
;
910 request
.ring
= queue_idx
;
911 request
.number_of_ibs
= number_of_ibs
;
913 request
.resources
= bo_list
;
914 request
.fence_info
= radv_set_cs_fence(ctx
, cs0
->hw_ip
, queue_idx
);
917 r
= radv_amdgpu_cs_submit(ctx
, &request
, sem_info
);
920 fprintf(stderr
, "amdgpu: Not enough memory for command submission.\n");
922 fprintf(stderr
, "amdgpu: The CS has been rejected, "
923 "see dmesg for more information.\n");
926 amdgpu_bo_list_destroy_raw(ctx
->ws
->dev
, bo_list
);
932 radv_amdgpu_request_to_fence(ctx
, fence
, &request
);
934 radv_assign_last_submit(ctx
, &request
);
939 static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx
*_ctx
,
941 struct radv_winsys_sem_info
*sem_info
,
942 const struct radv_winsys_bo_list
*radv_bo_list
,
943 struct radeon_cmdbuf
**cs_array
,
945 struct radeon_cmdbuf
*initial_preamble_cs
,
946 struct radeon_cmdbuf
*continue_preamble_cs
,
947 struct radeon_winsys_fence
*_fence
)
950 struct radv_amdgpu_ctx
*ctx
= radv_amdgpu_ctx(_ctx
);
951 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
953 struct radv_amdgpu_cs_request request
= {};
954 struct amdgpu_cs_ib_info
*ibs
;
955 struct radv_amdgpu_cs
*cs0
;
956 unsigned number_of_ibs
;
959 cs0
= radv_amdgpu_cs(cs_array
[0]);
961 /* Compute the number of IBs for this submit. */
962 number_of_ibs
= cs_count
+ !!initial_preamble_cs
;
964 /* Create a buffer object list. */
965 r
= radv_amdgpu_create_bo_list(cs0
->ws
, &cs_array
[0], cs_count
, NULL
, 0,
966 initial_preamble_cs
, radv_bo_list
,
969 fprintf(stderr
, "amdgpu: buffer list creation failed "
970 "for the fallback submission (%d)\n", r
);
974 ibs
= malloc(number_of_ibs
* sizeof(*ibs
));
976 amdgpu_bo_list_destroy_raw(ctx
->ws
->dev
, bo_list
);
980 /* Configure the CS request. */
981 if (initial_preamble_cs
)
982 ibs
[0] = radv_amdgpu_cs(initial_preamble_cs
)->ib
;
984 for (unsigned i
= 0; i
< cs_count
; i
++) {
985 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[i
]);
987 ibs
[i
+ !!initial_preamble_cs
] = cs
->ib
;
989 if (cs
->is_chained
) {
990 *cs
->ib_size_ptr
-= 4;
991 cs
->is_chained
= false;
995 request
.ip_type
= cs0
->hw_ip
;
996 request
.ring
= queue_idx
;
997 request
.resources
= bo_list
;
998 request
.number_of_ibs
= number_of_ibs
;
1000 request
.fence_info
= radv_set_cs_fence(ctx
, cs0
->hw_ip
, queue_idx
);
1002 /* Submit the CS. */
1003 r
= radv_amdgpu_cs_submit(ctx
, &request
, sem_info
);
1006 fprintf(stderr
, "amdgpu: Not enough memory for command submission.\n");
1008 fprintf(stderr
, "amdgpu: The CS has been rejected, "
1009 "see dmesg for more information.\n");
1012 amdgpu_bo_list_destroy_raw(ctx
->ws
->dev
, bo_list
);
1019 radv_amdgpu_request_to_fence(ctx
, fence
, &request
);
1021 radv_assign_last_submit(ctx
, &request
);
1026 static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx
*_ctx
,
1028 struct radv_winsys_sem_info
*sem_info
,
1029 const struct radv_winsys_bo_list
*radv_bo_list
,
1030 struct radeon_cmdbuf
**cs_array
,
1032 struct radeon_cmdbuf
*initial_preamble_cs
,
1033 struct radeon_cmdbuf
*continue_preamble_cs
,
1034 struct radeon_winsys_fence
*_fence
)
1037 struct radv_amdgpu_ctx
*ctx
= radv_amdgpu_ctx(_ctx
);
1038 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
1039 struct radv_amdgpu_cs
*cs0
= radv_amdgpu_cs(cs_array
[0]);
1040 struct radeon_winsys
*ws
= (struct radeon_winsys
*)cs0
->ws
;
1042 struct radv_amdgpu_cs_request request
;
1043 uint32_t pad_word
= 0xffff1000U
;
1044 bool emit_signal_sem
= sem_info
->cs_emit_signal
;
1046 if (radv_amdgpu_winsys(ws
)->info
.chip_class
== GFX6
)
1047 pad_word
= 0x80000000;
1051 for (unsigned i
= 0; i
< cs_count
;) {
1052 struct amdgpu_cs_ib_info
*ibs
;
1053 struct radeon_winsys_bo
**bos
;
1054 struct radeon_cmdbuf
*preamble_cs
= i
? continue_preamble_cs
: initial_preamble_cs
;
1055 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[i
]);
1056 unsigned number_of_ibs
;
1060 unsigned pad_words
= 0;
1062 /* Compute the number of IBs for this submit. */
1063 number_of_ibs
= cs
->num_old_cs_buffers
+ 1;
1065 ibs
= malloc(number_of_ibs
* sizeof(*ibs
));
1069 bos
= malloc(number_of_ibs
* sizeof(*bos
));
1075 if (number_of_ibs
> 1) {
1076 /* Special path when the maximum size in dwords has
1077 * been reached because we need to handle more than one
1080 struct radeon_cmdbuf
**new_cs_array
;
1083 new_cs_array
= malloc(cs
->num_old_cs_buffers
*
1084 sizeof(*new_cs_array
));
1085 assert(new_cs_array
);
1087 for (unsigned j
= 0; j
< cs
->num_old_cs_buffers
; j
++)
1088 new_cs_array
[idx
++] = &cs
->old_cs_buffers
[j
];
1089 new_cs_array
[idx
++] = cs_array
[i
];
1091 for (unsigned j
= 0; j
< number_of_ibs
; j
++) {
1092 struct radeon_cmdbuf
*rcs
= new_cs_array
[j
];
1093 bool needs_preamble
= preamble_cs
&& j
== 0;
1097 size
+= preamble_cs
->cdw
;
1100 assert(size
< 0xffff8);
1102 while (!size
|| (size
& 7)) {
1107 bos
[j
] = ws
->buffer_create(ws
, 4 * size
, 4096,
1109 RADEON_FLAG_CPU_ACCESS
|
1110 RADEON_FLAG_NO_INTERPROCESS_SHARING
|
1111 RADEON_FLAG_READ_ONLY
,
1112 RADV_BO_PRIORITY_CS
);
1113 ptr
= ws
->buffer_map(bos
[j
]);
1115 if (needs_preamble
) {
1116 memcpy(ptr
, preamble_cs
->buf
, preamble_cs
->cdw
* 4);
1117 ptr
+= preamble_cs
->cdw
;
1120 memcpy(ptr
, rcs
->buf
, 4 * rcs
->cdw
);
1123 for (unsigned k
= 0; k
< pad_words
; ++k
)
1127 ibs
[j
].ib_mc_address
= radv_buffer_get_va(bos
[j
]);
1135 size
+= preamble_cs
->cdw
;
1137 while (i
+ cnt
< cs_count
&& 0xffff8 - size
>= radv_amdgpu_cs(cs_array
[i
+ cnt
])->base
.cdw
) {
1138 size
+= radv_amdgpu_cs(cs_array
[i
+ cnt
])->base
.cdw
;
1142 while (!size
|| (size
& 7)) {
1148 bos
[0] = ws
->buffer_create(ws
, 4 * size
, 4096,
1150 RADEON_FLAG_CPU_ACCESS
|
1151 RADEON_FLAG_NO_INTERPROCESS_SHARING
|
1152 RADEON_FLAG_READ_ONLY
,
1153 RADV_BO_PRIORITY_CS
);
1154 ptr
= ws
->buffer_map(bos
[0]);
1157 memcpy(ptr
, preamble_cs
->buf
, preamble_cs
->cdw
* 4);
1158 ptr
+= preamble_cs
->cdw
;
1161 for (unsigned j
= 0; j
< cnt
; ++j
) {
1162 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[i
+ j
]);
1163 memcpy(ptr
, cs
->base
.buf
, 4 * cs
->base
.cdw
);
1164 ptr
+= cs
->base
.cdw
;
1168 for (unsigned j
= 0; j
< pad_words
; ++j
)
1172 ibs
[0].ib_mc_address
= radv_buffer_get_va(bos
[0]);
1176 r
= radv_amdgpu_create_bo_list(cs0
->ws
, &cs_array
[i
], cnt
,
1177 (struct radv_amdgpu_winsys_bo
**)bos
,
1178 number_of_ibs
, preamble_cs
,
1179 radv_bo_list
, &bo_list
);
1181 fprintf(stderr
, "amdgpu: buffer list creation failed "
1182 "for the sysmem submission (%d)\n", r
);
1188 memset(&request
, 0, sizeof(request
));
1190 request
.ip_type
= cs0
->hw_ip
;
1191 request
.ring
= queue_idx
;
1192 request
.resources
= bo_list
;
1193 request
.number_of_ibs
= number_of_ibs
;
1195 request
.fence_info
= radv_set_cs_fence(ctx
, cs0
->hw_ip
, queue_idx
);
1197 sem_info
->cs_emit_signal
= (i
== cs_count
- cnt
) ? emit_signal_sem
: false;
1198 r
= radv_amdgpu_cs_submit(ctx
, &request
, sem_info
);
1201 fprintf(stderr
, "amdgpu: Not enough memory for command submission.\n");
1203 fprintf(stderr
, "amdgpu: The CS has been rejected, "
1204 "see dmesg for more information.\n");
1207 amdgpu_bo_list_destroy_raw(ctx
->ws
->dev
, bo_list
);
1209 for (unsigned j
= 0; j
< number_of_ibs
; j
++) {
1210 ws
->buffer_destroy(bos
[j
]);
1222 radv_amdgpu_request_to_fence(ctx
, fence
, &request
);
1224 radv_assign_last_submit(ctx
, &request
);
1229 static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx
*_ctx
,
1231 struct radeon_cmdbuf
**cs_array
,
1233 struct radeon_cmdbuf
*initial_preamble_cs
,
1234 struct radeon_cmdbuf
*continue_preamble_cs
,
1235 struct radv_winsys_sem_info
*sem_info
,
1236 const struct radv_winsys_bo_list
*bo_list
,
1238 struct radeon_winsys_fence
*_fence
)
1240 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[0]);
1241 struct radv_amdgpu_ctx
*ctx
= radv_amdgpu_ctx(_ctx
);
1245 if (!cs
->ws
->use_ib_bos
) {
1246 ret
= radv_amdgpu_winsys_cs_submit_sysmem(_ctx
, queue_idx
, sem_info
, bo_list
, cs_array
,
1247 cs_count
, initial_preamble_cs
, continue_preamble_cs
, _fence
);
1248 } else if (can_patch
) {
1249 ret
= radv_amdgpu_winsys_cs_submit_chained(_ctx
, queue_idx
, sem_info
, bo_list
, cs_array
,
1250 cs_count
, initial_preamble_cs
, continue_preamble_cs
, _fence
);
1252 ret
= radv_amdgpu_winsys_cs_submit_fallback(_ctx
, queue_idx
, sem_info
, bo_list
, cs_array
,
1253 cs_count
, initial_preamble_cs
, continue_preamble_cs
, _fence
);
1256 radv_amdgpu_signal_sems(ctx
, cs
->hw_ip
, queue_idx
, sem_info
);
1260 static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs
, uint64_t addr
)
1262 struct radv_amdgpu_cs
*cs
= (struct radv_amdgpu_cs
*)_cs
;
1267 for (unsigned i
= 0; i
<= cs
->num_old_ib_buffers
; ++i
) {
1268 struct radv_amdgpu_winsys_bo
*bo
;
1270 bo
= (struct radv_amdgpu_winsys_bo
*)
1271 (i
== cs
->num_old_ib_buffers
? cs
->ib_buffer
: cs
->old_ib_buffers
[i
]);
1272 if (addr
>= bo
->base
.va
&& addr
- bo
->base
.va
< bo
->size
) {
1273 if (amdgpu_bo_cpu_map(bo
->bo
, &ret
) == 0)
1274 return (char *)ret
+ (addr
- bo
->base
.va
);
1277 if(cs
->ws
->debug_all_bos
) {
1278 pthread_mutex_lock(&cs
->ws
->global_bo_list_lock
);
1279 list_for_each_entry(struct radv_amdgpu_winsys_bo
, bo
,
1280 &cs
->ws
->global_bo_list
, global_list_item
) {
1281 if (addr
>= bo
->base
.va
&& addr
- bo
->base
.va
< bo
->size
) {
1282 if (amdgpu_bo_cpu_map(bo
->bo
, &ret
) == 0) {
1283 pthread_mutex_unlock(&cs
->ws
->global_bo_list_lock
);
1284 return (char *)ret
+ (addr
- bo
->base
.va
);
1288 pthread_mutex_unlock(&cs
->ws
->global_bo_list_lock
);
1293 static void radv_amdgpu_winsys_cs_dump(struct radeon_cmdbuf
*_cs
,
1295 const int *trace_ids
, int trace_id_count
)
1297 struct radv_amdgpu_cs
*cs
= (struct radv_amdgpu_cs
*)_cs
;
1298 void *ib
= cs
->base
.buf
;
1299 int num_dw
= cs
->base
.cdw
;
1301 if (cs
->ws
->use_ib_bos
) {
1302 ib
= radv_amdgpu_winsys_get_cpu_addr(cs
, cs
->ib
.ib_mc_address
);
1303 num_dw
= cs
->ib
.size
;
1306 ac_parse_ib(file
, ib
, num_dw
, trace_ids
, trace_id_count
, "main IB",
1307 cs
->ws
->info
.chip_class
, radv_amdgpu_winsys_get_cpu_addr
, cs
);
1310 static uint32_t radv_to_amdgpu_priority(enum radeon_ctx_priority radv_priority
)
1312 switch (radv_priority
) {
1313 case RADEON_CTX_PRIORITY_REALTIME
:
1314 return AMDGPU_CTX_PRIORITY_VERY_HIGH
;
1315 case RADEON_CTX_PRIORITY_HIGH
:
1316 return AMDGPU_CTX_PRIORITY_HIGH
;
1317 case RADEON_CTX_PRIORITY_MEDIUM
:
1318 return AMDGPU_CTX_PRIORITY_NORMAL
;
1319 case RADEON_CTX_PRIORITY_LOW
:
1320 return AMDGPU_CTX_PRIORITY_LOW
;
1322 unreachable("Invalid context priority");
1326 static VkResult
radv_amdgpu_ctx_create(struct radeon_winsys
*_ws
,
1327 enum radeon_ctx_priority priority
,
1328 struct radeon_winsys_ctx
**rctx
)
1330 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1331 struct radv_amdgpu_ctx
*ctx
= CALLOC_STRUCT(radv_amdgpu_ctx
);
1332 uint32_t amdgpu_priority
= radv_to_amdgpu_priority(priority
);
1337 return VK_ERROR_OUT_OF_HOST_MEMORY
;
1339 r
= amdgpu_cs_ctx_create2(ws
->dev
, amdgpu_priority
, &ctx
->ctx
);
1340 if (r
&& r
== -EACCES
) {
1341 result
= VK_ERROR_NOT_PERMITTED_EXT
;
1344 fprintf(stderr
, "amdgpu: radv_amdgpu_cs_ctx_create2 failed. (%i)\n", r
);
1345 result
= VK_ERROR_OUT_OF_HOST_MEMORY
;
1350 assert(AMDGPU_HW_IP_NUM
* MAX_RINGS_PER_TYPE
* sizeof(uint64_t) <= 4096);
1351 ctx
->fence_bo
= ws
->base
.buffer_create(&ws
->base
, 4096, 8,
1353 RADEON_FLAG_CPU_ACCESS
|
1354 RADEON_FLAG_NO_INTERPROCESS_SHARING
,
1355 RADV_BO_PRIORITY_CS
);
1357 ctx
->fence_map
= (uint64_t*)ws
->base
.buffer_map(ctx
->fence_bo
);
1359 memset(ctx
->fence_map
, 0, 4096);
1361 *rctx
= (struct radeon_winsys_ctx
*)ctx
;
1368 static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx
*rwctx
)
1370 struct radv_amdgpu_ctx
*ctx
= (struct radv_amdgpu_ctx
*)rwctx
;
1371 ctx
->ws
->base
.buffer_destroy(ctx
->fence_bo
);
1372 amdgpu_cs_ctx_free(ctx
->ctx
);
1376 static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx
*rwctx
,
1377 enum ring_type ring_type
, int ring_index
)
1379 struct radv_amdgpu_ctx
*ctx
= (struct radv_amdgpu_ctx
*)rwctx
;
1380 int ip_type
= ring_to_hw_ip(ring_type
);
1382 if (ctx
->last_submission
[ip_type
][ring_index
].fence
.fence
) {
1384 int ret
= amdgpu_cs_query_fence_status(&ctx
->last_submission
[ip_type
][ring_index
].fence
,
1385 1000000000ull, 0, &expired
);
1387 if (ret
|| !expired
)
1394 static struct radeon_winsys_sem
*radv_amdgpu_create_sem(struct radeon_winsys
*_ws
)
1396 struct amdgpu_cs_fence
*sem
= CALLOC_STRUCT(amdgpu_cs_fence
);
1400 return (struct radeon_winsys_sem
*)sem
;
/* Free a semaphore allocated by radv_amdgpu_create_sem(). */
static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
{
	struct amdgpu_cs_fence *sem = (struct amdgpu_cs_fence *)_sem;

	FREE(sem);
}
1409 static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx
*ctx
,
1412 struct radv_winsys_sem_info
*sem_info
)
1414 for (unsigned i
= 0; i
< sem_info
->signal
.sem_count
; i
++) {
1415 struct amdgpu_cs_fence
*sem
= (struct amdgpu_cs_fence
*)(sem_info
->signal
.sem
)[i
];
1420 *sem
= ctx
->last_submission
[ip_type
][ring
].fence
;
1425 static struct drm_amdgpu_cs_chunk_sem
*radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts
*counts
,
1426 struct drm_amdgpu_cs_chunk
*chunk
, int chunk_id
)
1428 struct drm_amdgpu_cs_chunk_sem
*syncobj
= malloc(sizeof(struct drm_amdgpu_cs_chunk_sem
) * counts
->syncobj_count
);
1432 for (unsigned i
= 0; i
< counts
->syncobj_count
; i
++) {
1433 struct drm_amdgpu_cs_chunk_sem
*sem
= &syncobj
[i
];
1434 sem
->handle
= counts
->syncobj
[i
];
1437 chunk
->chunk_id
= chunk_id
;
1438 chunk
->length_dw
= sizeof(struct drm_amdgpu_cs_chunk_sem
) / 4 * counts
->syncobj_count
;
1439 chunk
->chunk_data
= (uint64_t)(uintptr_t)syncobj
;
1443 static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx
*ctx
,
1444 struct radv_amdgpu_cs_request
*request
,
1445 struct radv_winsys_sem_info
*sem_info
)
1451 struct drm_amdgpu_cs_chunk
*chunks
;
1452 struct drm_amdgpu_cs_chunk_data
*chunk_data
;
1453 struct drm_amdgpu_cs_chunk_dep
*sem_dependencies
= NULL
;
1454 struct drm_amdgpu_cs_chunk_sem
*wait_syncobj
= NULL
, *signal_syncobj
= NULL
;
1456 struct amdgpu_cs_fence
*sem
;
1458 user_fence
= (request
->fence_info
.handle
!= NULL
);
1459 size
= request
->number_of_ibs
+ (user_fence
? 2 : 1) + 3;
1461 chunks
= alloca(sizeof(struct drm_amdgpu_cs_chunk
) * size
);
1463 size
= request
->number_of_ibs
+ (user_fence
? 1 : 0);
1465 chunk_data
= alloca(sizeof(struct drm_amdgpu_cs_chunk_data
) * size
);
1467 num_chunks
= request
->number_of_ibs
;
1468 for (i
= 0; i
< request
->number_of_ibs
; i
++) {
1469 struct amdgpu_cs_ib_info
*ib
;
1470 chunks
[i
].chunk_id
= AMDGPU_CHUNK_ID_IB
;
1471 chunks
[i
].length_dw
= sizeof(struct drm_amdgpu_cs_chunk_ib
) / 4;
1472 chunks
[i
].chunk_data
= (uint64_t)(uintptr_t)&chunk_data
[i
];
1474 ib
= &request
->ibs
[i
];
1476 chunk_data
[i
].ib_data
._pad
= 0;
1477 chunk_data
[i
].ib_data
.va_start
= ib
->ib_mc_address
;
1478 chunk_data
[i
].ib_data
.ib_bytes
= ib
->size
* 4;
1479 chunk_data
[i
].ib_data
.ip_type
= request
->ip_type
;
1480 chunk_data
[i
].ib_data
.ip_instance
= request
->ip_instance
;
1481 chunk_data
[i
].ib_data
.ring
= request
->ring
;
1482 chunk_data
[i
].ib_data
.flags
= ib
->flags
;
1488 chunks
[i
].chunk_id
= AMDGPU_CHUNK_ID_FENCE
;
1489 chunks
[i
].length_dw
= sizeof(struct drm_amdgpu_cs_chunk_fence
) / 4;
1490 chunks
[i
].chunk_data
= (uint64_t)(uintptr_t)&chunk_data
[i
];
1492 amdgpu_cs_chunk_fence_info_to_data(&request
->fence_info
,
1496 if (sem_info
->wait
.syncobj_count
&& sem_info
->cs_emit_wait
) {
1497 wait_syncobj
= radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info
->wait
,
1498 &chunks
[num_chunks
],
1499 AMDGPU_CHUNK_ID_SYNCOBJ_IN
);
1500 if (!wait_syncobj
) {
1506 if (sem_info
->wait
.sem_count
== 0)
1507 sem_info
->cs_emit_wait
= false;
1511 if (sem_info
->wait
.sem_count
&& sem_info
->cs_emit_wait
) {
1512 sem_dependencies
= alloca(sizeof(struct drm_amdgpu_cs_chunk_dep
) * sem_info
->wait
.sem_count
);
1515 for (unsigned j
= 0; j
< sem_info
->wait
.sem_count
; j
++) {
1516 sem
= (struct amdgpu_cs_fence
*)sem_info
->wait
.sem
[j
];
1519 struct drm_amdgpu_cs_chunk_dep
*dep
= &sem_dependencies
[sem_count
++];
1521 amdgpu_cs_chunk_fence_to_dep(sem
, dep
);
1523 sem
->context
= NULL
;
1527 /* dependencies chunk */
1528 chunks
[i
].chunk_id
= AMDGPU_CHUNK_ID_DEPENDENCIES
;
1529 chunks
[i
].length_dw
= sizeof(struct drm_amdgpu_cs_chunk_dep
) / 4 * sem_count
;
1530 chunks
[i
].chunk_data
= (uint64_t)(uintptr_t)sem_dependencies
;
1532 sem_info
->cs_emit_wait
= false;
1535 if (sem_info
->signal
.syncobj_count
&& sem_info
->cs_emit_signal
) {
1536 signal_syncobj
= radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info
->signal
,
1537 &chunks
[num_chunks
],
1538 AMDGPU_CHUNK_ID_SYNCOBJ_OUT
);
1539 if (!signal_syncobj
) {
1546 r
= amdgpu_cs_submit_raw2(ctx
->ws
->dev
,
1554 free(signal_syncobj
);
1558 static int radv_amdgpu_create_syncobj(struct radeon_winsys
*_ws
,
1561 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1562 return amdgpu_cs_create_syncobj(ws
->dev
, handle
);
1565 static void radv_amdgpu_destroy_syncobj(struct radeon_winsys
*_ws
,
1568 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1569 amdgpu_cs_destroy_syncobj(ws
->dev
, handle
);
1572 static void radv_amdgpu_reset_syncobj(struct radeon_winsys
*_ws
,
1575 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1576 amdgpu_cs_syncobj_reset(ws
->dev
, &handle
, 1);
1579 static void radv_amdgpu_signal_syncobj(struct radeon_winsys
*_ws
,
1582 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1583 amdgpu_cs_syncobj_signal(ws
->dev
, &handle
, 1);
1586 static bool radv_amdgpu_wait_syncobj(struct radeon_winsys
*_ws
, const uint32_t *handles
,
1587 uint32_t handle_count
, bool wait_all
, uint64_t timeout
)
1589 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1592 /* The timeouts are signed, while vulkan timeouts are unsigned. */
1593 timeout
= MIN2(timeout
, INT64_MAX
);
1595 int ret
= amdgpu_cs_syncobj_wait(ws
->dev
, (uint32_t*)handles
, handle_count
, timeout
,
1596 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT
|
1597 (wait_all
? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL
: 0),
1601 } else if (ret
== -ETIME
) {
1604 fprintf(stderr
, "amdgpu: radv_amdgpu_wait_syncobj failed!\nerrno: %d\n", errno
);
1609 static int radv_amdgpu_export_syncobj(struct radeon_winsys
*_ws
,
1613 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1615 return amdgpu_cs_export_syncobj(ws
->dev
, syncobj
, fd
);
1618 static int radv_amdgpu_import_syncobj(struct radeon_winsys
*_ws
,
1622 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1624 return amdgpu_cs_import_syncobj(ws
->dev
, fd
, syncobj
);
1628 static int radv_amdgpu_export_syncobj_to_sync_file(struct radeon_winsys
*_ws
,
1632 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1634 return amdgpu_cs_syncobj_export_sync_file(ws
->dev
, syncobj
, fd
);
1637 static int radv_amdgpu_import_syncobj_from_sync_file(struct radeon_winsys
*_ws
,
1641 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1643 return amdgpu_cs_syncobj_import_sync_file(ws
->dev
, syncobj
, fd
);
1646 void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys
*ws
)
1648 ws
->base
.ctx_create
= radv_amdgpu_ctx_create
;
1649 ws
->base
.ctx_destroy
= radv_amdgpu_ctx_destroy
;
1650 ws
->base
.ctx_wait_idle
= radv_amdgpu_ctx_wait_idle
;
1651 ws
->base
.cs_create
= radv_amdgpu_cs_create
;
1652 ws
->base
.cs_destroy
= radv_amdgpu_cs_destroy
;
1653 ws
->base
.cs_grow
= radv_amdgpu_cs_grow
;
1654 ws
->base
.cs_finalize
= radv_amdgpu_cs_finalize
;
1655 ws
->base
.cs_reset
= radv_amdgpu_cs_reset
;
1656 ws
->base
.cs_add_buffer
= radv_amdgpu_cs_add_buffer
;
1657 ws
->base
.cs_execute_secondary
= radv_amdgpu_cs_execute_secondary
;
1658 ws
->base
.cs_submit
= radv_amdgpu_winsys_cs_submit
;
1659 ws
->base
.cs_dump
= radv_amdgpu_winsys_cs_dump
;
1660 ws
->base
.create_fence
= radv_amdgpu_create_fence
;
1661 ws
->base
.destroy_fence
= radv_amdgpu_destroy_fence
;
1662 ws
->base
.reset_fence
= radv_amdgpu_reset_fence
;
1663 ws
->base
.signal_fence
= radv_amdgpu_signal_fence
;
1664 ws
->base
.is_fence_waitable
= radv_amdgpu_is_fence_waitable
;
1665 ws
->base
.create_sem
= radv_amdgpu_create_sem
;
1666 ws
->base
.destroy_sem
= radv_amdgpu_destroy_sem
;
1667 ws
->base
.create_syncobj
= radv_amdgpu_create_syncobj
;
1668 ws
->base
.destroy_syncobj
= radv_amdgpu_destroy_syncobj
;
1669 ws
->base
.reset_syncobj
= radv_amdgpu_reset_syncobj
;
1670 ws
->base
.signal_syncobj
= radv_amdgpu_signal_syncobj
;
1671 ws
->base
.wait_syncobj
= radv_amdgpu_wait_syncobj
;
1672 ws
->base
.export_syncobj
= radv_amdgpu_export_syncobj
;
1673 ws
->base
.import_syncobj
= radv_amdgpu_import_syncobj
;
1674 ws
->base
.export_syncobj_to_sync_file
= radv_amdgpu_export_syncobj_to_sync_file
;
1675 ws
->base
.import_syncobj_from_sync_file
= radv_amdgpu_import_syncobj_from_sync_file
;
1676 ws
->base
.fence_wait
= radv_amdgpu_fence_wait
;
1677 ws
->base
.fences_wait
= radv_amdgpu_fences_wait
;