/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <assert.h>
#include <pthread.h>

#include <amdgpu.h>
#include <amdgpu_drm.h>

#include "util/u_memory.h"
#include "ac_debug.h"
#include "radv_radeon_winsys.h"
#include "radv_amdgpu_cs.h"
#include "radv_amdgpu_bo.h"
#include "radv_amdgpu_winsys.h"
#include "sid.h"
/* Number of slots in the per-CS virtual-buffer hash table (power of two,
 * so the hash can be masked with SIZE - 1). */
enum {
	VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024
};
43 struct radv_amdgpu_cs
{
44 struct radeon_cmdbuf base
;
45 struct radv_amdgpu_winsys
*ws
;
47 struct amdgpu_cs_ib_info ib
;
49 struct radeon_winsys_bo
*ib_buffer
;
51 unsigned max_num_buffers
;
53 amdgpu_bo_handle
*handles
;
55 struct radeon_winsys_bo
**old_ib_buffers
;
56 unsigned num_old_ib_buffers
;
57 unsigned max_num_old_ib_buffers
;
58 unsigned *ib_size_ptr
;
62 int buffer_hash_table
[1024];
65 unsigned num_virtual_buffers
;
66 unsigned max_num_virtual_buffers
;
67 struct radeon_winsys_bo
**virtual_buffers
;
68 int *virtual_buffer_hash_table
;
70 /* For chips that don't support chaining. */
71 struct radeon_cmdbuf
*old_cs_buffers
;
72 unsigned num_old_cs_buffers
;
/* Downcast a generic radeon_cmdbuf to its containing radv_amdgpu_cs.
 * Valid because 'base' is the first member of struct radv_amdgpu_cs. */
static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_cmdbuf *base)
{
	return (struct radv_amdgpu_cs *)base;
}
81 static int ring_to_hw_ip(enum ring_type ring
)
85 return AMDGPU_HW_IP_GFX
;
87 return AMDGPU_HW_IP_DMA
;
89 return AMDGPU_HW_IP_COMPUTE
;
91 unreachable("unsupported ring");
/* Forward declarations; the middle parameters were dropped by extraction and
 * are restored to match the call sites (ctx, hw_ip, queue_idx, sem_info). */
static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx,
				   uint32_t ip_type,
				   uint32_t ring,
				   struct radv_winsys_sem_info *sem_info);
static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
				 struct amdgpu_cs_request *request,
				 struct radv_winsys_sem_info *sem_info);
103 static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx
*ctx
,
104 struct radv_amdgpu_fence
*fence
,
105 struct amdgpu_cs_request
*req
)
107 fence
->fence
.context
= ctx
->ctx
;
108 fence
->fence
.ip_type
= req
->ip_type
;
109 fence
->fence
.ip_instance
= req
->ip_instance
;
110 fence
->fence
.ring
= req
->ring
;
111 fence
->fence
.fence
= req
->seq_no
;
112 fence
->user_ptr
= (volatile uint64_t*)(ctx
->fence_map
+ (req
->ip_type
* MAX_RINGS_PER_TYPE
+ req
->ring
) * sizeof(uint64_t));
115 static struct radeon_winsys_fence
*radv_amdgpu_create_fence()
117 struct radv_amdgpu_fence
*fence
= calloc(1, sizeof(struct radv_amdgpu_fence
));
118 return (struct radeon_winsys_fence
*)fence
;
/* Release a fence created by radv_amdgpu_create_fence. The free() call was
 * lost in extraction; without it every fence leaks. */
static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence)
{
	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
	free(fence);
}
127 static bool radv_amdgpu_fence_wait(struct radeon_winsys
*_ws
,
128 struct radeon_winsys_fence
*_fence
,
132 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
133 unsigned flags
= absolute
? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE
: 0;
135 uint32_t expired
= 0;
137 if (fence
->user_ptr
) {
138 if (*fence
->user_ptr
>= fence
->fence
.fence
)
140 if (!absolute
&& !timeout
)
144 /* Now use the libdrm query. */
145 r
= amdgpu_cs_query_fence_status(&fence
->fence
,
151 fprintf(stderr
, "amdgpu: radv_amdgpu_cs_query_fence_status failed.\n");
162 static bool radv_amdgpu_fences_wait(struct radeon_winsys
*_ws
,
163 struct radeon_winsys_fence
*const *_fences
,
164 uint32_t fence_count
,
168 struct amdgpu_cs_fence
*fences
= malloc(sizeof(struct amdgpu_cs_fence
) * fence_count
);
170 uint32_t expired
= 0, first
= 0;
175 for (uint32_t i
= 0; i
< fence_count
; ++i
)
176 fences
[i
] = ((struct radv_amdgpu_fence
*)_fences
[i
])->fence
;
178 /* Now use the libdrm query. */
179 r
= amdgpu_cs_wait_fences(fences
, fence_count
, wait_all
,
180 timeout
, &expired
, &first
);
184 fprintf(stderr
, "amdgpu: amdgpu_cs_wait_fences failed.\n");
194 static void radv_amdgpu_cs_destroy(struct radeon_cmdbuf
*rcs
)
196 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(rcs
);
199 cs
->ws
->base
.buffer_destroy(cs
->ib_buffer
);
203 for (unsigned i
= 0; i
< cs
->num_old_ib_buffers
; ++i
)
204 cs
->ws
->base
.buffer_destroy(cs
->old_ib_buffers
[i
]);
206 for (unsigned i
= 0; i
< cs
->num_old_cs_buffers
; ++i
) {
207 struct radeon_cmdbuf
*rcs
= &cs
->old_cs_buffers
[i
];
211 free(cs
->old_cs_buffers
);
212 free(cs
->old_ib_buffers
);
213 free(cs
->virtual_buffers
);
214 free(cs
->virtual_buffer_hash_table
);
219 static void radv_amdgpu_init_cs(struct radv_amdgpu_cs
*cs
,
220 enum ring_type ring_type
)
222 for (int i
= 0; i
< ARRAY_SIZE(cs
->buffer_hash_table
); ++i
)
223 cs
->buffer_hash_table
[i
] = -1;
225 cs
->hw_ip
= ring_to_hw_ip(ring_type
);
228 static struct radeon_cmdbuf
*
229 radv_amdgpu_cs_create(struct radeon_winsys
*ws
,
230 enum ring_type ring_type
)
232 struct radv_amdgpu_cs
*cs
;
233 uint32_t ib_size
= 20 * 1024 * 4;
234 cs
= calloc(1, sizeof(struct radv_amdgpu_cs
));
238 cs
->ws
= radv_amdgpu_winsys(ws
);
239 radv_amdgpu_init_cs(cs
, ring_type
);
241 if (cs
->ws
->use_ib_bos
) {
242 cs
->ib_buffer
= ws
->buffer_create(ws
, ib_size
, 0,
244 RADEON_FLAG_CPU_ACCESS
|
245 RADEON_FLAG_NO_INTERPROCESS_SHARING
|
246 RADEON_FLAG_READ_ONLY
);
247 if (!cs
->ib_buffer
) {
252 cs
->ib_mapped
= ws
->buffer_map(cs
->ib_buffer
);
253 if (!cs
->ib_mapped
) {
254 ws
->buffer_destroy(cs
->ib_buffer
);
259 cs
->ib
.ib_mc_address
= radv_amdgpu_winsys_bo(cs
->ib_buffer
)->base
.va
;
260 cs
->base
.buf
= (uint32_t *)cs
->ib_mapped
;
261 cs
->base
.max_dw
= ib_size
/ 4 - 4;
262 cs
->ib_size_ptr
= &cs
->ib
.size
;
265 ws
->cs_add_buffer(&cs
->base
, cs
->ib_buffer
);
267 cs
->base
.buf
= malloc(16384);
268 cs
->base
.max_dw
= 4096;
278 static void radv_amdgpu_cs_grow(struct radeon_cmdbuf
*_cs
, size_t min_size
)
280 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
287 if (!cs
->ws
->use_ib_bos
) {
288 const uint64_t limit_dws
= 0xffff8;
289 uint64_t ib_dws
= MAX2(cs
->base
.cdw
+ min_size
,
290 MIN2(cs
->base
.max_dw
* 2, limit_dws
));
292 /* The total ib size cannot exceed limit_dws dwords. */
293 if (ib_dws
> limit_dws
)
295 /* The maximum size in dwords has been reached,
296 * try to allocate a new one.
298 if (cs
->num_old_cs_buffers
+ 1 >= AMDGPU_CS_MAX_IBS_PER_SUBMIT
) {
299 /* TODO: Allow to submit more than 4 IBs. */
300 fprintf(stderr
, "amdgpu: Maximum number of IBs "
301 "per submit reached.\n");
308 realloc(cs
->old_cs_buffers
,
309 (cs
->num_old_cs_buffers
+ 1) * sizeof(*cs
->old_cs_buffers
));
310 if (!cs
->old_cs_buffers
) {
316 /* Store the current one for submitting it later. */
317 cs
->old_cs_buffers
[cs
->num_old_cs_buffers
].cdw
= cs
->base
.cdw
;
318 cs
->old_cs_buffers
[cs
->num_old_cs_buffers
].max_dw
= cs
->base
.max_dw
;
319 cs
->old_cs_buffers
[cs
->num_old_cs_buffers
].buf
= cs
->base
.buf
;
320 cs
->num_old_cs_buffers
++;
322 /* Reset the cs, it will be re-allocated below. */
326 /* Re-compute the number of dwords to allocate. */
327 ib_dws
= MAX2(cs
->base
.cdw
+ min_size
,
328 MIN2(cs
->base
.max_dw
* 2, limit_dws
));
329 if (ib_dws
> limit_dws
) {
330 fprintf(stderr
, "amdgpu: Too high number of "
331 "dwords to allocate\n");
337 uint32_t *new_buf
= realloc(cs
->base
.buf
, ib_dws
* 4);
339 cs
->base
.buf
= new_buf
;
340 cs
->base
.max_dw
= ib_dws
;
348 uint64_t ib_size
= MAX2(min_size
* 4 + 16, cs
->base
.max_dw
* 4 * 2);
350 /* max that fits in the chain size field. */
351 ib_size
= MIN2(ib_size
, 0xfffff);
353 while (!cs
->base
.cdw
|| (cs
->base
.cdw
& 7) != 4)
354 radeon_emit(&cs
->base
, 0xffff1000);
356 *cs
->ib_size_ptr
|= cs
->base
.cdw
+ 4;
358 if (cs
->num_old_ib_buffers
== cs
->max_num_old_ib_buffers
) {
359 cs
->max_num_old_ib_buffers
= MAX2(1, cs
->max_num_old_ib_buffers
* 2);
360 cs
->old_ib_buffers
= realloc(cs
->old_ib_buffers
,
361 cs
->max_num_old_ib_buffers
* sizeof(void*));
364 cs
->old_ib_buffers
[cs
->num_old_ib_buffers
++] = cs
->ib_buffer
;
366 cs
->ib_buffer
= cs
->ws
->base
.buffer_create(&cs
->ws
->base
, ib_size
, 0,
368 RADEON_FLAG_CPU_ACCESS
|
369 RADEON_FLAG_NO_INTERPROCESS_SHARING
|
370 RADEON_FLAG_READ_ONLY
);
372 if (!cs
->ib_buffer
) {
375 cs
->ib_buffer
= cs
->old_ib_buffers
[--cs
->num_old_ib_buffers
];
378 cs
->ib_mapped
= cs
->ws
->base
.buffer_map(cs
->ib_buffer
);
379 if (!cs
->ib_mapped
) {
380 cs
->ws
->base
.buffer_destroy(cs
->ib_buffer
);
383 cs
->ib_buffer
= cs
->old_ib_buffers
[--cs
->num_old_ib_buffers
];
386 cs
->ws
->base
.cs_add_buffer(&cs
->base
, cs
->ib_buffer
);
388 radeon_emit(&cs
->base
, PKT3(PKT3_INDIRECT_BUFFER_CIK
, 2, 0));
389 radeon_emit(&cs
->base
, radv_amdgpu_winsys_bo(cs
->ib_buffer
)->base
.va
);
390 radeon_emit(&cs
->base
, radv_amdgpu_winsys_bo(cs
->ib_buffer
)->base
.va
>> 32);
391 radeon_emit(&cs
->base
, S_3F2_CHAIN(1) | S_3F2_VALID(1));
393 cs
->ib_size_ptr
= cs
->base
.buf
+ cs
->base
.cdw
- 1;
395 cs
->base
.buf
= (uint32_t *)cs
->ib_mapped
;
397 cs
->base
.max_dw
= ib_size
/ 4 - 4;
401 static bool radv_amdgpu_cs_finalize(struct radeon_cmdbuf
*_cs
)
403 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
405 if (cs
->ws
->use_ib_bos
) {
406 while (!cs
->base
.cdw
|| (cs
->base
.cdw
& 7) != 0)
407 radeon_emit(&cs
->base
, 0xffff1000);
409 *cs
->ib_size_ptr
|= cs
->base
.cdw
;
411 cs
->is_chained
= false;
417 static void radv_amdgpu_cs_reset(struct radeon_cmdbuf
*_cs
)
419 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
423 for (unsigned i
= 0; i
< cs
->num_buffers
; ++i
) {
424 unsigned hash
= ((uintptr_t)cs
->handles
[i
] >> 6) &
425 (ARRAY_SIZE(cs
->buffer_hash_table
) - 1);
426 cs
->buffer_hash_table
[hash
] = -1;
429 for (unsigned i
= 0; i
< cs
->num_virtual_buffers
; ++i
) {
430 unsigned hash
= ((uintptr_t)cs
->virtual_buffers
[i
] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE
- 1);
431 cs
->virtual_buffer_hash_table
[hash
] = -1;
435 cs
->num_virtual_buffers
= 0;
437 if (cs
->ws
->use_ib_bos
) {
438 cs
->ws
->base
.cs_add_buffer(&cs
->base
, cs
->ib_buffer
);
440 for (unsigned i
= 0; i
< cs
->num_old_ib_buffers
; ++i
)
441 cs
->ws
->base
.buffer_destroy(cs
->old_ib_buffers
[i
]);
443 cs
->num_old_ib_buffers
= 0;
444 cs
->ib
.ib_mc_address
= radv_amdgpu_winsys_bo(cs
->ib_buffer
)->base
.va
;
445 cs
->ib_size_ptr
= &cs
->ib
.size
;
448 for (unsigned i
= 0; i
< cs
->num_old_cs_buffers
; ++i
) {
449 struct radeon_cmdbuf
*rcs
= &cs
->old_cs_buffers
[i
];
453 free(cs
->old_cs_buffers
);
454 cs
->old_cs_buffers
= NULL
;
455 cs
->num_old_cs_buffers
= 0;
459 static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs
*cs
,
462 unsigned hash
= ((uintptr_t)bo
>> 6) & (ARRAY_SIZE(cs
->buffer_hash_table
) - 1);
463 int index
= cs
->buffer_hash_table
[hash
];
468 if (cs
->handles
[index
] == bo
)
471 for (unsigned i
= 0; i
< cs
->num_buffers
; ++i
) {
472 if (cs
->handles
[i
] == bo
) {
473 cs
->buffer_hash_table
[hash
] = i
;
481 static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs
*cs
,
485 int index
= radv_amdgpu_cs_find_buffer(cs
, bo
);
490 if (cs
->num_buffers
== cs
->max_num_buffers
) {
491 unsigned new_count
= MAX2(1, cs
->max_num_buffers
* 2);
492 cs
->handles
= realloc(cs
->handles
, new_count
* sizeof(amdgpu_bo_handle
));
493 cs
->max_num_buffers
= new_count
;
496 cs
->handles
[cs
->num_buffers
] = bo
;
498 hash
= ((uintptr_t)bo
>> 6) & (ARRAY_SIZE(cs
->buffer_hash_table
) - 1);
499 cs
->buffer_hash_table
[hash
] = cs
->num_buffers
;
504 static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_cmdbuf
*_cs
,
505 struct radeon_winsys_bo
*bo
)
507 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
508 unsigned hash
= ((uintptr_t)bo
>> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE
- 1);
511 if (!cs
->virtual_buffer_hash_table
) {
512 cs
->virtual_buffer_hash_table
= malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE
* sizeof(int));
513 for (int i
= 0; i
< VIRTUAL_BUFFER_HASH_TABLE_SIZE
; ++i
)
514 cs
->virtual_buffer_hash_table
[i
] = -1;
517 if (cs
->virtual_buffer_hash_table
[hash
] >= 0) {
518 int idx
= cs
->virtual_buffer_hash_table
[hash
];
519 if (cs
->virtual_buffers
[idx
] == bo
) {
522 for (unsigned i
= 0; i
< cs
->num_virtual_buffers
; ++i
) {
523 if (cs
->virtual_buffers
[i
] == bo
) {
524 cs
->virtual_buffer_hash_table
[hash
] = i
;
530 if(cs
->max_num_virtual_buffers
<= cs
->num_virtual_buffers
) {
531 cs
->max_num_virtual_buffers
= MAX2(2, cs
->max_num_virtual_buffers
* 2);
532 cs
->virtual_buffers
= realloc(cs
->virtual_buffers
, sizeof(struct radv_amdgpu_virtual_virtual_buffer
*) * cs
->max_num_virtual_buffers
);
535 cs
->virtual_buffers
[cs
->num_virtual_buffers
] = bo
;
537 cs
->virtual_buffer_hash_table
[hash
] = cs
->num_virtual_buffers
;
538 ++cs
->num_virtual_buffers
;
542 static void radv_amdgpu_cs_add_buffer(struct radeon_cmdbuf
*_cs
,
543 struct radeon_winsys_bo
*_bo
)
545 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
546 struct radv_amdgpu_winsys_bo
*bo
= radv_amdgpu_winsys_bo(_bo
);
548 if (bo
->is_virtual
) {
549 radv_amdgpu_cs_add_virtual_buffer(_cs
, _bo
);
553 if (bo
->base
.is_local
)
556 radv_amdgpu_cs_add_buffer_internal(cs
, bo
->bo
);
559 static void radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf
*_parent
,
560 struct radeon_cmdbuf
*_child
)
562 struct radv_amdgpu_cs
*parent
= radv_amdgpu_cs(_parent
);
563 struct radv_amdgpu_cs
*child
= radv_amdgpu_cs(_child
);
565 for (unsigned i
= 0; i
< child
->num_buffers
; ++i
) {
566 radv_amdgpu_cs_add_buffer_internal(parent
, child
->handles
[i
]);
569 for (unsigned i
= 0; i
< child
->num_virtual_buffers
; ++i
) {
570 radv_amdgpu_cs_add_buffer(&parent
->base
, child
->virtual_buffers
[i
]);
573 if (parent
->ws
->use_ib_bos
) {
574 if (parent
->base
.cdw
+ 4 > parent
->base
.max_dw
)
575 radv_amdgpu_cs_grow(&parent
->base
, 4);
577 radeon_emit(&parent
->base
, PKT3(PKT3_INDIRECT_BUFFER_CIK
, 2, 0));
578 radeon_emit(&parent
->base
, child
->ib
.ib_mc_address
);
579 radeon_emit(&parent
->base
, child
->ib
.ib_mc_address
>> 32);
580 radeon_emit(&parent
->base
, child
->ib
.size
);
582 if (parent
->base
.cdw
+ child
->base
.cdw
> parent
->base
.max_dw
)
583 radv_amdgpu_cs_grow(&parent
->base
, child
->base
.cdw
);
585 memcpy(parent
->base
.buf
+ parent
->base
.cdw
, child
->base
.buf
, 4 * child
->base
.cdw
);
586 parent
->base
.cdw
+= child
->base
.cdw
;
590 static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys
*ws
,
591 struct radeon_cmdbuf
**cs_array
,
593 struct radv_amdgpu_winsys_bo
**extra_bo_array
,
594 unsigned num_extra_bo
,
595 struct radeon_cmdbuf
*extra_cs
,
596 const struct radv_winsys_bo_list
*radv_bo_list
,
597 amdgpu_bo_list_handle
*bo_list
)
601 if (ws
->debug_all_bos
) {
602 struct radv_amdgpu_winsys_bo
*bo
;
603 amdgpu_bo_handle
*handles
;
606 pthread_mutex_lock(&ws
->global_bo_list_lock
);
608 handles
= malloc(sizeof(handles
[0]) * ws
->num_buffers
);
610 pthread_mutex_unlock(&ws
->global_bo_list_lock
);
614 LIST_FOR_EACH_ENTRY(bo
, &ws
->global_bo_list
, global_list_item
) {
615 assert(num
< ws
->num_buffers
);
616 handles
[num
++] = bo
->bo
;
619 r
= amdgpu_bo_list_create(ws
->dev
, ws
->num_buffers
,
623 pthread_mutex_unlock(&ws
->global_bo_list_lock
);
624 } else if (count
== 1 && !num_extra_bo
&& !extra_cs
&& !radv_bo_list
&&
625 !radv_amdgpu_cs(cs_array
[0])->num_virtual_buffers
) {
626 struct radv_amdgpu_cs
*cs
= (struct radv_amdgpu_cs
*)cs_array
[0];
627 if (cs
->num_buffers
== 0) {
631 r
= amdgpu_bo_list_create(ws
->dev
, cs
->num_buffers
, cs
->handles
,
634 unsigned total_buffer_count
= num_extra_bo
;
635 unsigned unique_bo_count
= num_extra_bo
;
636 for (unsigned i
= 0; i
< count
; ++i
) {
637 struct radv_amdgpu_cs
*cs
= (struct radv_amdgpu_cs
*)cs_array
[i
];
638 total_buffer_count
+= cs
->num_buffers
;
639 for (unsigned j
= 0; j
< cs
->num_virtual_buffers
; ++j
)
640 total_buffer_count
+= radv_amdgpu_winsys_bo(cs
->virtual_buffers
[j
])->bo_count
;
644 total_buffer_count
+= ((struct radv_amdgpu_cs
*)extra_cs
)->num_buffers
;
648 total_buffer_count
+= radv_bo_list
->count
;
651 if (total_buffer_count
== 0) {
655 amdgpu_bo_handle
*handles
= malloc(sizeof(amdgpu_bo_handle
) * total_buffer_count
);
661 for (unsigned i
= 0; i
< num_extra_bo
; i
++) {
662 handles
[i
] = extra_bo_array
[i
]->bo
;
665 for (unsigned i
= 0; i
< count
+ !!extra_cs
; ++i
) {
666 struct radv_amdgpu_cs
*cs
;
669 cs
= (struct radv_amdgpu_cs
*)extra_cs
;
671 cs
= (struct radv_amdgpu_cs
*)cs_array
[i
];
673 if (!cs
->num_buffers
)
676 if (unique_bo_count
== 0 && !cs
->num_virtual_buffers
) {
677 memcpy(handles
, cs
->handles
, cs
->num_buffers
* sizeof(amdgpu_bo_handle
));
678 unique_bo_count
= cs
->num_buffers
;
681 int unique_bo_so_far
= unique_bo_count
;
682 for (unsigned j
= 0; j
< cs
->num_buffers
; ++j
) {
684 for (unsigned k
= 0; k
< unique_bo_so_far
; ++k
) {
685 if (handles
[k
] == cs
->handles
[j
]) {
691 handles
[unique_bo_count
] = cs
->handles
[j
];
695 for (unsigned j
= 0; j
< cs
->num_virtual_buffers
; ++j
) {
696 struct radv_amdgpu_winsys_bo
*virtual_bo
= radv_amdgpu_winsys_bo(cs
->virtual_buffers
[j
]);
697 for(unsigned k
= 0; k
< virtual_bo
->bo_count
; ++k
) {
698 struct radv_amdgpu_winsys_bo
*bo
= virtual_bo
->bos
[k
];
700 for (unsigned m
= 0; m
< unique_bo_count
; ++m
) {
701 if (handles
[m
] == bo
->bo
) {
707 handles
[unique_bo_count
] = bo
->bo
;
715 unsigned unique_bo_so_far
= unique_bo_count
;
716 for (unsigned i
= 0; i
< radv_bo_list
->count
; ++i
) {
717 struct radv_amdgpu_winsys_bo
*bo
= radv_amdgpu_winsys_bo(radv_bo_list
->bos
[i
]);
719 for (unsigned j
= 0; j
< unique_bo_so_far
; ++j
) {
720 if (bo
->bo
== handles
[j
]) {
726 handles
[unique_bo_count
] = bo
->bo
;
732 if (unique_bo_count
> 0) {
733 r
= amdgpu_bo_list_create(ws
->dev
, unique_bo_count
, handles
,
745 static struct amdgpu_cs_fence_info
radv_set_cs_fence(struct radv_amdgpu_ctx
*ctx
, int ip_type
, int ring
)
747 struct amdgpu_cs_fence_info ret
= {0};
748 if (ctx
->fence_map
) {
749 ret
.handle
= radv_amdgpu_winsys_bo(ctx
->fence_bo
)->bo
;
750 ret
.offset
= (ip_type
* MAX_RINGS_PER_TYPE
+ ring
) * sizeof(uint64_t);
755 static void radv_assign_last_submit(struct radv_amdgpu_ctx
*ctx
,
756 struct amdgpu_cs_request
*request
)
758 radv_amdgpu_request_to_fence(ctx
,
759 &ctx
->last_submission
[request
->ip_type
][request
->ring
],
763 static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx
*_ctx
,
765 struct radv_winsys_sem_info
*sem_info
,
766 const struct radv_winsys_bo_list
*radv_bo_list
,
767 struct radeon_cmdbuf
**cs_array
,
769 struct radeon_cmdbuf
*initial_preamble_cs
,
770 struct radeon_cmdbuf
*continue_preamble_cs
,
771 struct radeon_winsys_fence
*_fence
)
774 struct radv_amdgpu_ctx
*ctx
= radv_amdgpu_ctx(_ctx
);
775 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
776 struct radv_amdgpu_cs
*cs0
= radv_amdgpu_cs(cs_array
[0]);
777 amdgpu_bo_list_handle bo_list
;
778 struct amdgpu_cs_request request
= {0};
779 struct amdgpu_cs_ib_info ibs
[2];
780 unsigned number_of_ibs
= 1;
782 for (unsigned i
= cs_count
; i
--;) {
783 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[i
]);
785 if (cs
->is_chained
) {
786 *cs
->ib_size_ptr
-= 4;
787 cs
->is_chained
= false;
790 if (i
+ 1 < cs_count
) {
791 struct radv_amdgpu_cs
*next
= radv_amdgpu_cs(cs_array
[i
+ 1]);
792 assert(cs
->base
.cdw
+ 4 <= cs
->base
.max_dw
);
794 cs
->is_chained
= true;
795 *cs
->ib_size_ptr
+= 4;
797 cs
->base
.buf
[cs
->base
.cdw
+ 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK
, 2, 0);
798 cs
->base
.buf
[cs
->base
.cdw
+ 1] = next
->ib
.ib_mc_address
;
799 cs
->base
.buf
[cs
->base
.cdw
+ 2] = next
->ib
.ib_mc_address
>> 32;
800 cs
->base
.buf
[cs
->base
.cdw
+ 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next
->ib
.size
;
804 /* Create a buffer object list. */
805 r
= radv_amdgpu_create_bo_list(cs0
->ws
, cs_array
, cs_count
, NULL
, 0,
806 initial_preamble_cs
, radv_bo_list
,
809 fprintf(stderr
, "amdgpu: buffer list creation failed for the "
810 "chained submission(%d)\n", r
);
814 /* Configure the CS request. */
815 if (initial_preamble_cs
) {
816 ibs
[0] = radv_amdgpu_cs(initial_preamble_cs
)->ib
;
823 request
.ip_type
= cs0
->hw_ip
;
824 request
.ring
= queue_idx
;
825 request
.number_of_ibs
= number_of_ibs
;
827 request
.resources
= bo_list
;
828 request
.fence_info
= radv_set_cs_fence(ctx
, cs0
->hw_ip
, queue_idx
);
831 r
= radv_amdgpu_cs_submit(ctx
, &request
, sem_info
);
834 fprintf(stderr
, "amdgpu: Not enough memory for command submission.\n");
836 fprintf(stderr
, "amdgpu: The CS has been rejected, "
837 "see dmesg for more information.\n");
841 amdgpu_bo_list_destroy(bo_list
);
847 radv_amdgpu_request_to_fence(ctx
, fence
, &request
);
849 radv_assign_last_submit(ctx
, &request
);
854 static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx
*_ctx
,
856 struct radv_winsys_sem_info
*sem_info
,
857 const struct radv_winsys_bo_list
*radv_bo_list
,
858 struct radeon_cmdbuf
**cs_array
,
860 struct radeon_cmdbuf
*initial_preamble_cs
,
861 struct radeon_cmdbuf
*continue_preamble_cs
,
862 struct radeon_winsys_fence
*_fence
)
865 struct radv_amdgpu_ctx
*ctx
= radv_amdgpu_ctx(_ctx
);
866 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
867 amdgpu_bo_list_handle bo_list
;
868 struct amdgpu_cs_request request
= {};
869 struct amdgpu_cs_ib_info
*ibs
;
870 struct radv_amdgpu_cs
*cs0
;
871 unsigned number_of_ibs
;
874 cs0
= radv_amdgpu_cs(cs_array
[0]);
876 /* Compute the number of IBs for this submit. */
877 number_of_ibs
= cs_count
+ !!initial_preamble_cs
;
879 /* Create a buffer object list. */
880 r
= radv_amdgpu_create_bo_list(cs0
->ws
, &cs_array
[0], cs_count
, NULL
, 0,
881 initial_preamble_cs
, radv_bo_list
,
884 fprintf(stderr
, "amdgpu: buffer list creation failed "
885 "for the fallback submission (%d)\n", r
);
889 ibs
= malloc(number_of_ibs
* sizeof(*ibs
));
892 amdgpu_bo_list_destroy(bo_list
);
896 /* Configure the CS request. */
897 if (initial_preamble_cs
)
898 ibs
[0] = radv_amdgpu_cs(initial_preamble_cs
)->ib
;
900 for (unsigned i
= 0; i
< cs_count
; i
++) {
901 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[i
]);
903 ibs
[i
+ !!initial_preamble_cs
] = cs
->ib
;
905 if (cs
->is_chained
) {
906 *cs
->ib_size_ptr
-= 4;
907 cs
->is_chained
= false;
911 request
.ip_type
= cs0
->hw_ip
;
912 request
.ring
= queue_idx
;
913 request
.resources
= bo_list
;
914 request
.number_of_ibs
= number_of_ibs
;
916 request
.fence_info
= radv_set_cs_fence(ctx
, cs0
->hw_ip
, queue_idx
);
919 r
= radv_amdgpu_cs_submit(ctx
, &request
, sem_info
);
922 fprintf(stderr
, "amdgpu: Not enough memory for command submission.\n");
924 fprintf(stderr
, "amdgpu: The CS has been rejected, "
925 "see dmesg for more information.\n");
929 amdgpu_bo_list_destroy(bo_list
);
936 radv_amdgpu_request_to_fence(ctx
, fence
, &request
);
938 radv_assign_last_submit(ctx
, &request
);
943 static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx
*_ctx
,
945 struct radv_winsys_sem_info
*sem_info
,
946 const struct radv_winsys_bo_list
*radv_bo_list
,
947 struct radeon_cmdbuf
**cs_array
,
949 struct radeon_cmdbuf
*initial_preamble_cs
,
950 struct radeon_cmdbuf
*continue_preamble_cs
,
951 struct radeon_winsys_fence
*_fence
)
954 struct radv_amdgpu_ctx
*ctx
= radv_amdgpu_ctx(_ctx
);
955 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
956 struct radv_amdgpu_cs
*cs0
= radv_amdgpu_cs(cs_array
[0]);
957 struct radeon_winsys
*ws
= (struct radeon_winsys
*)cs0
->ws
;
958 amdgpu_bo_list_handle bo_list
;
959 struct amdgpu_cs_request request
;
960 uint32_t pad_word
= 0xffff1000U
;
961 bool emit_signal_sem
= sem_info
->cs_emit_signal
;
963 if (radv_amdgpu_winsys(ws
)->info
.chip_class
== SI
)
964 pad_word
= 0x80000000;
968 for (unsigned i
= 0; i
< cs_count
;) {
969 struct amdgpu_cs_ib_info ibs
[AMDGPU_CS_MAX_IBS_PER_SUBMIT
] = {0};
970 unsigned number_of_ibs
= 1;
971 struct radeon_winsys_bo
*bos
[AMDGPU_CS_MAX_IBS_PER_SUBMIT
] = {0};
972 struct radeon_cmdbuf
*preamble_cs
= i
? continue_preamble_cs
: initial_preamble_cs
;
973 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[i
]);
977 unsigned pad_words
= 0;
979 if (cs
->num_old_cs_buffers
> 0) {
980 /* Special path when the maximum size in dwords has
981 * been reached because we need to handle more than one
984 unsigned new_cs_count
= cs
->num_old_cs_buffers
+ 1;
985 struct radeon_cmdbuf
*new_cs_array
[AMDGPU_CS_MAX_IBS_PER_SUBMIT
];
988 for (unsigned j
= 0; j
< cs
->num_old_cs_buffers
; j
++)
989 new_cs_array
[idx
++] = &cs
->old_cs_buffers
[j
];
990 new_cs_array
[idx
++] = cs_array
[i
];
992 for (unsigned j
= 0; j
< new_cs_count
; j
++) {
993 struct radeon_cmdbuf
*rcs
= new_cs_array
[j
];
994 bool needs_preamble
= preamble_cs
&& j
== 0;
998 size
+= preamble_cs
->cdw
;
1001 assert(size
< 0xffff8);
1003 while (!size
|| (size
& 7)) {
1008 bos
[j
] = ws
->buffer_create(ws
, 4 * size
, 4096,
1010 RADEON_FLAG_CPU_ACCESS
|
1011 RADEON_FLAG_NO_INTERPROCESS_SHARING
|
1012 RADEON_FLAG_READ_ONLY
);
1013 ptr
= ws
->buffer_map(bos
[j
]);
1015 if (needs_preamble
) {
1016 memcpy(ptr
, preamble_cs
->buf
, preamble_cs
->cdw
* 4);
1017 ptr
+= preamble_cs
->cdw
;
1020 memcpy(ptr
, rcs
->buf
, 4 * rcs
->cdw
);
1023 for (unsigned k
= 0; k
< pad_words
; ++k
)
1027 ibs
[j
].ib_mc_address
= radv_buffer_get_va(bos
[j
]);
1030 number_of_ibs
= new_cs_count
;
1034 size
+= preamble_cs
->cdw
;
1036 while (i
+ cnt
< cs_count
&& 0xffff8 - size
>= radv_amdgpu_cs(cs_array
[i
+ cnt
])->base
.cdw
) {
1037 size
+= radv_amdgpu_cs(cs_array
[i
+ cnt
])->base
.cdw
;
1041 while (!size
|| (size
& 7)) {
1047 bos
[0] = ws
->buffer_create(ws
, 4 * size
, 4096,
1049 RADEON_FLAG_CPU_ACCESS
|
1050 RADEON_FLAG_NO_INTERPROCESS_SHARING
|
1051 RADEON_FLAG_READ_ONLY
);
1052 ptr
= ws
->buffer_map(bos
[0]);
1055 memcpy(ptr
, preamble_cs
->buf
, preamble_cs
->cdw
* 4);
1056 ptr
+= preamble_cs
->cdw
;
1059 for (unsigned j
= 0; j
< cnt
; ++j
) {
1060 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[i
+ j
]);
1061 memcpy(ptr
, cs
->base
.buf
, 4 * cs
->base
.cdw
);
1062 ptr
+= cs
->base
.cdw
;
1066 for (unsigned j
= 0; j
< pad_words
; ++j
)
1070 ibs
[0].ib_mc_address
= radv_buffer_get_va(bos
[0]);
1073 r
= radv_amdgpu_create_bo_list(cs0
->ws
, &cs_array
[i
], cnt
,
1074 (struct radv_amdgpu_winsys_bo
**)bos
,
1075 number_of_ibs
, preamble_cs
,
1076 radv_bo_list
, &bo_list
);
1078 fprintf(stderr
, "amdgpu: buffer list creation failed "
1079 "for the sysmem submission (%d)\n", r
);
1083 memset(&request
, 0, sizeof(request
));
1085 request
.ip_type
= cs0
->hw_ip
;
1086 request
.ring
= queue_idx
;
1087 request
.resources
= bo_list
;
1088 request
.number_of_ibs
= number_of_ibs
;
1090 request
.fence_info
= radv_set_cs_fence(ctx
, cs0
->hw_ip
, queue_idx
);
1092 sem_info
->cs_emit_signal
= (i
== cs_count
- cnt
) ? emit_signal_sem
: false;
1093 r
= radv_amdgpu_cs_submit(ctx
, &request
, sem_info
);
1096 fprintf(stderr
, "amdgpu: Not enough memory for command submission.\n");
1098 fprintf(stderr
, "amdgpu: The CS has been rejected, "
1099 "see dmesg for more information.\n");
1103 amdgpu_bo_list_destroy(bo_list
);
1105 for (unsigned j
= 0; j
< number_of_ibs
; j
++) {
1106 ws
->buffer_destroy(bos
[j
]);
1115 radv_amdgpu_request_to_fence(ctx
, fence
, &request
);
1117 radv_assign_last_submit(ctx
, &request
);
1122 static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx
*_ctx
,
1124 struct radeon_cmdbuf
**cs_array
,
1126 struct radeon_cmdbuf
*initial_preamble_cs
,
1127 struct radeon_cmdbuf
*continue_preamble_cs
,
1128 struct radv_winsys_sem_info
*sem_info
,
1129 const struct radv_winsys_bo_list
*bo_list
,
1131 struct radeon_winsys_fence
*_fence
)
1133 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[0]);
1134 struct radv_amdgpu_ctx
*ctx
= radv_amdgpu_ctx(_ctx
);
1138 if (!cs
->ws
->use_ib_bos
) {
1139 ret
= radv_amdgpu_winsys_cs_submit_sysmem(_ctx
, queue_idx
, sem_info
, bo_list
, cs_array
,
1140 cs_count
, initial_preamble_cs
, continue_preamble_cs
, _fence
);
1141 } else if (can_patch
&& cs
->ws
->batchchain
) {
1142 ret
= radv_amdgpu_winsys_cs_submit_chained(_ctx
, queue_idx
, sem_info
, bo_list
, cs_array
,
1143 cs_count
, initial_preamble_cs
, continue_preamble_cs
, _fence
);
1145 ret
= radv_amdgpu_winsys_cs_submit_fallback(_ctx
, queue_idx
, sem_info
, bo_list
, cs_array
,
1146 cs_count
, initial_preamble_cs
, continue_preamble_cs
, _fence
);
1149 radv_amdgpu_signal_sems(ctx
, cs
->hw_ip
, queue_idx
, sem_info
);
1153 static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs
, uint64_t addr
)
1155 struct radv_amdgpu_cs
*cs
= (struct radv_amdgpu_cs
*)_cs
;
1160 for (unsigned i
= 0; i
<= cs
->num_old_ib_buffers
; ++i
) {
1161 struct radv_amdgpu_winsys_bo
*bo
;
1163 bo
= (struct radv_amdgpu_winsys_bo
*)
1164 (i
== cs
->num_old_ib_buffers
? cs
->ib_buffer
: cs
->old_ib_buffers
[i
]);
1165 if (addr
>= bo
->base
.va
&& addr
- bo
->base
.va
< bo
->size
) {
1166 if (amdgpu_bo_cpu_map(bo
->bo
, &ret
) == 0)
1167 return (char *)ret
+ (addr
- bo
->base
.va
);
1170 if(cs
->ws
->debug_all_bos
) {
1171 pthread_mutex_lock(&cs
->ws
->global_bo_list_lock
);
1172 list_for_each_entry(struct radv_amdgpu_winsys_bo
, bo
,
1173 &cs
->ws
->global_bo_list
, global_list_item
) {
1174 if (addr
>= bo
->base
.va
&& addr
- bo
->base
.va
< bo
->size
) {
1175 if (amdgpu_bo_cpu_map(bo
->bo
, &ret
) == 0) {
1176 pthread_mutex_unlock(&cs
->ws
->global_bo_list_lock
);
1177 return (char *)ret
+ (addr
- bo
->base
.va
);
1181 pthread_mutex_unlock(&cs
->ws
->global_bo_list_lock
);
1186 static void radv_amdgpu_winsys_cs_dump(struct radeon_cmdbuf
*_cs
,
1188 const int *trace_ids
, int trace_id_count
)
1190 struct radv_amdgpu_cs
*cs
= (struct radv_amdgpu_cs
*)_cs
;
1191 void *ib
= cs
->base
.buf
;
1192 int num_dw
= cs
->base
.cdw
;
1194 if (cs
->ws
->use_ib_bos
) {
1195 ib
= radv_amdgpu_winsys_get_cpu_addr(cs
, cs
->ib
.ib_mc_address
);
1196 num_dw
= cs
->ib
.size
;
1199 ac_parse_ib(file
, ib
, num_dw
, trace_ids
, trace_id_count
, "main IB",
1200 cs
->ws
->info
.chip_class
, radv_amdgpu_winsys_get_cpu_addr
, cs
);
1203 static uint32_t radv_to_amdgpu_priority(enum radeon_ctx_priority radv_priority
)
1205 switch (radv_priority
) {
1206 case RADEON_CTX_PRIORITY_REALTIME
:
1207 return AMDGPU_CTX_PRIORITY_VERY_HIGH
;
1208 case RADEON_CTX_PRIORITY_HIGH
:
1209 return AMDGPU_CTX_PRIORITY_HIGH
;
1210 case RADEON_CTX_PRIORITY_MEDIUM
:
1211 return AMDGPU_CTX_PRIORITY_NORMAL
;
1212 case RADEON_CTX_PRIORITY_LOW
:
1213 return AMDGPU_CTX_PRIORITY_LOW
;
1215 unreachable("Invalid context priority");
1219 static struct radeon_winsys_ctx
*radv_amdgpu_ctx_create(struct radeon_winsys
*_ws
,
1220 enum radeon_ctx_priority priority
)
1222 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1223 struct radv_amdgpu_ctx
*ctx
= CALLOC_STRUCT(radv_amdgpu_ctx
);
1224 uint32_t amdgpu_priority
= radv_to_amdgpu_priority(priority
);
1230 r
= amdgpu_cs_ctx_create2(ws
->dev
, amdgpu_priority
, &ctx
->ctx
);
1232 fprintf(stderr
, "amdgpu: radv_amdgpu_cs_ctx_create2 failed. (%i)\n", r
);
1237 assert(AMDGPU_HW_IP_NUM
* MAX_RINGS_PER_TYPE
* sizeof(uint64_t) <= 4096);
1238 ctx
->fence_bo
= ws
->base
.buffer_create(&ws
->base
, 4096, 8,
1240 RADEON_FLAG_CPU_ACCESS
|
1241 RADEON_FLAG_NO_INTERPROCESS_SHARING
);
1243 ctx
->fence_map
= (uint64_t*)ws
->base
.buffer_map(ctx
->fence_bo
);
1245 memset(ctx
->fence_map
, 0, 4096);
1246 return (struct radeon_winsys_ctx
*)ctx
;
1252 static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx
*rwctx
)
1254 struct radv_amdgpu_ctx
*ctx
= (struct radv_amdgpu_ctx
*)rwctx
;
1255 ctx
->ws
->base
.buffer_destroy(ctx
->fence_bo
);
1256 amdgpu_cs_ctx_free(ctx
->ctx
);
1260 static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx
*rwctx
,
1261 enum ring_type ring_type
, int ring_index
)
1263 struct radv_amdgpu_ctx
*ctx
= (struct radv_amdgpu_ctx
*)rwctx
;
1264 int ip_type
= ring_to_hw_ip(ring_type
);
1266 if (ctx
->last_submission
[ip_type
][ring_index
].fence
.fence
) {
1268 int ret
= amdgpu_cs_query_fence_status(&ctx
->last_submission
[ip_type
][ring_index
].fence
,
1269 1000000000ull, 0, &expired
);
1271 if (ret
|| !expired
)
1278 static struct radeon_winsys_sem
*radv_amdgpu_create_sem(struct radeon_winsys
*_ws
)
1280 struct amdgpu_cs_fence
*sem
= CALLOC_STRUCT(amdgpu_cs_fence
);
1284 return (struct radeon_winsys_sem
*)sem
;
/* Free a semaphore previously created by radv_amdgpu_create_sem. */
static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
{
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_sem;

	FREE(fence);
}
1293 static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx
*ctx
,
1296 struct radv_winsys_sem_info
*sem_info
)
1298 for (unsigned i
= 0; i
< sem_info
->signal
.sem_count
; i
++) {
1299 struct amdgpu_cs_fence
*sem
= (struct amdgpu_cs_fence
*)(sem_info
->signal
.sem
)[i
];
1304 *sem
= ctx
->last_submission
[ip_type
][ring
].fence
;
1309 static struct drm_amdgpu_cs_chunk_sem
*radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts
*counts
,
1310 struct drm_amdgpu_cs_chunk
*chunk
, int chunk_id
)
1312 struct drm_amdgpu_cs_chunk_sem
*syncobj
= malloc(sizeof(struct drm_amdgpu_cs_chunk_sem
) * counts
->syncobj_count
);
1316 for (unsigned i
= 0; i
< counts
->syncobj_count
; i
++) {
1317 struct drm_amdgpu_cs_chunk_sem
*sem
= &syncobj
[i
];
1318 sem
->handle
= counts
->syncobj
[i
];
1321 chunk
->chunk_id
= chunk_id
;
1322 chunk
->length_dw
= sizeof(struct drm_amdgpu_cs_chunk_sem
) / 4 * counts
->syncobj_count
;
1323 chunk
->chunk_data
= (uint64_t)(uintptr_t)syncobj
;
1327 static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx
*ctx
,
1328 struct amdgpu_cs_request
*request
,
1329 struct radv_winsys_sem_info
*sem_info
)
1335 struct drm_amdgpu_cs_chunk
*chunks
;
1336 struct drm_amdgpu_cs_chunk_data
*chunk_data
;
1337 struct drm_amdgpu_cs_chunk_dep
*sem_dependencies
= NULL
;
1338 struct drm_amdgpu_cs_chunk_sem
*wait_syncobj
= NULL
, *signal_syncobj
= NULL
;
1340 struct amdgpu_cs_fence
*sem
;
1342 user_fence
= (request
->fence_info
.handle
!= NULL
);
1343 size
= request
->number_of_ibs
+ (user_fence
? 2 : 1) + 3;
1345 chunks
= alloca(sizeof(struct drm_amdgpu_cs_chunk
) * size
);
1347 size
= request
->number_of_ibs
+ (user_fence
? 1 : 0);
1349 chunk_data
= alloca(sizeof(struct drm_amdgpu_cs_chunk_data
) * size
);
1351 num_chunks
= request
->number_of_ibs
;
1352 for (i
= 0; i
< request
->number_of_ibs
; i
++) {
1353 struct amdgpu_cs_ib_info
*ib
;
1354 chunks
[i
].chunk_id
= AMDGPU_CHUNK_ID_IB
;
1355 chunks
[i
].length_dw
= sizeof(struct drm_amdgpu_cs_chunk_ib
) / 4;
1356 chunks
[i
].chunk_data
= (uint64_t)(uintptr_t)&chunk_data
[i
];
1358 ib
= &request
->ibs
[i
];
1360 chunk_data
[i
].ib_data
._pad
= 0;
1361 chunk_data
[i
].ib_data
.va_start
= ib
->ib_mc_address
;
1362 chunk_data
[i
].ib_data
.ib_bytes
= ib
->size
* 4;
1363 chunk_data
[i
].ib_data
.ip_type
= request
->ip_type
;
1364 chunk_data
[i
].ib_data
.ip_instance
= request
->ip_instance
;
1365 chunk_data
[i
].ib_data
.ring
= request
->ring
;
1366 chunk_data
[i
].ib_data
.flags
= ib
->flags
;
1372 chunks
[i
].chunk_id
= AMDGPU_CHUNK_ID_FENCE
;
1373 chunks
[i
].length_dw
= sizeof(struct drm_amdgpu_cs_chunk_fence
) / 4;
1374 chunks
[i
].chunk_data
= (uint64_t)(uintptr_t)&chunk_data
[i
];
1376 amdgpu_cs_chunk_fence_info_to_data(&request
->fence_info
,
1380 if (sem_info
->wait
.syncobj_count
&& sem_info
->cs_emit_wait
) {
1381 wait_syncobj
= radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info
->wait
,
1382 &chunks
[num_chunks
],
1383 AMDGPU_CHUNK_ID_SYNCOBJ_IN
);
1384 if (!wait_syncobj
) {
1390 if (sem_info
->wait
.sem_count
== 0)
1391 sem_info
->cs_emit_wait
= false;
1395 if (sem_info
->wait
.sem_count
&& sem_info
->cs_emit_wait
) {
1396 sem_dependencies
= alloca(sizeof(struct drm_amdgpu_cs_chunk_dep
) * sem_info
->wait
.sem_count
);
1399 for (unsigned j
= 0; j
< sem_info
->wait
.sem_count
; j
++) {
1400 sem
= (struct amdgpu_cs_fence
*)sem_info
->wait
.sem
[j
];
1403 struct drm_amdgpu_cs_chunk_dep
*dep
= &sem_dependencies
[sem_count
++];
1405 amdgpu_cs_chunk_fence_to_dep(sem
, dep
);
1407 sem
->context
= NULL
;
1411 /* dependencies chunk */
1412 chunks
[i
].chunk_id
= AMDGPU_CHUNK_ID_DEPENDENCIES
;
1413 chunks
[i
].length_dw
= sizeof(struct drm_amdgpu_cs_chunk_dep
) / 4 * sem_count
;
1414 chunks
[i
].chunk_data
= (uint64_t)(uintptr_t)sem_dependencies
;
1416 sem_info
->cs_emit_wait
= false;
1419 if (sem_info
->signal
.syncobj_count
&& sem_info
->cs_emit_signal
) {
1420 signal_syncobj
= radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info
->signal
,
1421 &chunks
[num_chunks
],
1422 AMDGPU_CHUNK_ID_SYNCOBJ_OUT
);
1423 if (!signal_syncobj
) {
1430 r
= amdgpu_cs_submit_raw(ctx
->ws
->dev
,
1438 free(signal_syncobj
);
1442 static int radv_amdgpu_create_syncobj(struct radeon_winsys
*_ws
,
1445 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1446 return amdgpu_cs_create_syncobj(ws
->dev
, handle
);
1449 static void radv_amdgpu_destroy_syncobj(struct radeon_winsys
*_ws
,
1452 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1453 amdgpu_cs_destroy_syncobj(ws
->dev
, handle
);
1456 static void radv_amdgpu_reset_syncobj(struct radeon_winsys
*_ws
,
1459 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1460 amdgpu_cs_syncobj_reset(ws
->dev
, &handle
, 1);
1463 static void radv_amdgpu_signal_syncobj(struct radeon_winsys
*_ws
,
1466 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1467 amdgpu_cs_syncobj_signal(ws
->dev
, &handle
, 1);
1470 static bool radv_amdgpu_wait_syncobj(struct radeon_winsys
*_ws
, const uint32_t *handles
,
1471 uint32_t handle_count
, bool wait_all
, uint64_t timeout
)
1473 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1476 /* The timeouts are signed, while vulkan timeouts are unsigned. */
1477 timeout
= MIN2(timeout
, INT64_MAX
);
1479 int ret
= amdgpu_cs_syncobj_wait(ws
->dev
, (uint32_t*)handles
, handle_count
, timeout
,
1480 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT
|
1481 (wait_all
? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL
: 0),
1485 } else if (ret
== -1 && errno
== ETIME
) {
1488 fprintf(stderr
, "amdgpu: radv_amdgpu_wait_syncobj failed!\nerrno: %d\n", errno
);
1493 static int radv_amdgpu_export_syncobj(struct radeon_winsys
*_ws
,
1497 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1499 return amdgpu_cs_export_syncobj(ws
->dev
, syncobj
, fd
);
1502 static int radv_amdgpu_import_syncobj(struct radeon_winsys
*_ws
,
1506 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1508 return amdgpu_cs_import_syncobj(ws
->dev
, fd
, syncobj
);
1512 static int radv_amdgpu_export_syncobj_to_sync_file(struct radeon_winsys
*_ws
,
1516 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1518 return amdgpu_cs_syncobj_export_sync_file(ws
->dev
, syncobj
, fd
);
1521 static int radv_amdgpu_import_syncobj_from_sync_file(struct radeon_winsys
*_ws
,
1525 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
1527 return amdgpu_cs_syncobj_import_sync_file(ws
->dev
, syncobj
, fd
);
1530 void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys
*ws
)
1532 ws
->base
.ctx_create
= radv_amdgpu_ctx_create
;
1533 ws
->base
.ctx_destroy
= radv_amdgpu_ctx_destroy
;
1534 ws
->base
.ctx_wait_idle
= radv_amdgpu_ctx_wait_idle
;
1535 ws
->base
.cs_create
= radv_amdgpu_cs_create
;
1536 ws
->base
.cs_destroy
= radv_amdgpu_cs_destroy
;
1537 ws
->base
.cs_grow
= radv_amdgpu_cs_grow
;
1538 ws
->base
.cs_finalize
= radv_amdgpu_cs_finalize
;
1539 ws
->base
.cs_reset
= radv_amdgpu_cs_reset
;
1540 ws
->base
.cs_add_buffer
= radv_amdgpu_cs_add_buffer
;
1541 ws
->base
.cs_execute_secondary
= radv_amdgpu_cs_execute_secondary
;
1542 ws
->base
.cs_submit
= radv_amdgpu_winsys_cs_submit
;
1543 ws
->base
.cs_dump
= radv_amdgpu_winsys_cs_dump
;
1544 ws
->base
.create_fence
= radv_amdgpu_create_fence
;
1545 ws
->base
.destroy_fence
= radv_amdgpu_destroy_fence
;
1546 ws
->base
.create_sem
= radv_amdgpu_create_sem
;
1547 ws
->base
.destroy_sem
= radv_amdgpu_destroy_sem
;
1548 ws
->base
.create_syncobj
= radv_amdgpu_create_syncobj
;
1549 ws
->base
.destroy_syncobj
= radv_amdgpu_destroy_syncobj
;
1550 ws
->base
.reset_syncobj
= radv_amdgpu_reset_syncobj
;
1551 ws
->base
.signal_syncobj
= radv_amdgpu_signal_syncobj
;
1552 ws
->base
.wait_syncobj
= radv_amdgpu_wait_syncobj
;
1553 ws
->base
.export_syncobj
= radv_amdgpu_export_syncobj
;
1554 ws
->base
.import_syncobj
= radv_amdgpu_import_syncobj
;
1555 ws
->base
.export_syncobj_to_sync_file
= radv_amdgpu_export_syncobj_to_sync_file
;
1556 ws
->base
.import_syncobj_from_sync_file
= radv_amdgpu_import_syncobj_from_sync_file
;
1557 ws
->base
.fence_wait
= radv_amdgpu_fence_wait
;
1558 ws
->base
.fences_wait
= radv_amdgpu_fences_wait
;