2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27 #include <amdgpu_drm.h>
31 #include "amdgpu_id.h"
32 #include "radv_radeon_winsys.h"
33 #include "radv_amdgpu_cs.h"
34 #include "radv_amdgpu_bo.h"
/* Slot count of the per-CS virtual-buffer hash table; used later as a
 * power-of-two mask (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1), so it must stay
 * a power of two.
 * NOTE(review): the enclosing enum/#define line is missing from this
 * extraction — confirm against the full file. Code left byte-identical. */
VIRTUAL_BUFFER_HASH_TABLE_SIZE
= 1024
/* Per-command-stream state for the amdgpu winsys.
 * NOTE(review): this extraction is lossy — several fields referenced by
 * later functions (e.g. is_chained, hw_ip, num_buffers, priorities,
 * ib_mapped) and the closing brace are not visible here. Code left
 * byte-identical. */
42 struct radv_amdgpu_cs
{
43 struct radeon_winsys_cs base
;
44 struct radv_amdgpu_winsys
*ws
;
46 struct amdgpu_cs_ib_info ib
;
/* Current IB backing buffer (when use_ib_bos) and buffer bookkeeping. */
48 struct radeon_winsys_bo
*ib_buffer
;
50 unsigned max_num_buffers
;
52 amdgpu_bo_handle
*handles
;
/* IBs retired by radv_amdgpu_cs_grow(); freed on reset/destroy. */
55 struct radeon_winsys_bo
**old_ib_buffers
;
56 unsigned num_old_ib_buffers
;
57 unsigned max_num_old_ib_buffers
;
/* Points at the dword holding the current IB's size (chained IBs). */
58 unsigned *ib_size_ptr
;
/* Open-addressed map from BO handle to index in handles[]; -1 = empty. */
62 int buffer_hash_table
[1024];
65 unsigned num_virtual_buffers
;
66 unsigned max_num_virtual_buffers
;
67 struct radeon_winsys_bo
**virtual_buffers
;
68 uint8_t *virtual_buffer_priorities
;
/* Lazily allocated (see radv_amdgpu_cs_add_virtual_buffer); -1 = empty. */
69 int *virtual_buffer_hash_table
;
/* Downcast a generic winsys CS pointer to the amdgpu implementation. */
static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_winsys_cs *base)
{
	return (struct radv_amdgpu_cs *)base;
}
/* Map a winsys ring type to the corresponding AMDGPU_HW_IP_* identifier.
 * NOTE(review): the switch/case lines are missing from this extraction —
 * only the return statements survive. Code left byte-identical. */
78 static int ring_to_hw_ip(enum ring_type ring
)
82 return AMDGPU_HW_IP_GFX
;
84 return AMDGPU_HW_IP_DMA
;
86 return AMDGPU_HW_IP_COMPUTE
;
88 unreachable("unsupported ring");
92 static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx
*ctx
,
93 struct radv_amdgpu_fence
*fence
,
94 struct amdgpu_cs_request
*req
)
96 fence
->fence
.context
= ctx
->ctx
;
97 fence
->fence
.ip_type
= req
->ip_type
;
98 fence
->fence
.ip_instance
= req
->ip_instance
;
99 fence
->fence
.ring
= req
->ring
;
100 fence
->fence
.fence
= req
->seq_no
;
101 fence
->user_ptr
= (volatile uint64_t*)(ctx
->fence_map
+ (req
->ip_type
* MAX_RINGS_PER_TYPE
+ req
->ring
) * sizeof(uint64_t));
104 static struct radeon_winsys_fence
*radv_amdgpu_create_fence()
106 struct radv_amdgpu_fence
*fence
= calloc(1, sizeof(struct radv_amdgpu_fence
));
107 return (struct radeon_winsys_fence
*)fence
;
/* Release a fence created by radv_amdgpu_create_fence().
 * NOTE(review): the line that actually frees `fence` is missing from this
 * extraction — presumably free(fence); confirm against the full file.
 * Code left byte-identical. */
110 static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence
*_fence
)
112 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
/* Wait for a fence, first via the CPU-visible fence-map fast path, then
 * via the libdrm amdgpu_cs_query_fence_status() query.
 * NOTE(review): lossy extraction — the `timeout`/`absolute` parameters,
 * early returns and the final return are not visible here. Code left
 * byte-identical. */
116 static bool radv_amdgpu_fence_wait(struct radeon_winsys
*_ws
,
117 struct radeon_winsys_fence
*_fence
,
121 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
/* `absolute` selects timeout interpretation for the libdrm query. */
122 unsigned flags
= absolute
? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE
: 0;
124 uint32_t expired
= 0;
/* Fast path: the winsys writes completed sequence numbers into the
 * mapped fence buffer; compare against the fence's sequence number. */
126 if (fence
->user_ptr
) {
127 if (*fence
->user_ptr
>= fence
->fence
.fence
)
129 if (!absolute
&& !timeout
)
133 /* Now use the libdrm query. */
134 r
= amdgpu_cs_query_fence_status(&fence
->fence
,
140 fprintf(stderr
, "amdgpu: radv_amdgpu_cs_query_fence_status failed.\n");
/* Tear down a CS: destroy the current and retired IB buffers and free all
 * tracking arrays.
 * NOTE(review): lossy extraction — the guarding conditionals (e.g. the
 * null check before destroying ib_buffer), free(cs->handles) and the
 * final free(cs) are not visible here. Code left byte-identical. */
150 static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs
*rcs
)
152 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(rcs
);
155 cs
->ws
->base
.buffer_destroy(cs
->ib_buffer
);
159 for (unsigned i
= 0; i
< cs
->num_old_ib_buffers
; ++i
)
160 cs
->ws
->base
.buffer_destroy(cs
->old_ib_buffers
[i
]);
162 free(cs
->old_ib_buffers
);
163 free(cs
->virtual_buffers
);
164 free(cs
->virtual_buffer_priorities
);
165 free(cs
->virtual_buffer_hash_table
);
167 free(cs
->priorities
);
/* Initialize a freshly allocated CS: empty the BO hash table and record
 * the hardware IP for the requested ring type.
 * NOTE(review): the function returns `boolean` but no return statement is
 * visible in this extraction. Code left byte-identical. */
171 static boolean
radv_amdgpu_init_cs(struct radv_amdgpu_cs
*cs
,
172 enum ring_type ring_type
)
/* -1 marks an empty hash slot (see radv_amdgpu_cs_find_buffer). */
174 for (int i
= 0; i
< ARRAY_SIZE(cs
->buffer_hash_table
); ++i
)
175 cs
->buffer_hash_table
[i
] = -1;
177 cs
->hw_ip
= ring_to_hw_ip(ring_type
);
/* Create a command stream. With IB BOs the command words live in a mapped
 * GPU buffer; otherwise they go into a plain malloc'ed CPU array that is
 * uploaded at submit time (sysmem path).
 * NOTE(review): lossy extraction — allocation-failure cleanup paths and
 * the final return are not visible here. Code left byte-identical. */
181 static struct radeon_winsys_cs
*
182 radv_amdgpu_cs_create(struct radeon_winsys
*ws
,
183 enum ring_type ring_type
)
185 struct radv_amdgpu_cs
*cs
;
/* Initial IB size: 20K dwords in bytes. */
186 uint32_t ib_size
= 20 * 1024 * 4;
187 cs
= calloc(1, sizeof(struct radv_amdgpu_cs
));
191 cs
->ws
= radv_amdgpu_winsys(ws
);
192 radv_amdgpu_init_cs(cs
, ring_type
);
194 if (cs
->ws
->use_ib_bos
) {
195 cs
->ib_buffer
= ws
->buffer_create(ws
, ib_size
, 0,
197 RADEON_FLAG_CPU_ACCESS
);
198 if (!cs
->ib_buffer
) {
203 cs
->ib_mapped
= ws
->buffer_map(cs
->ib_buffer
);
204 if (!cs
->ib_mapped
) {
205 ws
->buffer_destroy(cs
->ib_buffer
);
210 cs
->ib
.ib_mc_address
= radv_amdgpu_winsys_bo(cs
->ib_buffer
)->va
;
211 cs
->base
.buf
= (uint32_t *)cs
->ib_mapped
;
/* Reserve 4 dwords of headroom for a chaining packet. */
212 cs
->base
.max_dw
= ib_size
/ 4 - 4;
213 cs
->ib_size_ptr
= &cs
->ib
.size
;
216 ws
->cs_add_buffer(&cs
->base
, cs
->ib_buffer
, 8);
218 cs
->base
.buf
= malloc(16384);
219 cs
->base
.max_dw
= 4096;
/* Grow a CS so at least min_size more dwords fit. Sysmem path: realloc the
 * CPU buffer (capped at limit_dws). IB-BO path: pad to the chaining
 * alignment, retire the current IB into old_ib_buffers, allocate a new IB
 * and emit an INDIRECT_BUFFER chain packet pointing at it.
 * NOTE(review): lossy extraction — error-handling branches (failed_dw
 * flagging, early returns) are not visible here. Code left byte-identical. */
229 static void radv_amdgpu_cs_grow(struct radeon_winsys_cs
*_cs
, size_t min_size
)
231 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
238 if (!cs
->ws
->use_ib_bos
) {
239 const uint64_t limit_dws
= 0xffff8;
240 uint64_t ib_dws
= MAX2(cs
->base
.cdw
+ min_size
,
241 MIN2(cs
->base
.max_dw
* 2, limit_dws
));
243 /* The total ib size cannot exceed limit_dws dwords. */
244 if (ib_dws
> limit_dws
)
251 uint32_t *new_buf
= realloc(cs
->base
.buf
, ib_dws
* 4);
253 cs
->base
.buf
= new_buf
;
254 cs
->base
.max_dw
= ib_dws
;
/* New IB: at least double the old one, plus chaining headroom. */
262 uint64_t ib_size
= MAX2(min_size
* 4 + 16, cs
->base
.max_dw
* 4 * 2);
264 /* max that fits in the chain size field. */
265 ib_size
= MIN2(ib_size
, 0xfffff);
/* Pad with NOPs (0xffff1000) so cdw % 8 == 4 before the 4-dword chain
 * packet, keeping the chained IB 8-dword aligned. */
267 while (!cs
->base
.cdw
|| (cs
->base
.cdw
& 7) != 4)
268 cs
->base
.buf
[cs
->base
.cdw
++] = 0xffff1000;
270 *cs
->ib_size_ptr
|= cs
->base
.cdw
+ 4;
272 if (cs
->num_old_ib_buffers
== cs
->max_num_old_ib_buffers
) {
273 cs
->max_num_old_ib_buffers
= MAX2(1, cs
->max_num_old_ib_buffers
* 2);
274 cs
->old_ib_buffers
= realloc(cs
->old_ib_buffers
,
275 cs
->max_num_old_ib_buffers
* sizeof(void*));
278 cs
->old_ib_buffers
[cs
->num_old_ib_buffers
++] = cs
->ib_buffer
;
280 cs
->ib_buffer
= cs
->ws
->base
.buffer_create(&cs
->ws
->base
, ib_size
, 0,
282 RADEON_FLAG_CPU_ACCESS
);
/* On failure, fall back to the IB we just retired. */
284 if (!cs
->ib_buffer
) {
287 cs
->ib_buffer
= cs
->old_ib_buffers
[--cs
->num_old_ib_buffers
];
290 cs
->ib_mapped
= cs
->ws
->base
.buffer_map(cs
->ib_buffer
);
291 if (!cs
->ib_mapped
) {
292 cs
->ws
->base
.buffer_destroy(cs
->ib_buffer
);
295 cs
->ib_buffer
= cs
->old_ib_buffers
[--cs
->num_old_ib_buffers
];
298 cs
->ws
->base
.cs_add_buffer(&cs
->base
, cs
->ib_buffer
, 8);
/* Emit the INDIRECT_BUFFER chain packet into the old IB; the size dword
 * is patched later via ib_size_ptr. */
300 cs
->base
.buf
[cs
->base
.cdw
++] = PKT3(PKT3_INDIRECT_BUFFER_CIK
, 2, 0);
301 cs
->base
.buf
[cs
->base
.cdw
++] = radv_amdgpu_winsys_bo(cs
->ib_buffer
)->va
;
302 cs
->base
.buf
[cs
->base
.cdw
++] = radv_amdgpu_winsys_bo(cs
->ib_buffer
)->va
>> 32;
303 cs
->ib_size_ptr
= cs
->base
.buf
+ cs
->base
.cdw
;
304 cs
->base
.buf
[cs
->base
.cdw
++] = S_3F2_CHAIN(1) | S_3F2_VALID(1);
306 cs
->base
.buf
= (uint32_t *)cs
->ib_mapped
;
308 cs
->base
.max_dw
= ib_size
/ 4 - 4;
/* Finalize a CS for submission: pad the IB with NOPs to an 8-dword
 * boundary, record the final size, and clear the chained flag.
 * NOTE(review): lossy extraction — the non-IB-BO branch and the return
 * statement are not visible here. Code left byte-identical. */
312 static bool radv_amdgpu_cs_finalize(struct radeon_winsys_cs
*_cs
)
314 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
316 if (cs
->ws
->use_ib_bos
) {
/* 0xffff1000 is the type-3 NOP padding dword. */
317 while (!cs
->base
.cdw
|| (cs
->base
.cdw
& 7) != 0)
318 cs
->base
.buf
[cs
->base
.cdw
++] = 0xffff1000;
320 *cs
->ib_size_ptr
|= cs
->base
.cdw
;
322 cs
->is_chained
= false;
/* Reset a CS for reuse: clear both BO hash tables (only the slots that
 * were populated), drop tracked buffers, destroy retired IBs and rewind
 * the current IB.
 * NOTE(review): lossy extraction — the cdw/num_buffers resets and parts
 * of the else branch are not visible here. Code left byte-identical. */
328 static void radv_amdgpu_cs_reset(struct radeon_winsys_cs
*_cs
)
330 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
/* Clear only the hash slots actually used, not the whole table. */
334 for (unsigned i
= 0; i
< cs
->num_buffers
; ++i
) {
335 unsigned hash
= ((uintptr_t)cs
->handles
[i
] >> 6) &
336 (ARRAY_SIZE(cs
->buffer_hash_table
) - 1);
337 cs
->buffer_hash_table
[hash
] = -1;
340 for (unsigned i
= 0; i
< cs
->num_virtual_buffers
; ++i
) {
341 unsigned hash
= ((uintptr_t)cs
->virtual_buffers
[i
] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE
- 1);
342 cs
->virtual_buffer_hash_table
[hash
] = -1;
346 cs
->num_virtual_buffers
= 0;
348 if (cs
->ws
->use_ib_bos
) {
349 cs
->ws
->base
.cs_add_buffer(&cs
->base
, cs
->ib_buffer
, 8);
351 for (unsigned i
= 0; i
< cs
->num_old_ib_buffers
; ++i
)
352 cs
->ws
->base
.buffer_destroy(cs
->old_ib_buffers
[i
]);
354 cs
->num_old_ib_buffers
= 0;
355 cs
->ib
.ib_mc_address
= radv_amdgpu_winsys_bo(cs
->ib_buffer
)->va
;
356 cs
->ib_size_ptr
= &cs
->ib
.size
;
/* Find the index of `bo` in cs->handles, first via the hash table, then
 * via linear search (caching the hit back into the hash slot).
 * NOTE(review): lossy extraction — the `bo` parameter declaration, the
 * index validity check and the return statements are not visible here.
 * Code left byte-identical. */
361 static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs
*cs
,
/* >> 6 discards allocation-alignment bits before masking. */
364 unsigned hash
= ((uintptr_t)bo
>> 6) & (ARRAY_SIZE(cs
->buffer_hash_table
) - 1);
365 int index
= cs
->buffer_hash_table
[hash
];
370 if (cs
->handles
[index
] == bo
)
373 for (unsigned i
= 0; i
< cs
->num_buffers
; ++i
) {
374 if (cs
->handles
[i
] == bo
) {
375 cs
->buffer_hash_table
[hash
] = i
;
/* Track a raw amdgpu BO handle on the CS with the given priority. If the
 * BO is already tracked, only raise its priority; otherwise grow the
 * tracking arrays as needed and append it, updating the hash table.
 * NOTE(review): lossy extraction — parameter declarations, the
 * early-return for an existing entry, realloc failure handling and the
 * num_buffers increment are not visible here. Code left byte-identical. */
383 static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs
*cs
,
388 int index
= radv_amdgpu_cs_find_buffer(cs
, bo
);
391 cs
->priorities
[index
] = MAX2(cs
->priorities
[index
], priority
);
395 if (cs
->num_buffers
== cs
->max_num_buffers
) {
396 unsigned new_count
= MAX2(1, cs
->max_num_buffers
* 2);
397 cs
->handles
= realloc(cs
->handles
, new_count
* sizeof(amdgpu_bo_handle
));
398 cs
->priorities
= realloc(cs
->priorities
, new_count
* sizeof(uint8_t));
399 cs
->max_num_buffers
= new_count
;
402 cs
->handles
[cs
->num_buffers
] = bo
;
403 cs
->priorities
[cs
->num_buffers
] = priority
;
405 hash
= ((uintptr_t)bo
>> 6) & (ARRAY_SIZE(cs
->buffer_hash_table
) - 1);
406 cs
->buffer_hash_table
[hash
] = cs
->num_buffers
;
/* Track a sparse/virtual BO on the CS. Virtual BOs are kept in a separate
 * array (their backing BOs are expanded at BO-list creation time), with a
 * lazily allocated hash table for de-duplication.
 * NOTE(review): lossy extraction — the `priority` parameter, early
 * returns after priority updates, and malloc failure handling are not
 * visible here. Code left byte-identical. */
411 static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_winsys_cs
*_cs
,
412 struct radeon_winsys_bo
*bo
,
415 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
416 unsigned hash
= ((uintptr_t)bo
>> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE
- 1);
/* Lazily allocate the hash table; -1 marks an empty slot. */
419 if (!cs
->virtual_buffer_hash_table
) {
420 cs
->virtual_buffer_hash_table
= malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE
* sizeof(int));
421 for (int i
= 0; i
< VIRTUAL_BUFFER_HASH_TABLE_SIZE
; ++i
)
422 cs
->virtual_buffer_hash_table
[i
] = -1;
425 if (cs
->virtual_buffer_hash_table
[hash
] >= 0) {
426 int idx
= cs
->virtual_buffer_hash_table
[hash
];
427 if (cs
->virtual_buffers
[idx
] == bo
) {
428 cs
->virtual_buffer_priorities
[idx
] = MAX2(cs
->virtual_buffer_priorities
[idx
], priority
);
/* Hash collision: fall back to a linear scan, re-caching on hit. */
431 for (unsigned i
= 0; i
< cs
->num_virtual_buffers
; ++i
) {
432 if (cs
->virtual_buffers
[i
] == bo
) {
433 cs
->virtual_buffer_priorities
[i
] = MAX2(cs
->virtual_buffer_priorities
[i
], priority
);
434 cs
->virtual_buffer_hash_table
[hash
] = i
;
440 if(cs
->max_num_virtual_buffers
<= cs
->num_virtual_buffers
) {
441 cs
->max_num_virtual_buffers
= MAX2(2, cs
->max_num_virtual_buffers
* 2);
442 cs
->virtual_buffers
= realloc(cs
->virtual_buffers
, sizeof(struct radv_amdgpu_virtual_virtual_buffer
*) * cs
->max_num_virtual_buffers
);
443 cs
->virtual_buffer_priorities
= realloc(cs
->virtual_buffer_priorities
, sizeof(uint8_t) * cs
->max_num_virtual_buffers
);
446 cs
->virtual_buffers
[cs
->num_virtual_buffers
] = bo
;
447 cs
->virtual_buffer_priorities
[cs
->num_virtual_buffers
] = priority
;
449 cs
->virtual_buffer_hash_table
[hash
] = cs
->num_virtual_buffers
;
450 ++cs
->num_virtual_buffers
;
/* Winsys entry point: track a BO on a CS, dispatching virtual (sparse)
 * BOs to the virtual-buffer path and real BOs to the handle path.
 * NOTE(review): lossy extraction — the `priority` parameter and the
 * return after the virtual-buffer branch are not visible here. Code left
 * byte-identical. */
454 static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs
*_cs
,
455 struct radeon_winsys_bo
*_bo
,
458 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(_cs
);
459 struct radv_amdgpu_winsys_bo
*bo
= radv_amdgpu_winsys_bo(_bo
);
461 if (bo
->is_virtual
) {
462 radv_amdgpu_cs_add_virtual_buffer(_cs
, _bo
, priority
);
466 radv_amdgpu_cs_add_buffer_internal(cs
, bo
->bo
, priority
);
/* Execute a secondary CS from a primary one. All of the child's buffers
 * are merged into the parent; with IB BOs an INDIRECT_BUFFER packet calls
 * into the child's IB, otherwise the child's dwords are memcpy'd inline.
 * NOTE(review): lossy extraction — braces and possibly an early return
 * are not visible here. Code left byte-identical. */
469 static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs
*_parent
,
470 struct radeon_winsys_cs
*_child
)
472 struct radv_amdgpu_cs
*parent
= radv_amdgpu_cs(_parent
);
473 struct radv_amdgpu_cs
*child
= radv_amdgpu_cs(_child
);
475 for (unsigned i
= 0; i
< child
->num_buffers
; ++i
) {
476 radv_amdgpu_cs_add_buffer_internal(parent
, child
->handles
[i
],
477 child
->priorities
[i
]);
480 for (unsigned i
= 0; i
< child
->num_virtual_buffers
; ++i
) {
481 radv_amdgpu_cs_add_buffer(&parent
->base
, child
->virtual_buffers
[i
],
482 child
->virtual_buffer_priorities
[i
]);
485 if (parent
->ws
->use_ib_bos
) {
/* Ensure room for the 4-dword INDIRECT_BUFFER packet. */
486 if (parent
->base
.cdw
+ 4 > parent
->base
.max_dw
)
487 radv_amdgpu_cs_grow(&parent
->base
, 4);
489 parent
->base
.buf
[parent
->base
.cdw
++] = PKT3(PKT3_INDIRECT_BUFFER_CIK
, 2, 0);
490 parent
->base
.buf
[parent
->base
.cdw
++] = child
->ib
.ib_mc_address
;
491 parent
->base
.buf
[parent
->base
.cdw
++] = child
->ib
.ib_mc_address
>> 32;
492 parent
->base
.buf
[parent
->base
.cdw
++] = child
->ib
.size
;
/* Sysmem path: splice the child's command words directly. */
494 if (parent
->base
.cdw
+ child
->base
.cdw
> parent
->base
.max_dw
)
495 radv_amdgpu_cs_grow(&parent
->base
, child
->base
.cdw
);
497 memcpy(parent
->base
.buf
+ parent
->base
.cdw
, child
->base
.buf
, 4 * child
->base
.cdw
);
498 parent
->base
.cdw
+= child
->base
.cdw
;
/* Build the amdgpu BO list for a submission. Three strategies:
 *  1. debug_all_bos: include every BO in the winsys' global list;
 *  2. single CS, no extras, no virtual buffers: pass its arrays directly;
 *  3. general case: merge and de-duplicate BOs (expanding virtual BOs into
 *     their backing BOs) across all CSes plus optional extra BO/CS.
 * Returns the libdrm result of amdgpu_bo_list_create().
 * NOTE(review): lossy extraction — `count` parameter, malloc failure
 * cleanup, inner found/continue flags, final frees and the return are not
 * visible here. Code left byte-identical. */
502 static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys
*ws
,
503 struct radeon_winsys_cs
**cs_array
,
505 struct radv_amdgpu_winsys_bo
*extra_bo
,
506 struct radeon_winsys_cs
*extra_cs
,
507 amdgpu_bo_list_handle
*bo_list
)
510 if (ws
->debug_all_bos
) {
511 struct radv_amdgpu_winsys_bo
*bo
;
512 amdgpu_bo_handle
*handles
;
515 pthread_mutex_lock(&ws
->global_bo_list_lock
);
517 handles
= malloc(sizeof(handles
[0]) * ws
->num_buffers
);
519 pthread_mutex_unlock(&ws
->global_bo_list_lock
);
523 LIST_FOR_EACH_ENTRY(bo
, &ws
->global_bo_list
, global_list_item
) {
524 assert(num
< ws
->num_buffers
);
525 handles
[num
++] = bo
->bo
;
528 r
= amdgpu_bo_list_create(ws
->dev
, ws
->num_buffers
,
532 pthread_mutex_unlock(&ws
->global_bo_list_lock
);
/* Fast path: one CS, nothing extra — reuse its arrays as-is. */
533 } else if (count
== 1 && !extra_bo
&& !extra_cs
&&
534 !radv_amdgpu_cs(cs_array
[0])->num_virtual_buffers
) {
535 struct radv_amdgpu_cs
*cs
= (struct radv_amdgpu_cs
*)cs_array
[0];
536 r
= amdgpu_bo_list_create(ws
->dev
, cs
->num_buffers
, cs
->handles
,
537 cs
->priorities
, bo_list
);
/* General case: upper-bound the merged size, then de-duplicate. */
539 unsigned total_buffer_count
= !!extra_bo
;
540 unsigned unique_bo_count
= !!extra_bo
;
541 for (unsigned i
= 0; i
< count
; ++i
) {
542 struct radv_amdgpu_cs
*cs
= (struct radv_amdgpu_cs
*)cs_array
[i
];
543 total_buffer_count
+= cs
->num_buffers
;
544 for (unsigned j
= 0; j
< cs
->num_virtual_buffers
; ++j
)
545 total_buffer_count
+= radv_amdgpu_winsys_bo(cs
->virtual_buffers
[j
])->bo_count
;
549 total_buffer_count
+= ((struct radv_amdgpu_cs
*)extra_cs
)->num_buffers
;
552 amdgpu_bo_handle
*handles
= malloc(sizeof(amdgpu_bo_handle
) * total_buffer_count
);
553 uint8_t *priorities
= malloc(sizeof(uint8_t) * total_buffer_count
);
554 if (!handles
|| !priorities
) {
561 handles
[0] = extra_bo
->bo
;
565 for (unsigned i
= 0; i
< count
+ !!extra_cs
; ++i
) {
566 struct radv_amdgpu_cs
*cs
;
569 cs
= (struct radv_amdgpu_cs
*)extra_cs
;
571 cs
= (struct radv_amdgpu_cs
*)cs_array
[i
];
573 if (!cs
->num_buffers
)
/* First contributing CS can be bulk-copied. */
576 if (unique_bo_count
== 0) {
577 memcpy(handles
, cs
->handles
, cs
->num_buffers
* sizeof(amdgpu_bo_handle
));
578 memcpy(priorities
, cs
->priorities
, cs
->num_buffers
* sizeof(uint8_t));
579 unique_bo_count
= cs
->num_buffers
;
582 int unique_bo_so_far
= unique_bo_count
;
583 for (unsigned j
= 0; j
< cs
->num_buffers
; ++j
) {
585 for (unsigned k
= 0; k
< unique_bo_so_far
; ++k
) {
586 if (handles
[k
] == cs
->handles
[j
]) {
588 priorities
[k
] = MAX2(priorities
[k
],
594 handles
[unique_bo_count
] = cs
->handles
[j
];
595 priorities
[unique_bo_count
] = cs
->priorities
[j
];
/* Expand each virtual BO into its backing BOs. */
599 for (unsigned j
= 0; j
< cs
->num_virtual_buffers
; ++j
) {
600 struct radv_amdgpu_winsys_bo
*virtual_bo
= radv_amdgpu_winsys_bo(cs
->virtual_buffers
[j
]);
601 for(unsigned k
= 0; k
< virtual_bo
->bo_count
; ++k
) {
602 struct radv_amdgpu_winsys_bo
*bo
= virtual_bo
->bos
[k
];
604 for (unsigned m
= 0; m
< unique_bo_count
; ++m
) {
605 if (handles
[m
] == bo
->bo
) {
607 priorities
[m
] = MAX2(priorities
[m
],
608 cs
->virtual_buffer_priorities
[j
]);
613 handles
[unique_bo_count
] = bo
->bo
;
614 priorities
[unique_bo_count
] = cs
->virtual_buffer_priorities
[j
];
620 r
= amdgpu_bo_list_create(ws
->dev
, unique_bo_count
, handles
,
621 priorities
, bo_list
);
/* Build the fence_info for a submission: the fence BO handle plus the
 * byte offset of this (ip_type, ring) slot in the fence map.
 * NOTE(review): the `return ret;` and closing brace are not visible in
 * this extraction. Code left byte-identical. */
630 static struct amdgpu_cs_fence_info
radv_set_cs_fence(struct radv_amdgpu_ctx
*ctx
, int ip_type
, int ring
)
632 struct amdgpu_cs_fence_info ret
= {0};
633 if (ctx
->fence_map
) {
634 ret
.handle
= radv_amdgpu_winsys_bo(ctx
->fence_bo
)->bo
;
635 ret
.offset
= (ip_type
* MAX_RINGS_PER_TYPE
+ ring
) * sizeof(uint64_t);
/* Record the request as the context's last submission for its
 * (ip_type, ring) pair, for use by radv_amdgpu_ctx_wait_idle().
 * NOTE(review): the final argument of the call (the request itself) is on
 * a line missing from this extraction. Code left byte-identical. */
640 static void radv_assign_last_submit(struct radv_amdgpu_ctx
*ctx
,
641 struct amdgpu_cs_request
*request
)
643 radv_amdgpu_request_to_fence(ctx
,
644 &ctx
->last_submission
[request
->ip_type
][request
->ring
],
/* Submit many CSes as ONE kernel submission by patching chain packets:
 * walking backwards, each CS's tail INDIRECT_BUFFER packet is made to
 * jump to the next CS's IB; only cs0 (plus an optional preamble IB) is
 * handed to amdgpu_cs_submit().
 * NOTE(review): lossy extraction — `queue_idx`/`cs_count` parameters,
 * chain-packet size patch of ibs[1], error returns and the final return
 * are not visible here. Code left byte-identical. */
648 static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx
*_ctx
,
650 struct radeon_winsys_cs
**cs_array
,
652 struct radeon_winsys_cs
*initial_preamble_cs
,
653 struct radeon_winsys_cs
*continue_preamble_cs
,
654 struct radeon_winsys_fence
*_fence
)
657 struct radv_amdgpu_ctx
*ctx
= radv_amdgpu_ctx(_ctx
);
658 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
659 struct radv_amdgpu_cs
*cs0
= radv_amdgpu_cs(cs_array
[0]);
660 amdgpu_bo_list_handle bo_list
;
661 struct amdgpu_cs_request request
= {0};
662 struct amdgpu_cs_ib_info ibs
[2];
/* Walk backwards so each CS can chain to its (already patched) successor. */
664 for (unsigned i
= cs_count
; i
--;) {
665 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[i
]);
/* Undo any chain left over from a previous submission. */
667 if (cs
->is_chained
) {
668 *cs
->ib_size_ptr
-= 4;
669 cs
->is_chained
= false;
672 if (i
+ 1 < cs_count
) {
673 struct radv_amdgpu_cs
*next
= radv_amdgpu_cs(cs_array
[i
+ 1]);
674 assert(cs
->base
.cdw
+ 4 <= cs
->base
.max_dw
);
676 cs
->is_chained
= true;
677 *cs
->ib_size_ptr
+= 4;
/* Write the chain packet in the reserved 4-dword headroom. */
679 cs
->base
.buf
[cs
->base
.cdw
+ 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK
, 2, 0);
680 cs
->base
.buf
[cs
->base
.cdw
+ 1] = next
->ib
.ib_mc_address
;
681 cs
->base
.buf
[cs
->base
.cdw
+ 2] = next
->ib
.ib_mc_address
>> 32;
682 cs
->base
.buf
[cs
->base
.cdw
+ 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next
->ib
.size
;
686 r
= radv_amdgpu_create_bo_list(cs0
->ws
, cs_array
, cs_count
, NULL
, initial_preamble_cs
, &bo_list
);
688 fprintf(stderr
, "amdgpu: Failed to created the BO list for submission\n");
692 request
.ip_type
= cs0
->hw_ip
;
693 request
.ring
= queue_idx
;
694 request
.number_of_ibs
= 1;
695 request
.ibs
= &cs0
->ib
;
696 request
.resources
= bo_list
;
697 request
.fence_info
= radv_set_cs_fence(ctx
, cs0
->hw_ip
, queue_idx
);
699 if (initial_preamble_cs
) {
701 request
.number_of_ibs
= 2;
703 ibs
[0] = ((struct radv_amdgpu_cs
*)initial_preamble_cs
)->ib
;
706 r
= amdgpu_cs_submit(ctx
->ctx
, 0, &request
, 1);
709 fprintf(stderr
, "amdgpu: Not enough memory for command submission.\n");
711 fprintf(stderr
, "amdgpu: The CS has been rejected, "
712 "see dmesg for more information.\n");
715 amdgpu_bo_list_destroy(bo_list
);
718 radv_amdgpu_request_to_fence(ctx
, fence
, &request
);
720 radv_assign_last_submit(ctx
, &request
);
/* Submit CSes in batches of up to AMDGPU_CS_MAX_IBS_PER_SUBMIT IBs per
 * kernel submission (minus one slot when a preamble IB is prepended),
 * without chaining. Used when chaining is unavailable or unhelpful.
 * NOTE(review): lossy extraction — `queue_idx`/`cs_count` parameters,
 * the request.ibs assignment, loop advance of i, error returns and the
 * final return are not visible here. Code left byte-identical. */
725 static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx
*_ctx
,
727 struct radeon_winsys_cs
**cs_array
,
729 struct radeon_winsys_cs
*initial_preamble_cs
,
730 struct radeon_winsys_cs
*continue_preamble_cs
,
731 struct radeon_winsys_fence
*_fence
)
734 struct radv_amdgpu_ctx
*ctx
= radv_amdgpu_ctx(_ctx
);
735 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
736 amdgpu_bo_list_handle bo_list
;
737 struct amdgpu_cs_request request
;
741 for (unsigned i
= 0; i
< cs_count
;) {
742 struct radv_amdgpu_cs
*cs0
= radv_amdgpu_cs(cs_array
[i
]);
743 struct amdgpu_cs_ib_info ibs
[AMDGPU_CS_MAX_IBS_PER_SUBMIT
];
/* First batch gets the initial preamble, later ones the continue one. */
744 struct radeon_winsys_cs
*preamble_cs
= i
? continue_preamble_cs
: initial_preamble_cs
;
745 unsigned cnt
= MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT
- !!preamble_cs
,
748 memset(&request
, 0, sizeof(request
));
750 r
= radv_amdgpu_create_bo_list(cs0
->ws
, &cs_array
[i
], cnt
, NULL
,
751 preamble_cs
, &bo_list
);
753 fprintf(stderr
, "amdgpu: Failed to created the BO list for submission\n");
757 request
.ip_type
= cs0
->hw_ip
;
758 request
.ring
= queue_idx
;
759 request
.resources
= bo_list
;
760 request
.number_of_ibs
= cnt
+ !!preamble_cs
;
762 request
.fence_info
= radv_set_cs_fence(ctx
, cs0
->hw_ip
, queue_idx
);
765 ibs
[0] = radv_amdgpu_cs(preamble_cs
)->ib
;
768 for (unsigned j
= 0; j
< cnt
; ++j
) {
769 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[i
+ j
]);
770 ibs
[j
+ !!preamble_cs
] = cs
->ib
;
/* Unchain: these IBs are submitted individually here. */
772 if (cs
->is_chained
) {
773 *cs
->ib_size_ptr
-= 4;
774 cs
->is_chained
= false;
778 r
= amdgpu_cs_submit(ctx
->ctx
, 0, &request
, 1);
781 fprintf(stderr
, "amdgpu: Not enough memory for command submission.\n");
783 fprintf(stderr
, "amdgpu: The CS has been rejected, "
784 "see dmesg for more information.\n");
787 amdgpu_bo_list_destroy(bo_list
);
795 radv_amdgpu_request_to_fence(ctx
, fence
, &request
);
797 radv_assign_last_submit(ctx
, &request
);
/* Sysmem submission path (no IB BOs): coalesce as many CPU-side CSes as
 * fit under the 0xffff8-dword limit into one freshly created GTT buffer,
 * pad to 8-dword alignment and submit that buffer as a single IB.
 * NOTE(review): lossy extraction — `queue_idx`/`cs_count` parameters,
 * cnt/size declarations, padding store, ib.size/request.ibs assignments,
 * error returns and the final return are not visible here. Code left
 * byte-identical. */
802 static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx
*_ctx
,
804 struct radeon_winsys_cs
**cs_array
,
806 struct radeon_winsys_cs
*initial_preamble_cs
,
807 struct radeon_winsys_cs
*continue_preamble_cs
,
808 struct radeon_winsys_fence
*_fence
)
811 struct radv_amdgpu_ctx
*ctx
= radv_amdgpu_ctx(_ctx
);
812 struct radv_amdgpu_fence
*fence
= (struct radv_amdgpu_fence
*)_fence
;
813 struct radv_amdgpu_cs
*cs0
= radv_amdgpu_cs(cs_array
[0]);
814 struct radeon_winsys
*ws
= (struct radeon_winsys
*)cs0
->ws
;
815 amdgpu_bo_list_handle bo_list
;
816 struct amdgpu_cs_request request
;
/* Type-3 NOP padding; SI lacks it and uses type-2 NOPs instead. */
817 uint32_t pad_word
= 0xffff1000U
;
819 if (radv_amdgpu_winsys(ws
)->info
.chip_class
== SI
)
820 pad_word
= 0x80000000;
824 for (unsigned i
= 0; i
< cs_count
;) {
825 struct amdgpu_cs_ib_info ib
= {0};
826 struct radeon_winsys_bo
*bo
= NULL
;
827 struct radeon_winsys_cs
*preamble_cs
= i
? continue_preamble_cs
: initial_preamble_cs
;
833 size
+= preamble_cs
->cdw
;
/* Greedily pack CSes while under the dword limit. */
835 while (i
+ cnt
< cs_count
&& 0xffff8 - size
>= radv_amdgpu_cs(cs_array
[i
+ cnt
])->base
.cdw
) {
836 size
+= radv_amdgpu_cs(cs_array
[i
+ cnt
])->base
.cdw
;
842 bo
= ws
->buffer_create(ws
, 4 * size
, 4096, RADEON_DOMAIN_GTT
, RADEON_FLAG_CPU_ACCESS
);
843 ptr
= ws
->buffer_map(bo
);
846 memcpy(ptr
, preamble_cs
->buf
, preamble_cs
->cdw
* 4);
847 ptr
+= preamble_cs
->cdw
;
850 for (unsigned j
= 0; j
< cnt
; ++j
) {
851 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[i
+ j
]);
852 memcpy(ptr
, cs
->base
.buf
, 4 * cs
->base
.cdw
);
/* Pad the combined IB to an 8-dword boundary. */
857 while(!size
|| (size
& 7)) {
862 memset(&request
, 0, sizeof(request
));
865 r
= radv_amdgpu_create_bo_list(cs0
->ws
, &cs_array
[i
], cnt
,
866 (struct radv_amdgpu_winsys_bo
*)bo
,
867 preamble_cs
, &bo_list
);
869 fprintf(stderr
, "amdgpu: Failed to created the BO list for submission\n");
874 ib
.ib_mc_address
= ws
->buffer_get_va(bo
);
876 request
.ip_type
= cs0
->hw_ip
;
877 request
.ring
= queue_idx
;
878 request
.resources
= bo_list
;
879 request
.number_of_ibs
= 1;
881 request
.fence_info
= radv_set_cs_fence(ctx
, cs0
->hw_ip
, queue_idx
);
883 r
= amdgpu_cs_submit(ctx
->ctx
, 0, &request
, 1);
886 fprintf(stderr
, "amdgpu: Not enough memory for command submission.\n");
888 fprintf(stderr
, "amdgpu: The CS has been rejected, "
889 "see dmesg for more information.\n");
892 amdgpu_bo_list_destroy(bo_list
);
894 ws
->buffer_destroy(bo
);
901 radv_amdgpu_request_to_fence(ctx
, fence
, &request
);
903 radv_assign_last_submit(ctx
, &request
);
/* Top-level submit entry point: wait on the given semaphores, dispatch to
 * the sysmem / chained / fallback submission strategy, then signal the
 * requested semaphores.
 * NOTE(review): lossy extraction — `queue_idx`/`cs_count`/`can_patch`
 * parameters, i/ret declarations, the semaphore arguments of the
 * wait/signal calls and the final return are not visible here. Code left
 * byte-identical. */
908 static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx
*_ctx
,
910 struct radeon_winsys_cs
**cs_array
,
912 struct radeon_winsys_cs
*initial_preamble_cs
,
913 struct radeon_winsys_cs
*continue_preamble_cs
,
914 struct radeon_winsys_sem
**wait_sem
,
915 unsigned wait_sem_count
,
916 struct radeon_winsys_sem
**signal_sem
,
917 unsigned signal_sem_count
,
919 struct radeon_winsys_fence
*_fence
)
921 struct radv_amdgpu_cs
*cs
= radv_amdgpu_cs(cs_array
[0]);
922 struct radv_amdgpu_ctx
*ctx
= radv_amdgpu_ctx(_ctx
);
926 for (i
= 0; i
< wait_sem_count
; i
++) {
927 amdgpu_semaphore_handle sem
= (amdgpu_semaphore_handle
)wait_sem
[i
];
928 amdgpu_cs_wait_semaphore(ctx
->ctx
, cs
->hw_ip
, 0, queue_idx
,
/* Strategy selection: sysmem when no IB BOs; chained when patchable and
 * above the per-submit IB limit; otherwise the batching fallback. */
931 if (!cs
->ws
->use_ib_bos
) {
932 ret
= radv_amdgpu_winsys_cs_submit_sysmem(_ctx
, queue_idx
, cs_array
,
933 cs_count
, initial_preamble_cs
, continue_preamble_cs
, _fence
);
934 } else if (can_patch
&& cs_count
> AMDGPU_CS_MAX_IBS_PER_SUBMIT
&& cs
->ws
->batchchain
) {
935 ret
= radv_amdgpu_winsys_cs_submit_chained(_ctx
, queue_idx
, cs_array
,
936 cs_count
, initial_preamble_cs
, continue_preamble_cs
, _fence
);
938 ret
= radv_amdgpu_winsys_cs_submit_fallback(_ctx
, queue_idx
, cs_array
,
939 cs_count
, initial_preamble_cs
, continue_preamble_cs
, _fence
);
942 for (i
= 0; i
< signal_sem_count
; i
++) {
943 amdgpu_semaphore_handle sem
= (amdgpu_semaphore_handle
)signal_sem
[i
];
944 amdgpu_cs_signal_semaphore(ctx
->ctx
, cs
->hw_ip
, 0, queue_idx
,
/* Translate a GPU VA inside one of this CS's IB buffers to a CPU pointer
 * by scanning old_ib_buffers plus the current ib_buffer (hence the
 * intentional `<=` bound: index == num_old_ib_buffers selects ib_buffer).
 * NOTE(review): lossy extraction — the `ret` declaration and the
 * not-found return are not visible here. Code left byte-identical. */
951 static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs
, uint64_t addr
)
953 struct radv_amdgpu_cs
*cs
= (struct radv_amdgpu_cs
*)_cs
;
958 for (unsigned i
= 0; i
<= cs
->num_old_ib_buffers
; ++i
) {
959 struct radv_amdgpu_winsys_bo
*bo
;
961 bo
= (struct radv_amdgpu_winsys_bo
*)
962 (i
== cs
->num_old_ib_buffers
? cs
->ib_buffer
: cs
->old_ib_buffers
[i
]);
963 if (addr
>= bo
->va
&& addr
- bo
->va
< bo
->size
) {
964 if (amdgpu_bo_cpu_map(bo
->bo
, &ret
) == 0)
965 return (char *)ret
+ (addr
- bo
->va
);
/* Dump the CS's main IB through ac_parse_ib(), resolving GPU addresses
 * back to CPU pointers via radv_amdgpu_winsys_get_cpu_addr().
 * NOTE(review): `file`/`trace_id` parameter lines are missing from this
 * extraction. Code left byte-identical. */
971 static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs
*_cs
,
975 struct radv_amdgpu_cs
*cs
= (struct radv_amdgpu_cs
*)_cs
;
976 void *ib
= cs
->base
.buf
;
977 int num_dw
= cs
->base
.cdw
;
/* IB-BO path: the words live in a GPU buffer, not base.buf. */
979 if (cs
->ws
->use_ib_bos
) {
980 ib
= radv_amdgpu_winsys_get_cpu_addr(cs
, cs
->ib
.ib_mc_address
);
981 num_dw
= cs
->ib
.size
;
984 ac_parse_ib(file
, ib
, num_dw
, trace_id
, "main IB", cs
->ws
->info
.chip_class
,
985 radv_amdgpu_winsys_get_cpu_addr
, cs
);
/* Create a hardware context plus the 4 KiB CPU-visible fence BO that
 * holds one uint64_t fence slot per (IP type, ring).
 * NOTE(review): lossy extraction — allocation-failure cleanup paths and
 * fence_bo creation flags are not visible here. Code left byte-identical. */
988 static struct radeon_winsys_ctx
*radv_amdgpu_ctx_create(struct radeon_winsys
*_ws
)
990 struct radv_amdgpu_winsys
*ws
= radv_amdgpu_winsys(_ws
);
991 struct radv_amdgpu_ctx
*ctx
= CALLOC_STRUCT(radv_amdgpu_ctx
);
996 r
= amdgpu_cs_ctx_create(ws
->dev
, &ctx
->ctx
);
998 fprintf(stderr
, "amdgpu: radv_amdgpu_cs_ctx_create failed. (%i)\n", r
);
/* All fence slots must fit in the single 4 KiB BO. */
1003 assert(AMDGPU_HW_IP_NUM
* MAX_RINGS_PER_TYPE
* sizeof(uint64_t) <= 4096);
1004 ctx
->fence_bo
= ws
->base
.buffer_create(&ws
->base
, 4096, 8,
1006 RADEON_FLAG_CPU_ACCESS
);
1008 ctx
->fence_map
= (uint64_t*)ws
->base
.buffer_map(ctx
->fence_bo
);
1010 memset(ctx
->fence_map
, 0, 4096);
1011 return (struct radeon_winsys_ctx
*)ctx
;
/* Destroy a hardware context: release the fence BO and the kernel
 * context.
 * NOTE(review): the final FREE(ctx) is not visible in this extraction.
 * Code left byte-identical. */
1017 static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx
*rwctx
)
1019 struct radv_amdgpu_ctx
*ctx
= (struct radv_amdgpu_ctx
*)rwctx
;
1020 ctx
->ws
->base
.buffer_destroy(ctx
->fence_bo
);
1021 amdgpu_cs_ctx_free(ctx
->ctx
);
/* Return whether the last submission on (ring_type, ring_index) has
 * completed, waiting up to 1 second via amdgpu_cs_query_fence_status().
 * NOTE(review): lossy extraction — the `expired` declaration and return
 * statements are not visible here. Code left byte-identical. */
1025 static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx
*rwctx
,
1026 enum ring_type ring_type
, int ring_index
)
1028 struct radv_amdgpu_ctx
*ctx
= (struct radv_amdgpu_ctx
*)rwctx
;
1029 int ip_type
= ring_to_hw_ip(ring_type
);
/* fence.fence == 0 means nothing was ever submitted on this ring. */
1031 if (ctx
->last_submission
[ip_type
][ring_index
].fence
.fence
) {
1033 int ret
= amdgpu_cs_query_fence_status(&ctx
->last_submission
[ip_type
][ring_index
].fence
,
1034 1000000000ull, 0, &expired
);
1036 if (ret
|| !expired
)
/* Create an amdgpu semaphore and return it as the opaque winsys type.
 * NOTE(review): the failure check on `ret` (returning NULL) is not
 * visible in this extraction. Code left byte-identical. */
1043 static struct radeon_winsys_sem
*radv_amdgpu_create_sem(struct radeon_winsys
*_ws
)
1046 amdgpu_semaphore_handle sem
;
1048 ret
= amdgpu_cs_create_semaphore(&sem
);
1051 return (struct radeon_winsys_sem
*)sem
;
1054 static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem
*_sem
)
1056 amdgpu_semaphore_handle sem
= (amdgpu_semaphore_handle
)_sem
;
1057 amdgpu_cs_destroy_semaphore(sem
);
1060 void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys
*ws
)
1062 ws
->base
.ctx_create
= radv_amdgpu_ctx_create
;
1063 ws
->base
.ctx_destroy
= radv_amdgpu_ctx_destroy
;
1064 ws
->base
.ctx_wait_idle
= radv_amdgpu_ctx_wait_idle
;
1065 ws
->base
.cs_create
= radv_amdgpu_cs_create
;
1066 ws
->base
.cs_destroy
= radv_amdgpu_cs_destroy
;
1067 ws
->base
.cs_grow
= radv_amdgpu_cs_grow
;
1068 ws
->base
.cs_finalize
= radv_amdgpu_cs_finalize
;
1069 ws
->base
.cs_reset
= radv_amdgpu_cs_reset
;
1070 ws
->base
.cs_add_buffer
= radv_amdgpu_cs_add_buffer
;
1071 ws
->base
.cs_execute_secondary
= radv_amdgpu_cs_execute_secondary
;
1072 ws
->base
.cs_submit
= radv_amdgpu_winsys_cs_submit
;
1073 ws
->base
.cs_dump
= radv_amdgpu_winsys_cs_dump
;
1074 ws
->base
.create_fence
= radv_amdgpu_create_fence
;
1075 ws
->base
.destroy_fence
= radv_amdgpu_destroy_fence
;
1076 ws
->base
.create_sem
= radv_amdgpu_create_sem
;
1077 ws
->base
.destroy_sem
= radv_amdgpu_destroy_sem
;
1078 ws
->base
.fence_wait
= radv_amdgpu_fence_wait
;