/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>

#include "ac_debug.h"
#include "amdgpu_id.h"
#include "radv_radeon_winsys.h"
#include "radv_amdgpu_cs.h"
#include "radv_amdgpu_bo.h"
#include "sid.h"
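
/*
 * Command stream (CS) handling for the radv amdgpu winsys.
 *
 * Commands are recorded into a CPU-visible buffer. On chips where
 * use_ib_bos is set, that buffer is a GPU-mapped IB BO and IBs can be
 * chained together; otherwise commands are recorded into malloc'ed
 * memory and copied into a fresh GPU buffer at submit time (the
 * "sysmem" path below).
 */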
struct radv_amdgpu_cs {
	struct radeon_winsys_cs base;
	struct radv_amdgpu_winsys *ws;

	struct amdgpu_cs_ib_info ib;

	struct radeon_winsys_bo *ib_buffer;
	uint8_t *ib_mapped;
	unsigned max_num_buffers;
	unsigned num_buffers;
	amdgpu_bo_handle *handles;
	uint8_t *priorities;

	struct radeon_winsys_bo **old_ib_buffers;
	unsigned num_old_ib_buffers;
	unsigned max_num_old_ib_buffers;
	unsigned *ib_size_ptr;
	bool is_chained;
	bool failed;

	int buffer_hash_table[1024];
	unsigned hw_ip;
};
static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_winsys_cs *base)
{
	return (struct radv_amdgpu_cs*)base;
}
static int ring_to_hw_ip(enum ring_type ring)
{
	switch (ring) {
	case RING_GFX:
		return AMDGPU_HW_IP_GFX;
	case RING_DMA:
		return AMDGPU_HW_IP_DMA;
	case RING_COMPUTE:
		return AMDGPU_HW_IP_COMPUTE;
	default:
		unreachable("unsupported ring");
	}
}
static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
					 struct amdgpu_cs_fence *fence,
					 struct amdgpu_cs_request *req)
{
	fence->context = ctx->ctx;
	fence->ip_type = req->ip_type;
	fence->ip_instance = req->ip_instance;
	fence->ring = req->ring;
	fence->fence = req->seq_no;
}
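
/*
 * A winsys fence is simply an amdgpu_cs_fence: the (context, ip_type,
 * ip_instance, ring, seq_no) tuple captured above is everything
 * amdgpu_cs_query_fence_status() needs to identify a submission.
 */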
static struct radeon_winsys_fence *radv_amdgpu_create_fence()
{
	struct amdgpu_cs_fence *fence = calloc(1, sizeof(struct amdgpu_cs_fence));
	return (struct radeon_winsys_fence*)fence;
}
static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence)
{
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	free(fence);
}
static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
				   struct radeon_winsys_fence *_fence,
				   bool absolute,
				   uint64_t timeout)
{
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	unsigned flags = absolute ? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE : 0;
	int r;
	uint32_t expired = 0;

	/* Now use the libdrm query. */
	r = amdgpu_cs_query_fence_status(fence,
					 timeout,
					 flags,
					 &expired);
	if (r) {
		fprintf(stderr, "amdgpu: radv_amdgpu_cs_query_fence_status failed.\n");
		return false;
	}

	return expired != 0;
}
static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);

	if (cs->ib_buffer)
		cs->ws->base.buffer_destroy(cs->ib_buffer);
	else
		free(cs->base.buf);

	for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
		cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

	free(cs->old_ib_buffers);
	free(cs->handles);
	free(cs->priorities);
	free(cs);
}
static boolean radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
				   enum ring_type ring_type)
{
	for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
		cs->buffer_hash_table[i] = -1;

	cs->hw_ip = ring_to_hw_ip(ring_type);
	return true;
}
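
/*
 * Note: in the IB BO path, max_dw is set to four dwords less than the
 * buffer actually holds, so there is always room for the 4-dword
 * INDIRECT_BUFFER chaining packet that radv_amdgpu_cs_grow() and
 * radv_amdgpu_winsys_cs_submit_chained() may append.
 */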
static struct radeon_winsys_cs *
radv_amdgpu_cs_create(struct radeon_winsys *ws,
		      enum ring_type ring_type)
{
	struct radv_amdgpu_cs *cs;
	uint32_t ib_size = 20 * 1024 * 4;
	cs = calloc(1, sizeof(struct radv_amdgpu_cs));
	if (!cs)
		return NULL;

	cs->ws = radv_amdgpu_winsys(ws);
	radv_amdgpu_init_cs(cs, ring_type);

	if (cs->ws->use_ib_bos) {
		cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
						  RADEON_DOMAIN_GTT,
						  RADEON_FLAG_CPU_ACCESS);
		if (!cs->ib_buffer) {
			free(cs);
			return NULL;
		}

		cs->ib_mapped = ws->buffer_map(cs->ib_buffer);
		if (!cs->ib_mapped) {
			ws->buffer_destroy(cs->ib_buffer);
			free(cs);
			return NULL;
		}

		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
		cs->base.buf = (uint32_t *)cs->ib_mapped;
		cs->base.max_dw = ib_size / 4 - 4;
		cs->ib_size_ptr = &cs->ib.size;
		cs->ib.size = 0;

		ws->cs_add_buffer(&cs->base, cs->ib_buffer, 8);
	} else {
		cs->base.buf = malloc(16384);
		cs->base.max_dw = 4096;
		if (!cs->base.buf) {
			free(cs);
			return NULL;
		}
	}

	return &cs->base;
}
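
/*
 * Growing an IB-BO-backed CS does not reallocate; it allocates a new IB
 * and chains the old one to it. After a grow, the old IB ends with
 * (illustrative layout):
 *
 *   0xffff1000                              <- NOP padding up to an
 *   ...                                        8-dword boundary minus 4
 *   PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)
 *   new IB VA, low 32 bits
 *   new IB VA, high 32 bits
 *   S_3F2_CHAIN(1) | S_3F2_VALID(1) | size  <- patched via ib_size_ptr
 *
 * The control dword of each chaining packet doubles as the size field of
 * the IB it points to, so ib_size_ptr is left pointing at it until the
 * next grow/finalize fills the size in.
 */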
static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);

	if (cs->failed) {
		cs->base.cdw = 0;
		return;
	}

	if (!cs->ws->use_ib_bos) {
		const uint64_t limit_dws = 0xffff8;
		uint64_t ib_dws = MAX2(cs->base.cdw + min_size,
				       MIN2(cs->base.max_dw * 2, limit_dws));

		/* The total ib size cannot exceed limit_dws dwords. */
		if (ib_dws > limit_dws) {
			cs->failed = true;
			cs->base.cdw = 0;
			return;
		}

		uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
		if (new_buf) {
			cs->base.buf = new_buf;
			cs->base.max_dw = ib_dws;
		} else {
			cs->failed = true;
			cs->base.cdw = 0;
		}
		return;
	}

	uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);

	/* max that fits in the chain size field. */
	ib_size = MIN2(ib_size, 0xfffff);

	/* Pad with NOPs so the IB ends on an 8-dword boundary once the
	 * 4-dword chaining packet below is appended. */
	while (!cs->base.cdw || (cs->base.cdw & 7) != 4)
		cs->base.buf[cs->base.cdw++] = 0xffff1000;

	*cs->ib_size_ptr |= cs->base.cdw + 4;

	if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) {
		cs->max_num_old_ib_buffers = MAX2(1, cs->max_num_old_ib_buffers * 2);
		cs->old_ib_buffers = realloc(cs->old_ib_buffers,
					     cs->max_num_old_ib_buffers * sizeof(void*));
	}

	cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer;

	cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0,
						   RADEON_DOMAIN_GTT,
						   RADEON_FLAG_CPU_ACCESS);

	if (!cs->ib_buffer) {
		cs->base.cdw = 0;
		cs->failed = true;
		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
	}

	cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer);
	if (!cs->ib_mapped) {
		cs->ws->base.buffer_destroy(cs->ib_buffer);
		cs->base.cdw = 0;
		cs->failed = true;
		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
	}

	cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

	cs->base.buf[cs->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va >> 32;
	cs->ib_size_ptr = cs->base.buf + cs->base.cdw;
	cs->base.buf[cs->base.cdw++] = S_3F2_CHAIN(1) | S_3F2_VALID(1);

	cs->base.buf = (uint32_t *)cs->ib_mapped;
	cs->base.cdw = 0;
	cs->base.max_dw = ib_size / 4 - 4;
}
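
/*
 * Finalization pads the IB to a multiple of 8 dwords with NOPs and ORs
 * the final dword count into the current size field; the 8-dword
 * alignment matches what the padding loops throughout this file assume
 * the hardware requires.
 */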
static bool radv_amdgpu_cs_finalize(struct radeon_winsys_cs *_cs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);

	if (cs->ws->use_ib_bos) {
		while (!cs->base.cdw || (cs->base.cdw & 7) != 0)
			cs->base.buf[cs->base.cdw++] = 0xffff1000;

		*cs->ib_size_ptr |= cs->base.cdw;

		cs->is_chained = false;
	}

	return !cs->failed;
}
static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
	cs->base.cdw = 0;
	cs->failed = false;

	for (unsigned i = 0; i < cs->num_buffers; ++i) {
		unsigned hash = ((uintptr_t)cs->handles[i] >> 6) &
		                (ARRAY_SIZE(cs->buffer_hash_table) - 1);
		cs->buffer_hash_table[hash] = -1;
	}

	cs->num_buffers = 0;

	if (cs->ws->use_ib_bos) {
		cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

		for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
			cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

		cs->num_old_ib_buffers = 0;
		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
		cs->ib.size = 0;
		cs->ib_size_ptr = &cs->ib.size;
	}
}
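
/*
 * The buffer hash table is a 1024-entry direct-mapped cache keyed on the
 * BO handle pointer (shifted right by 6, presumably because the low bits
 * of a heap pointer carry little entropy). A miss falls back to a linear
 * scan of handles[] and re-primes the slot with the hit index.
 */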
static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs,
				      amdgpu_bo_handle bo)
{
	unsigned hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	int index = cs->buffer_hash_table[hash];

	if (index == -1)
		return -1;

	if (cs->handles[index] == bo)
		return index;

	for (unsigned i = 0; i < cs->num_buffers; ++i) {
		if (cs->handles[i] == bo) {
			cs->buffer_hash_table[hash] = i;
			return i;
		}
	}

	return -1;
}
static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
					       amdgpu_bo_handle bo,
					       uint8_t priority)
{
	unsigned hash;
	int index = radv_amdgpu_cs_find_buffer(cs, bo);

	if (index != -1) {
		cs->priorities[index] = MAX2(cs->priorities[index], priority);
		return;
	}

	if (cs->num_buffers == cs->max_num_buffers) {
		unsigned new_count = MAX2(1, cs->max_num_buffers * 2);
		cs->handles = realloc(cs->handles, new_count * sizeof(amdgpu_bo_handle));
		cs->priorities = realloc(cs->priorities, new_count * sizeof(uint8_t));
		cs->max_num_buffers = new_count;
	}

	cs->handles[cs->num_buffers] = bo;
	cs->priorities[cs->num_buffers] = priority;

	hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	cs->buffer_hash_table[hash] = cs->num_buffers;

	++cs->num_buffers;
}
static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
				      struct radeon_winsys_bo *_bo,
				      uint8_t priority)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

	radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
}
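
/*
 * Executing a secondary CS first merges its buffer list into the parent,
 * then either calls the child IB with an INDIRECT_BUFFER packet (IB BO
 * path) or memcpy's the child's dwords inline (sysmem path).
 */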
static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
					     struct radeon_winsys_cs *_child)
{
	struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
	struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);

	for (unsigned i = 0; i < child->num_buffers; ++i) {
		radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i],
						   child->priorities[i]);
	}

	if (parent->ws->use_ib_bos) {
		if (parent->base.cdw + 4 > parent->base.max_dw)
			radv_amdgpu_cs_grow(&parent->base, 4);

		parent->base.buf[parent->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address;
		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address >> 32;
		parent->base.buf[parent->base.cdw++] = child->ib.size;
	} else {
		if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
			radv_amdgpu_cs_grow(&parent->base, child->base.cdw);

		memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
		parent->base.cdw += child->base.cdw;
	}
}
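
/*
 * Builds the amdgpu BO list for a submission. Three cases:
 *  - debug_all_bos: submit every BO the winsys knows about (debugging),
 *  - a single CS with no extras: reuse its handle/priority arrays directly,
 *  - otherwise: merge all lists, deduplicating with a quadratic scan and
 *    keeping the highest priority seen for each BO.
 */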
static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
				      struct radeon_winsys_cs **cs_array,
				      unsigned count,
				      struct radv_amdgpu_winsys_bo *extra_bo,
				      struct radeon_winsys_cs *extra_cs,
				      amdgpu_bo_list_handle *bo_list)
{
	int r = 0;

	if (ws->debug_all_bos) {
		struct radv_amdgpu_winsys_bo *bo;
		amdgpu_bo_handle *handles;
		unsigned num = 0;

		pthread_mutex_lock(&ws->global_bo_list_lock);

		handles = malloc(sizeof(handles[0]) * ws->num_buffers);
		if (!handles) {
			pthread_mutex_unlock(&ws->global_bo_list_lock);
			return -ENOMEM;
		}

		LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) {
			assert(num < ws->num_buffers);
			handles[num++] = bo->bo;
		}

		r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
					  handles, NULL,
					  bo_list);
		free(handles);
		pthread_mutex_unlock(&ws->global_bo_list_lock);
	} else if (count == 1 && !extra_bo && !extra_cs) {
		struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
		r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
					  cs->priorities, bo_list);
	} else {
		unsigned total_buffer_count = !!extra_bo;
		unsigned unique_bo_count = !!extra_bo;
		for (unsigned i = 0; i < count; ++i) {
			struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
			total_buffer_count += cs->num_buffers;
		}

		if (extra_cs)
			total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers;

		amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count);
		uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count);
		if (!handles || !priorities) {
			free(handles);
			free(priorities);
			return -ENOMEM;
		}

		if (extra_bo) {
			handles[0] = extra_bo->bo;
			priorities[0] = 8;
		}

		for (unsigned i = 0; i < count + !!extra_cs; ++i) {
			struct radv_amdgpu_cs *cs;

			if (i == count)
				cs = (struct radv_amdgpu_cs*)extra_cs;
			else
				cs = (struct radv_amdgpu_cs*)cs_array[i];

			if (!cs->num_buffers)
				continue;

			if (unique_bo_count == 0) {
				memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle));
				memcpy(priorities, cs->priorities, cs->num_buffers * sizeof(uint8_t));
				unique_bo_count = cs->num_buffers;
				continue;
			}

			int unique_bo_so_far = unique_bo_count;
			for (unsigned j = 0; j < cs->num_buffers; ++j) {
				bool found = false;
				for (unsigned k = 0; k < unique_bo_so_far; ++k) {
					if (handles[k] == cs->handles[j]) {
						found = true;
						priorities[k] = MAX2(priorities[k],
								     cs->priorities[j]);
						break;
					}
				}
				if (!found) {
					handles[unique_bo_count] = cs->handles[j];
					priorities[unique_bo_count] = cs->priorities[j];
					++unique_bo_count;
				}
			}
		}

		r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
					  priorities, bo_list);

		free(handles);
		free(priorities);
	}

	return r;
}
static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
				    struct amdgpu_cs_request *request)
{
	radv_amdgpu_request_to_fence(ctx,
				     &ctx->last_submission[request->ip_type][request->ring],
				     request);
}
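
/*
 * Chained submission: every CS is patched to chain to its successor, so
 * the kernel only ever sees the first IB. The walk below is in reverse
 * order because each chain packet needs the VA and final size of the
 * next IB.
 */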
static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
						int queue_idx,
						struct radeon_winsys_cs **cs_array,
						unsigned cs_count,
						struct radeon_winsys_cs *preamble_cs,
						struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request = {0};
	struct amdgpu_cs_ib_info ibs[2];

	for (unsigned i = cs_count; i--;) {
		struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);

		if (cs->is_chained) {
			*cs->ib_size_ptr -= 4;
			cs->is_chained = false;
		}

		if (i + 1 < cs_count) {
			struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]);
			assert(cs->base.cdw + 4 <= cs->base.max_dw);

			cs->is_chained = true;
			*cs->ib_size_ptr += 4;

			cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
			cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address;
			cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32;
			cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size;
		}
	}

	r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, preamble_cs, &bo_list);
	if (r) {
		fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
		return r;
	}

	request.ip_type = cs0->hw_ip;
	request.ring = queue_idx;
	request.number_of_ibs = 1;
	request.ibs = &cs0->ib;
	request.resources = bo_list;

	if (preamble_cs) {
		request.ibs = ibs;
		request.number_of_ibs = 2;
		ibs[1] = cs0->ib;
		ibs[0] = ((struct radv_amdgpu_cs*)preamble_cs)->ib;
	}

	r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
	if (r) {
		if (r == -ENOMEM)
			fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
		else
			fprintf(stderr, "amdgpu: The CS has been rejected, "
					"see dmesg for more information.\n");
	}

	amdgpu_bo_list_destroy(bo_list);

	if (r)
		return r;

	if (fence)
		radv_amdgpu_request_to_fence(ctx, fence, &request);

	radv_assign_last_submit(ctx, &request);

	return 0;
}
static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
						 int queue_idx,
						 struct radeon_winsys_cs **cs_array,
						 unsigned cs_count,
						 struct radeon_winsys_cs *preamble_cs,
						 struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request;

	assert(cs_count);

	for (unsigned i = 0; i < cs_count;) {
		struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
		struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
		unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs,
				    cs_count - i);

		memset(&request, 0, sizeof(request));

		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL,
					       preamble_cs, &bo_list);
		if (r) {
			fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
			return r;
		}

		request.ip_type = cs0->hw_ip;
		request.ring = queue_idx;
		request.resources = bo_list;
		request.number_of_ibs = cnt + !!preamble_cs;
		request.ibs = ibs;

		if (preamble_cs)
			ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;

		for (unsigned j = 0; j < cnt; ++j) {
			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
			ibs[j + !!preamble_cs] = cs->ib;

			if (cs->is_chained) {
				*cs->ib_size_ptr -= 4;
				cs->is_chained = false;
			}
		}

		r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
		if (r) {
			if (r == -ENOMEM)
				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
			else
				fprintf(stderr, "amdgpu: The CS has been rejected, "
						"see dmesg for more information.\n");
		}

		amdgpu_bo_list_destroy(bo_list);

		if (r)
			return r;

		i += cnt;
	}

	if (fence)
		radv_amdgpu_request_to_fence(ctx, fence, &request);

	radv_assign_last_submit(ctx, &request);

	return 0;
}
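
/*
 * Sysmem submission, for chips without usable IB BOs (use_ib_bos is
 * false, e.g. SI): copy as many CSes as fit into a temporary GTT buffer,
 * pad it with the family-specific NOP dword, and submit that single IB.
 */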
static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
					       int queue_idx,
					       struct radeon_winsys_cs **cs_array,
					       unsigned cs_count,
					       struct radeon_winsys_cs *preamble_cs,
					       struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
	struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request;
	uint32_t pad_word = 0xffff1000U;

	/* SI uses a different NOP encoding. */
	if (radv_amdgpu_winsys(ws)->family == FAMILY_SI)
		pad_word = 0x80000000;

	assert(cs_count);

	for (unsigned i = 0; i < cs_count;) {
		struct amdgpu_cs_ib_info ib = {0};
		struct radeon_winsys_bo *bo = NULL;
		uint32_t *ptr;
		unsigned cnt = 0;
		unsigned size = 0;

		if (preamble_cs)
			size += preamble_cs->cdw;

		while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
			size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
			++cnt;
		}

		assert(cnt);

		bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
		ptr = ws->buffer_map(bo);

		if (preamble_cs) {
			memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
			ptr += preamble_cs->cdw;
		}

		for (unsigned j = 0; j < cnt; ++j) {
			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
			memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
			ptr += cs->base.cdw;
		}

		/* Pad the combined IB to a multiple of 8 dwords. */
		while (!size || (size & 7)) {
			*ptr++ = pad_word;
			++size;
		}

		memset(&request, 0, sizeof(request));

		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
					       (struct radv_amdgpu_winsys_bo*)bo,
					       preamble_cs, &bo_list);
		if (r) {
			fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
			return r;
		}

		ib.size = size;
		ib.ib_mc_address = ws->buffer_get_va(bo);

		request.ip_type = cs0->hw_ip;
		request.ring = queue_idx;
		request.resources = bo_list;
		request.number_of_ibs = 1;
		request.ibs = &ib;

		r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
		if (r) {
			if (r == -ENOMEM)
				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
			else
				fprintf(stderr, "amdgpu: The CS has been rejected, "
						"see dmesg for more information.\n");
		}

		amdgpu_bo_list_destroy(bo_list);

		ws->buffer_destroy(bo);
		if (r)
			return r;

		i += cnt;
	}

	if (fence)
		radv_amdgpu_request_to_fence(ctx, fence, &request);

	radv_assign_last_submit(ctx, &request);

	return 0;
}
static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
					int queue_idx,
					struct radeon_winsys_cs **cs_array,
					unsigned cs_count,
					struct radeon_winsys_cs *preamble_cs,
					struct radeon_winsys_sem **wait_sem,
					unsigned wait_sem_count,
					struct radeon_winsys_sem **signal_sem,
					unsigned signal_sem_count,
					bool can_patch,
					struct radeon_winsys_fence *_fence)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	int ret;
	int i;

	for (i = 0; i < wait_sem_count; i++) {
		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)wait_sem[i];
		amdgpu_cs_wait_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
					 sem);
	}

	if (!cs->ws->use_ib_bos) {
		ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
							  cs_count, preamble_cs, _fence);
	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
		/* The chained path is currently disabled by the "&& false". */
		ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
							   cs_count, preamble_cs, _fence);
	} else {
		ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array,
							    cs_count, preamble_cs, _fence);
	}

	for (i = 0; i < signal_sem_count; i++) {
		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)signal_sem[i];
		amdgpu_cs_signal_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
					   sem);
	}

	return ret;
}
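
/*
 * Maps a GPU VA back to a CPU pointer by scanning the current IB and the
 * old chained IBs; used as the address callback for the IB parser below.
 */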
static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
{
	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
	void *ret = NULL;

	for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
		struct radv_amdgpu_winsys_bo *bo;

		bo = (struct radv_amdgpu_winsys_bo*)
		     (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
		if (addr >= bo->va && addr - bo->va < bo->size) {
			if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
				return (char *)ret + (addr - bo->va);
		}
	}

	return ret;
}
static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
				       FILE *file,
				       uint32_t trace_id)
{
	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;

	ac_parse_ib(file,
		    radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address),
		    cs->ib.size, trace_id, "main IB", cs->ws->info.chip_class,
		    radv_amdgpu_winsys_get_cpu_addr, cs);
}
static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
	int r;

	if (!ctx)
		return NULL;
	r = amdgpu_cs_ctx_create(ws->dev, &ctx->ctx);
	if (r) {
		fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create failed. (%i)\n", r);
		goto error_create;
	}
	ctx->ws = ws;
	return (struct radeon_winsys_ctx *)ctx;
error_create:
	FREE(ctx);
	return NULL;
}
static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
	amdgpu_cs_ctx_free(ctx->ctx);
	FREE(ctx);
}
static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
				      enum ring_type ring_type, int ring_index)
{
	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
	int ip_type = ring_to_hw_ip(ring_type);

	if (ctx->last_submission[ip_type][ring_index].fence) {
		uint32_t expired;
		int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index],
						       1000000000ull, 0, &expired);

		if (ret || !expired)
			return false;
	}

	return true;
}
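
/*
 * Winsys semaphores are amdgpu_semaphore_handles passed through opaquely;
 * see the casts in radv_amdgpu_winsys_cs_submit().
 */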
static struct radeon_winsys_sem *radv_amdgpu_create_sem(struct radeon_winsys *_ws)
{
	int ret;
	amdgpu_semaphore_handle sem;

	ret = amdgpu_cs_create_semaphore(&sem);
	if (ret)
		return NULL;
	return (struct radeon_winsys_sem *)sem;
}
static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
{
	amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)_sem;
	amdgpu_cs_destroy_semaphore(sem);
}
void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
{
	ws->base.ctx_create = radv_amdgpu_ctx_create;
	ws->base.ctx_destroy = radv_amdgpu_ctx_destroy;
	ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle;
	ws->base.cs_create = radv_amdgpu_cs_create;
	ws->base.cs_destroy = radv_amdgpu_cs_destroy;
	ws->base.cs_grow = radv_amdgpu_cs_grow;
	ws->base.cs_finalize = radv_amdgpu_cs_finalize;
	ws->base.cs_reset = radv_amdgpu_cs_reset;
	ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
	ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
	ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
	ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
	ws->base.create_fence = radv_amdgpu_create_fence;
	ws->base.destroy_fence = radv_amdgpu_destroy_fence;
	ws->base.create_sem = radv_amdgpu_create_sem;
	ws->base.destroy_sem = radv_amdgpu_destroy_sem;
	ws->base.fence_wait = radv_amdgpu_fence_wait;
}