radv: pass queue index into winsys submission
[mesa.git] src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <assert.h>

#include "amdgpu_id.h"
#include "radv_radeon_winsys.h"
#include "radv_amdgpu_cs.h"
#include "radv_amdgpu_bo.h"
#include "sid.h"

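/* Per-CS state: the IB currently being recorded and its backing BO (when IB
 * BOs are used), retired IBs kept alive until the next reset, and the list
 * of referenced buffers with a small hash table for fast lookups. */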
struct radv_amdgpu_cs {
	struct radeon_winsys_cs base;
	struct radv_amdgpu_winsys *ws;

	struct amdgpu_cs_ib_info ib;

	struct radeon_winsys_bo *ib_buffer;
	uint8_t *ib_mapped;
	unsigned max_num_buffers;
	unsigned num_buffers;
	amdgpu_bo_handle *handles;
	uint8_t *priorities;

	struct radeon_winsys_bo **old_ib_buffers;
	unsigned num_old_ib_buffers;
	unsigned max_num_old_ib_buffers;
	unsigned *ib_size_ptr;
	bool failed;
	bool is_chained;

	int buffer_hash_table[1024];
	unsigned hw_ip;
};

static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_winsys_cs *base)
{
	return (struct radv_amdgpu_cs*)base;
}

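/* Translate a winsys ring type into the kernel's HW IP block index. */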
static int ring_to_hw_ip(enum ring_type ring)
{
	switch (ring) {
	case RING_GFX:
		return AMDGPU_HW_IP_GFX;
	case RING_DMA:
		return AMDGPU_HW_IP_DMA;
	case RING_COMPUTE:
		return AMDGPU_HW_IP_COMPUTE;
	default:
		unreachable("unsupported ring");
	}
}

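/* Record the identity (context, IP, instance, ring, sequence number) of a
 * submitted request so it can later be waited upon as a fence. */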
static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
					 struct amdgpu_cs_fence *fence,
					 struct amdgpu_cs_request *req)
{
	fence->context = ctx->ctx;
	fence->ip_type = req->ip_type;
	fence->ip_instance = req->ip_instance;
	fence->ring = req->ring;
	fence->fence = req->seq_no;
}

static struct radeon_winsys_fence *radv_amdgpu_create_fence(void)
{
	struct amdgpu_cs_fence *fence = calloc(1, sizeof(struct amdgpu_cs_fence));
	return (struct radeon_winsys_fence*)fence;
}

static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence)
{
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	free(fence);
}

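/* Wait until the fence signals or the timeout expires; the timeout is
 * absolute or relative depending on the 'absolute' flag. */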
static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
				   struct radeon_winsys_fence *_fence,
				   bool absolute,
				   uint64_t timeout)
{
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	unsigned flags = absolute ? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE : 0;
	int r;
	uint32_t expired = 0;

	/* Now use the libdrm query. */
	r = amdgpu_cs_query_fence_status(fence,
					 timeout,
					 flags,
					 &expired);

	if (r) {
		fprintf(stderr, "amdgpu: amdgpu_cs_query_fence_status failed.\n");
		return false;
	}

	if (expired)
		return true;

	return false;
}

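/* Destroy a CS, releasing the current and any retired IB BOs (or the
 * malloc'ed buffer in the sysmem path). */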
static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);

	if (cs->ib_buffer)
		cs->ws->base.buffer_destroy(cs->ib_buffer);
	else
		free(cs->base.buf);

	for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
		cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

	free(cs->old_ib_buffers);
	free(cs->handles);
	free(cs->priorities);
	free(cs);
}

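/* Common CS initialization: clear the buffer hash table and resolve the
 * HW IP for this ring type. */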
static bool radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
				enum ring_type ring_type)
{
	for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
		cs->buffer_hash_table[i] = -1;

	cs->hw_ip = ring_to_hw_ip(ring_type);
	return true;
}

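/* Create a CS. With IB BOs the commands are recorded directly into a
 * CPU-mapped GTT buffer; otherwise they go into malloc'ed memory and are
 * copied into a BO at submit time. */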
static struct radeon_winsys_cs *
radv_amdgpu_cs_create(struct radeon_winsys *ws,
		      enum ring_type ring_type)
{
	struct radv_amdgpu_cs *cs;
	uint32_t ib_size = 20 * 1024 * 4;
	cs = calloc(1, sizeof(struct radv_amdgpu_cs));
	if (!cs)
		return NULL;

	cs->ws = radv_amdgpu_winsys(ws);
	radv_amdgpu_init_cs(cs, ring_type);

	if (cs->ws->use_ib_bos) {
		cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
						  RADEON_DOMAIN_GTT,
						  RADEON_FLAG_CPU_ACCESS);
		if (!cs->ib_buffer) {
			free(cs);
			return NULL;
		}

		cs->ib_mapped = ws->buffer_map(cs->ib_buffer);
		if (!cs->ib_mapped) {
			ws->buffer_destroy(cs->ib_buffer);
			free(cs);
			return NULL;
		}

		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
		cs->base.buf = (uint32_t *)cs->ib_mapped;
		cs->base.max_dw = ib_size / 4 - 4;
		cs->ib_size_ptr = &cs->ib.size;
		cs->ib.size = 0;

		ws->cs_add_buffer(&cs->base, cs->ib_buffer, 8);
	} else {
		cs->base.buf = malloc(16384);
		cs->base.max_dw = 4096;
		if (!cs->base.buf) {
			free(cs);
			return NULL;
		}
	}

	return &cs->base;
}

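/* Ensure space for at least min_size more dwords. The sysmem path simply
 * reallocs, up to the kernel's IB size limit. The IB BO path pads the
 * current IB so that, including the 4-dword chain packet, its size is
 * 8-dword aligned, then allocates a fresh BO and emits an INDIRECT_BUFFER
 * packet chaining into it; the packet's size dword is patched later via
 * ib_size_ptr. */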
static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);

	if (cs->failed) {
		cs->base.cdw = 0;
		return;
	}

	if (!cs->ws->use_ib_bos) {
		const uint64_t limit_dws = 0xffff8;
		uint64_t ib_dws = MAX2(cs->base.cdw + min_size,
				       MIN2(cs->base.max_dw * 2, limit_dws));

		/* The total ib size cannot exceed limit_dws dwords. */
		if (ib_dws > limit_dws) {
			cs->failed = true;
			cs->base.cdw = 0;
			return;
		}

		uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
		if (new_buf) {
			cs->base.buf = new_buf;
			cs->base.max_dw = ib_dws;
		} else {
			cs->failed = true;
			cs->base.cdw = 0;
		}
		return;
	}

	uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);

	/* max that fits in the chain size field. */
	ib_size = MIN2(ib_size, 0xfffff);

	while (!cs->base.cdw || (cs->base.cdw & 7) != 4)
		cs->base.buf[cs->base.cdw++] = 0xffff1000;

	*cs->ib_size_ptr |= cs->base.cdw + 4;

	if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) {
		cs->max_num_old_ib_buffers = MAX2(1, cs->max_num_old_ib_buffers * 2);
		cs->old_ib_buffers = realloc(cs->old_ib_buffers,
					     cs->max_num_old_ib_buffers * sizeof(void*));
	}

	cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer;

	cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0,
						   RADEON_DOMAIN_GTT,
						   RADEON_FLAG_CPU_ACCESS);

	if (!cs->ib_buffer) {
		cs->base.cdw = 0;
		cs->failed = true;
		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
	}

	cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer);
	if (!cs->ib_mapped) {
		cs->ws->base.buffer_destroy(cs->ib_buffer);
		cs->base.cdw = 0;
		cs->failed = true;
		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
	}

	cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

	cs->base.buf[cs->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va >> 32;
	cs->ib_size_ptr = cs->base.buf + cs->base.cdw;
	cs->base.buf[cs->base.cdw++] = S_3F2_CHAIN(1) | S_3F2_VALID(1);

	cs->base.buf = (uint32_t *)cs->ib_mapped;
	cs->base.cdw = 0;
	cs->base.max_dw = ib_size / 4 - 4;
}

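/* Finalize a CS for submission: pad the IB to a multiple of 8 dwords and
 * patch the final dword count into the size field (the IB info, or the
 * pending chain packet). */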
static bool radv_amdgpu_cs_finalize(struct radeon_winsys_cs *_cs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);

	if (cs->ws->use_ib_bos) {
		while (!cs->base.cdw || (cs->base.cdw & 7) != 0)
			cs->base.buf[cs->base.cdw++] = 0xffff1000;

		*cs->ib_size_ptr |= cs->base.cdw;

		cs->is_chained = false;
	}

	return !cs->failed;
}

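/* Reset a CS for reuse: forget all referenced buffers, release retired IB
 * BOs and start recording again at the beginning of the current IB. */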
static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
	cs->base.cdw = 0;
	cs->failed = false;

	for (unsigned i = 0; i < cs->num_buffers; ++i) {
		unsigned hash = ((uintptr_t)cs->handles[i] >> 6) &
		                (ARRAY_SIZE(cs->buffer_hash_table) - 1);
		cs->buffer_hash_table[hash] = -1;
	}

	cs->num_buffers = 0;

	if (cs->ws->use_ib_bos) {
		cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

		for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
			cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

		cs->num_old_ib_buffers = 0;
		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
		cs->ib_size_ptr = &cs->ib.size;
		cs->ib.size = 0;
	}
}

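/* Find a BO in the CS buffer list, first via the hash table and then by
 * linear search (updating the hash entry) on a collision. */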
static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs,
				      amdgpu_bo_handle bo)
{
	unsigned hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	int index = cs->buffer_hash_table[hash];

	if (index == -1)
		return -1;

	if (cs->handles[index] == bo)
		return index;

	for (unsigned i = 0; i < cs->num_buffers; ++i) {
		if (cs->handles[i] == bo) {
			cs->buffer_hash_table[hash] = i;
			return i;
		}
	}

	return -1;
}

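/* Add a BO to the CS buffer list, merging priorities if it is already
 * present. */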
static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
					       amdgpu_bo_handle bo,
					       uint8_t priority)
{
	unsigned hash;
	int index = radv_amdgpu_cs_find_buffer(cs, bo);

	if (index != -1) {
		cs->priorities[index] = MAX2(cs->priorities[index], priority);
		return;
	}

	if (cs->num_buffers == cs->max_num_buffers) {
		unsigned new_count = MAX2(1, cs->max_num_buffers * 2);
		cs->handles = realloc(cs->handles, new_count * sizeof(amdgpu_bo_handle));
		cs->priorities = realloc(cs->priorities, new_count * sizeof(uint8_t));
		cs->max_num_buffers = new_count;
	}

	cs->handles[cs->num_buffers] = bo;
	cs->priorities[cs->num_buffers] = priority;

	hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	cs->buffer_hash_table[hash] = cs->num_buffers;

	++cs->num_buffers;
}

static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
				      struct radeon_winsys_bo *_bo,
				      uint8_t priority)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

	radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
}

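/* Record a secondary CS into a primary one: the parent inherits the child's
 * buffer list, then either calls the child's IB via INDIRECT_BUFFER (IB BOs)
 * or copies its contents inline (sysmem). */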
static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
					     struct radeon_winsys_cs *_child)
{
	struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
	struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);

	for (unsigned i = 0; i < child->num_buffers; ++i) {
		radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i],
						   child->priorities[i]);
	}

	if (parent->ws->use_ib_bos) {
		if (parent->base.cdw + 4 > parent->base.max_dw)
			radv_amdgpu_cs_grow(&parent->base, 4);

		parent->base.buf[parent->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address;
		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address >> 32;
		parent->base.buf[parent->base.cdw++] = child->ib.size;
	} else {
		if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
			radv_amdgpu_cs_grow(&parent->base, child->base.cdw);

		memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
		parent->base.cdw += child->base.cdw;
	}
}

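/* Build the kernel BO list for a submission: the winsys-global BO list when
 * debug_all_bos is set, a single CS's list verbatim, or otherwise the
 * de-duplicated union of all CS buffer lists plus an optional extra BO. */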
static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
				      struct radeon_winsys_cs **cs_array,
				      unsigned count,
				      struct radv_amdgpu_winsys_bo *extra_bo,
				      amdgpu_bo_list_handle *bo_list)
{
	int r;
	if (ws->debug_all_bos) {
		struct radv_amdgpu_winsys_bo *bo;
		amdgpu_bo_handle *handles;
		unsigned num = 0;

		pthread_mutex_lock(&ws->global_bo_list_lock);

		handles = malloc(sizeof(handles[0]) * ws->num_buffers);
		if (!handles) {
			pthread_mutex_unlock(&ws->global_bo_list_lock);
			return -ENOMEM;
		}

		LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) {
			assert(num < ws->num_buffers);
			handles[num++] = bo->bo;
		}

		r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
					  handles, NULL,
					  bo_list);
		free(handles);
		pthread_mutex_unlock(&ws->global_bo_list_lock);
	} else if (count == 1 && !extra_bo) {
		struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
		r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
					  cs->priorities, bo_list);
	} else {
		unsigned total_buffer_count = !!extra_bo;
		unsigned unique_bo_count = !!extra_bo;
		for (unsigned i = 0; i < count; ++i) {
			struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
			total_buffer_count += cs->num_buffers;
		}

		amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count);
		uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count);
		if (!handles || !priorities) {
			free(handles);
			free(priorities);
			return -ENOMEM;
		}

		if (extra_bo) {
			handles[0] = extra_bo->bo;
			priorities[0] = 8;
		}

		for (unsigned i = 0; i < count; ++i) {
			struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
			for (unsigned j = 0; j < cs->num_buffers; ++j) {
				bool found = false;
				for (unsigned k = 0; k < unique_bo_count; ++k) {
					if (handles[k] == cs->handles[j]) {
						found = true;
						priorities[k] = MAX2(priorities[k],
								     cs->priorities[j]);
						break;
					}
				}
				if (!found) {
					handles[unique_bo_count] = cs->handles[j];
					priorities[unique_bo_count] = cs->priorities[j];
					++unique_bo_count;
				}
			}
		}
		r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
					  priorities, bo_list);

		free(handles);
		free(priorities);
	}

	return r;
}

static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
				    struct amdgpu_cs_request *request)
{
	radv_amdgpu_request_to_fence(ctx,
				     &ctx->last_submission[request->ip_type][request->ring],
				     request);
}

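/* Submit a sequence of CSes as a single IB by chaining each one into the
 * next with an INDIRECT_BUFFER packet and handing only the first IB to the
 * kernel. */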
static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
						int queue_idx,
						struct radeon_winsys_cs **cs_array,
						unsigned cs_count,
						struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request = {0};

	for (unsigned i = cs_count; i--;) {
		struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);

		if (cs->is_chained) {
			*cs->ib_size_ptr -= 4;
			cs->is_chained = false;
		}

		if (i + 1 < cs_count) {
			struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]);
			assert(cs->base.cdw + 4 <= cs->base.max_dw);

			cs->is_chained = true;
			*cs->ib_size_ptr += 4;

			cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
			cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address;
			cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32;
			cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size;
		}
	}

	r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, &bo_list);
	if (r) {
		fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
		return r;
	}

	request.ip_type = cs0->hw_ip;
	request.ring = queue_idx;
	request.number_of_ibs = 1;
	request.ibs = &cs0->ib;
	request.resources = bo_list;

	r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
	if (r) {
		if (r == -ENOMEM)
			fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
		else
			fprintf(stderr, "amdgpu: The CS has been rejected, "
					"see dmesg for more information.\n");
	}

	amdgpu_bo_list_destroy(bo_list);

	if (fence)
		radv_amdgpu_request_to_fence(ctx, fence, &request);

	radv_assign_last_submit(ctx, &request);

	return r;
}

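/* Submit without chaining: hand the kernel batches of up to
 * AMDGPU_CS_MAX_IBS_PER_SUBMIT IBs per request, unchaining any CS that was
 * chained by a previous submission. */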
static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
						 int queue_idx,
						 struct radeon_winsys_cs **cs_array,
						 unsigned cs_count,
						 struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request;

	assert(cs_count);

	for (unsigned i = 0; i < cs_count;) {
		struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
		struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
		unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT, cs_count - i);

		memset(&request, 0, sizeof(request));

		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, &bo_list);
		if (r) {
			fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
			return r;
		}

		request.ip_type = cs0->hw_ip;
		request.ring = queue_idx;
		request.resources = bo_list;
		request.number_of_ibs = cnt;
		request.ibs = ibs;

		for (unsigned j = 0; j < cnt; ++j) {
			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
			ibs[j] = cs->ib;

			if (cs->is_chained) {
				*cs->ib_size_ptr -= 4;
				cs->is_chained = false;
			}
		}

		r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
		if (r) {
			if (r == -ENOMEM)
				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
			else
				fprintf(stderr, "amdgpu: The CS has been rejected, "
						"see dmesg for more information.\n");
		}

		amdgpu_bo_list_destroy(bo_list);

		if (r)
			return r;

		i += cnt;
	}
	if (fence)
		radv_amdgpu_request_to_fence(ctx, fence, &request);

	radv_assign_last_submit(ctx, &request);

	return 0;
}

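/* Submission path when IBs live in system memory: copy as many CSes as fit
 * into a temporary GTT BO, pad it, and submit that BO as a single IB. */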
static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
					       int queue_idx,
					       struct radeon_winsys_cs **cs_array,
					       unsigned cs_count,
					       struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
	struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request;
	uint32_t pad_word = 0xffff1000U;

	if (radv_amdgpu_winsys(ws)->family == FAMILY_SI)
		pad_word = 0x80000000;

	assert(cs_count);

	for (unsigned i = 0; i < cs_count;) {
		struct amdgpu_cs_ib_info ib = {0};
		struct radeon_winsys_bo *bo = NULL;
		uint32_t *ptr;
		unsigned cnt = 0;
		unsigned size = 0;

		while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
			size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
			++cnt;
		}

		assert(cnt);

		/* The IB is padded below to a non-zero multiple of 8 dwords
		 * (as elsewhere in this winsys); account for that padding
		 * when sizing the BO so the pad loop cannot write past the
		 * allocation. */
		unsigned padded_size = MAX2((size + 7) & ~7u, 8u);

		bo = ws->buffer_create(ws, 4 * padded_size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
		ptr = ws->buffer_map(bo);

		for (unsigned j = 0; j < cnt; ++j) {
			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
			memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
			ptr += cs->base.cdw;
		}

		while (size < padded_size) {
			*ptr++ = pad_word;
			++size;
		}

		memset(&request, 0, sizeof(request));

		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
					       (struct radv_amdgpu_winsys_bo*)bo, &bo_list);
		if (r) {
			fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
			return r;
		}

		ib.size = size;
		ib.ib_mc_address = ws->buffer_get_va(bo);

		request.ip_type = cs0->hw_ip;
		request.ring = queue_idx;
		request.resources = bo_list;
		request.number_of_ibs = 1;
		request.ibs = &ib;

		r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
		if (r) {
			if (r == -ENOMEM)
				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
			else
				fprintf(stderr, "amdgpu: The CS has been rejected, "
						"see dmesg for more information.\n");
		}

		amdgpu_bo_list_destroy(bo_list);

		ws->buffer_destroy(bo);
		if (r)
			return r;

		i += cnt;
	}
	if (fence)
		radv_amdgpu_request_to_fence(ctx, fence, &request);

	radv_assign_last_submit(ctx, &request);

	return 0;
}

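/* Choose a submission strategy based on how the CSes were built. Note that
 * the chained path is currently disabled by the '&& false' below, so IB-BO
 * submissions always take the fallback path. */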
static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
					int queue_idx,
					struct radeon_winsys_cs **cs_array,
					unsigned cs_count,
					bool can_patch,
					struct radeon_winsys_fence *_fence)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
	if (!cs->ws->use_ib_bos) {
		return radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
							   cs_count, _fence);
	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
		return radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
							    cs_count, _fence);
	} else {
		return radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array,
							     cs_count, _fence);
	}
}

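/* Create/destroy a kernel submission context; the context also tracks the
 * last submission per (IP, ring) so ctx_wait_idle can wait on it. */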
static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
	int r;

	if (!ctx)
		return NULL;
	r = amdgpu_cs_ctx_create(ws->dev, &ctx->ctx);
	if (r) {
		fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r);
		goto error_create;
	}
	ctx->ws = ws;
	return (struct radeon_winsys_ctx *)ctx;
error_create:
	FREE(ctx);
	return NULL;
}

static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
	amdgpu_cs_ctx_free(ctx->ctx);
	FREE(ctx);
}

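/* Wait (up to one second) for the last submission recorded on the given
 * ring to retire; returns false on timeout or error. */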
static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
				      enum ring_type ring_type, int ring_index)
{
	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
	int ip_type = ring_to_hw_ip(ring_type);

	if (ctx->last_submission[ip_type][ring_index].fence) {
		uint32_t expired;
		int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index],
						       1000000000ull, 0, &expired);

		if (ret || !expired)
			return false;
	}

	return true;
}

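/* Install the command-stream, context and fence entry points into the
 * winsys function table. */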
void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
{
	ws->base.ctx_create = radv_amdgpu_ctx_create;
	ws->base.ctx_destroy = radv_amdgpu_ctx_destroy;
	ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle;
	ws->base.cs_create = radv_amdgpu_cs_create;
	ws->base.cs_destroy = radv_amdgpu_cs_destroy;
	ws->base.cs_grow = radv_amdgpu_cs_grow;
	ws->base.cs_finalize = radv_amdgpu_cs_finalize;
	ws->base.cs_reset = radv_amdgpu_cs_reset;
	ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
	ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
	ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
	ws->base.create_fence = radv_amdgpu_create_fence;
	ws->base.destroy_fence = radv_amdgpu_destroy_fence;
	ws->base.fence_wait = radv_amdgpu_fence_wait;
}