radv/winsys: Fix radv_amdgpu_cs_grow min_size argument. (v2)
[mesa.git] / src / amd / vulkan / winsys / amdgpu / radv_amdgpu_cs.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <assert.h>

#include "amdgpu_id.h"
#include "radv_radeon_winsys.h"
#include "radv_amdgpu_cs.h"
#include "radv_amdgpu_bo.h"
#include "sid.h"

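/* A winsys command stream.  With use_ib_bos the commands are written
 * straight into a mapped, GPU-visible IB buffer; otherwise they go into a
 * malloc'ed sysmem buffer that is copied into a GTT BO at submit time. */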
struct radv_amdgpu_cs {
	struct radeon_winsys_cs base;
	struct radv_amdgpu_winsys *ws;

	struct amdgpu_cs_ib_info ib;

	struct radeon_winsys_bo *ib_buffer;
	uint8_t *ib_mapped;
	unsigned max_num_buffers;
	unsigned num_buffers;
	amdgpu_bo_handle *handles;
	uint8_t *priorities;

	struct radeon_winsys_bo **old_ib_buffers;
	unsigned num_old_ib_buffers;
	unsigned max_num_old_ib_buffers;
	unsigned *ib_size_ptr;
	bool failed;
	bool is_chained;

	int buffer_hash_table[1024];
};

static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_winsys_cs *base)
{
	return (struct radv_amdgpu_cs*)base;
}

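/* Fences are thin wrappers around amdgpu_cs_fence; they are filled in at
 * submit time and waited on through amdgpu_cs_query_fence_status(). */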
static struct radeon_winsys_fence *radv_amdgpu_create_fence()
{
	struct amdgpu_cs_fence *fence = calloc(1, sizeof(struct amdgpu_cs_fence));
	return (struct radeon_winsys_fence*)fence;
}

static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence)
{
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	free(fence);
}

static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
				   struct radeon_winsys_fence *_fence,
				   bool absolute,
				   uint64_t timeout)
{
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	unsigned flags = absolute ? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE : 0;
	int r;
	uint32_t expired = 0;

	/* Now use the libdrm query. */
	r = amdgpu_cs_query_fence_status(fence,
					 timeout,
					 flags,
					 &expired);

	if (r) {
		fprintf(stderr, "amdgpu: amdgpu_cs_query_fence_status failed.\n");
		return false;
	}

	if (expired)
		return true;

	return false;
}

static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);

	if (cs->ib_buffer)
		cs->ws->base.buffer_destroy(cs->ib_buffer);
	else
		free(cs->base.buf);

	for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
		cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

	free(cs->old_ib_buffers);
	free(cs->handles);
	free(cs->priorities);
	free(cs);
}

static bool radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
				enum ring_type ring_type)
{
	for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
		cs->buffer_hash_table[i] = -1;

	return true;
}

static struct radeon_winsys_cs *
radv_amdgpu_cs_create(struct radeon_winsys *ws,
		      enum ring_type ring_type)
{
	struct radv_amdgpu_cs *cs;
	uint32_t ib_size = 20 * 1024 * 4;
	cs = calloc(1, sizeof(struct radv_amdgpu_cs));
	if (!cs)
		return NULL;

	cs->ws = radv_amdgpu_winsys(ws);
	radv_amdgpu_init_cs(cs, RING_GFX);

	if (cs->ws->use_ib_bos) {
		cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
						  RADEON_DOMAIN_GTT,
						  RADEON_FLAG_CPU_ACCESS);
		if (!cs->ib_buffer) {
			free(cs);
			return NULL;
		}

		cs->ib_mapped = ws->buffer_map(cs->ib_buffer);
		if (!cs->ib_mapped) {
			ws->buffer_destroy(cs->ib_buffer);
			free(cs);
			return NULL;
		}

		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
		cs->base.buf = (uint32_t *)cs->ib_mapped;
		cs->base.max_dw = ib_size / 4 - 4;
		cs->ib_size_ptr = &cs->ib.size;
		cs->ib.size = 0;

		ws->cs_add_buffer(&cs->base, cs->ib_buffer, 8);
	} else {
		cs->base.buf = malloc(16384);
		cs->base.max_dw = 4096;
		if (!cs->base.buf) {
			free(cs);
			return NULL;
		}
	}

	return &cs->base;
}

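/* Make sure at least min_size more dwords fit in the CS.  Without IB BOs the
 * sysmem buffer is simply realloc'ed.  With IB BOs a new IB is allocated and
 * the current one is padded and terminated with an INDIRECT_BUFFER packet
 * chaining into it; the new IB's size field is patched later through
 * ib_size_ptr. */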
static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);

	if (cs->failed) {
		cs->base.cdw = 0;
		return;
	}

	if (!cs->ws->use_ib_bos) {
		uint64_t ib_size = MAX2((cs->base.cdw + min_size) * 4 + 16,
					cs->base.max_dw * 4 * 2);
		uint32_t *new_buf = realloc(cs->base.buf, ib_size);
		if (new_buf) {
			cs->base.buf = new_buf;
			cs->base.max_dw = ib_size / 4;
		} else {
			cs->failed = true;
			cs->base.cdw = 0;
		}
		return;
	}

	uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);

	/* max that fits in the chain size field. */
	ib_size = MIN2(ib_size, 0xfffff);

	while (!cs->base.cdw || (cs->base.cdw & 7) != 4)
		cs->base.buf[cs->base.cdw++] = 0xffff1000;

	*cs->ib_size_ptr |= cs->base.cdw + 4;

	if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) {
		cs->max_num_old_ib_buffers = MAX2(1, cs->max_num_old_ib_buffers * 2);
		cs->old_ib_buffers = realloc(cs->old_ib_buffers,
					     cs->max_num_old_ib_buffers * sizeof(void*));
	}

	cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer;

	cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0,
						   RADEON_DOMAIN_GTT,
						   RADEON_FLAG_CPU_ACCESS);

	if (!cs->ib_buffer) {
		cs->base.cdw = 0;
		cs->failed = true;
		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
	}

	cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer);
	if (!cs->ib_mapped) {
		cs->ws->base.buffer_destroy(cs->ib_buffer);
		cs->base.cdw = 0;
		cs->failed = true;
		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
	}

	cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

	cs->base.buf[cs->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va >> 32;
	cs->ib_size_ptr = cs->base.buf + cs->base.cdw;
	cs->base.buf[cs->base.cdw++] = S_3F2_CHAIN(1) | S_3F2_VALID(1);

	cs->base.buf = (uint32_t *)cs->ib_mapped;
	cs->base.cdw = 0;
	cs->base.max_dw = ib_size / 4 - 4;
}

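/* Pad the current IB to a multiple of 8 dwords and patch its final size into
 * the size field set up by the last chain (or into ib.size directly). */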
static bool radv_amdgpu_cs_finalize(struct radeon_winsys_cs *_cs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);

	if (cs->ws->use_ib_bos) {
		while (!cs->base.cdw || (cs->base.cdw & 7) != 0)
			cs->base.buf[cs->base.cdw++] = 0xffff1000;

		*cs->ib_size_ptr |= cs->base.cdw;

		cs->is_chained = false;
	}

	return !cs->failed;
}

static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
	cs->base.cdw = 0;
	cs->failed = false;

	for (unsigned i = 0; i < cs->num_buffers; ++i) {
		unsigned hash = ((uintptr_t)cs->handles[i] >> 6) &
		                (ARRAY_SIZE(cs->buffer_hash_table) - 1);
		cs->buffer_hash_table[hash] = -1;
	}

	cs->num_buffers = 0;

	if (cs->ws->use_ib_bos) {
		cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

		for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
			cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

		cs->num_old_ib_buffers = 0;
		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
		cs->ib_size_ptr = &cs->ib.size;
		cs->ib.size = 0;
	}
}

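/* Look up a BO in the per-CS buffer list: try the hash table first, then
 * fall back to a linear scan and refresh the hash entry on a hit. */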
static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs,
				      amdgpu_bo_handle bo)
{
	unsigned hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	int index = cs->buffer_hash_table[hash];

	if (index == -1)
		return -1;

	if (cs->handles[index] == bo)
		return index;

	for (unsigned i = 0; i < cs->num_buffers; ++i) {
		if (cs->handles[i] == bo) {
			cs->buffer_hash_table[hash] = i;
			return i;
		}
	}

	return -1;
}

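/* Track a BO for the submission; re-adding an already tracked BO only raises
 * its priority. */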
static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
					       amdgpu_bo_handle bo,
					       uint8_t priority)
{
	unsigned hash;
	int index = radv_amdgpu_cs_find_buffer(cs, bo);

	if (index != -1) {
		cs->priorities[index] = MAX2(cs->priorities[index], priority);
		return;
	}

	if (cs->num_buffers == cs->max_num_buffers) {
		unsigned new_count = MAX2(1, cs->max_num_buffers * 2);
		cs->handles = realloc(cs->handles, new_count * sizeof(amdgpu_bo_handle));
		cs->priorities = realloc(cs->priorities, new_count * sizeof(uint8_t));
		cs->max_num_buffers = new_count;
	}

	cs->handles[cs->num_buffers] = bo;
	cs->priorities[cs->num_buffers] = priority;

	hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	cs->buffer_hash_table[hash] = cs->num_buffers;

	++cs->num_buffers;
}

static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
				      struct radeon_winsys_bo *_bo,
				      uint8_t priority)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

	radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
}

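/* Execute a secondary CS from a primary one: with IB BOs this emits an
 * INDIRECT_BUFFER packet calling into the child IB (without the chain bit),
 * otherwise the child's commands are copied into the parent. */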
static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
					     struct radeon_winsys_cs *_child)
{
	struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
	struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);

	for (unsigned i = 0; i < child->num_buffers; ++i) {
		radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i],
						   child->priorities[i]);
	}

	if (parent->ws->use_ib_bos) {
		if (parent->base.cdw + 4 > parent->base.max_dw)
			radv_amdgpu_cs_grow(&parent->base, 4);

		parent->base.buf[parent->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address;
		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address >> 32;
		parent->base.buf[parent->base.cdw++] = child->ib.size;
	} else {
		if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
			radv_amdgpu_cs_grow(&parent->base, child->base.cdw);

		memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
		parent->base.cdw += child->base.cdw;
	}
}

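/* Build the kernel BO list for a submission: the global BO list when
 * debug_all_bos is set, a single CS's list when possible, and otherwise the
 * deduplicated union of all CS buffer lists plus an optional extra BO. */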
static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
				      struct radeon_winsys_cs **cs_array,
				      unsigned count,
				      struct radv_amdgpu_winsys_bo *extra_bo,
				      amdgpu_bo_list_handle *bo_list)
{
	int r;
	if (ws->debug_all_bos) {
		struct radv_amdgpu_winsys_bo *bo;
		amdgpu_bo_handle *handles;
		unsigned num = 0;

		pthread_mutex_lock(&ws->global_bo_list_lock);

		handles = malloc(sizeof(handles[0]) * ws->num_buffers);
		if (!handles) {
			pthread_mutex_unlock(&ws->global_bo_list_lock);
			return -ENOMEM;
		}

		LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) {
			assert(num < ws->num_buffers);
			handles[num++] = bo->bo;
		}

		r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
					  handles, NULL,
					  bo_list);
		free(handles);
		pthread_mutex_unlock(&ws->global_bo_list_lock);
	} else if (count == 1 && !extra_bo) {
		struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
		r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
					  cs->priorities, bo_list);
	} else {
		unsigned total_buffer_count = !!extra_bo;
		unsigned unique_bo_count = !!extra_bo;
		for (unsigned i = 0; i < count; ++i) {
			struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
			total_buffer_count += cs->num_buffers;
		}

		amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count);
		uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count);
		if (!handles || !priorities) {
			free(handles);
			free(priorities);
			return -ENOMEM;
		}

		if (extra_bo) {
			handles[0] = extra_bo->bo;
			priorities[0] = 8;
		}

		for (unsigned i = 0; i < count; ++i) {
			struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
			for (unsigned j = 0; j < cs->num_buffers; ++j) {
				bool found = false;
				for (unsigned k = 0; k < unique_bo_count; ++k) {
					if (handles[k] == cs->handles[j]) {
						found = true;
						priorities[k] = MAX2(priorities[k],
								     cs->priorities[j]);
						break;
					}
				}
				if (!found) {
					handles[unique_bo_count] = cs->handles[j];
					priorities[unique_bo_count] = cs->priorities[j];
					++unique_bo_count;
				}
			}
		}
		r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
					  priorities, bo_list);

		free(handles);
		free(priorities);
	}

	return r;
}

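/* Submit a sequence of chained CSes: each CS is patched to chain into the
 * next one, so the kernel only needs to see the first IB. */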
static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
						struct radeon_winsys_cs **cs_array,
						unsigned cs_count,
						struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request = {0};

	for (unsigned i = cs_count; i--;) {
		struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);

		if (cs->is_chained) {
			*cs->ib_size_ptr -= 4;
			cs->is_chained = false;
		}

		if (i + 1 < cs_count) {
			struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]);
			assert(cs->base.cdw + 4 <= cs->base.max_dw);

			cs->is_chained = true;
			*cs->ib_size_ptr += 4;

			cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
			cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address;
			cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32;
			cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size;
		}
	}

	r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, &bo_list);
	if (r) {
		fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
		return r;
	}

	request.ip_type = AMDGPU_HW_IP_GFX;
	request.number_of_ibs = 1;
	request.ibs = &cs0->ib;
	request.resources = bo_list;

	r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
	if (r) {
		if (r == -ENOMEM)
			fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
		else
			fprintf(stderr, "amdgpu: The CS has been rejected, "
				"see dmesg for more information.\n");
	}

	amdgpu_bo_list_destroy(bo_list);

	if (fence) {
		fence->context = ctx->ctx;
		fence->ip_type = request.ip_type;
		fence->ip_instance = request.ip_instance;
		fence->ring = request.ring;
		fence->fence = request.seq_no;
	}
	ctx->last_seq_no = request.seq_no;

	return r;
}

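/* Submit the CSes as individual IBs, at most AMDGPU_CS_MAX_IBS_PER_SUBMIT
 * per kernel submission. */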
static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
						 struct radeon_winsys_cs **cs_array,
						 unsigned cs_count,
						 struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request;

	assert(cs_count);

	for (unsigned i = 0; i < cs_count;) {
		struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
		struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
		unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT, cs_count - i);

		memset(&request, 0, sizeof(request));

		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, &bo_list);
		if (r) {
			fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
			return r;
		}

		request.ip_type = AMDGPU_HW_IP_GFX;
		request.resources = bo_list;
		request.number_of_ibs = cnt;
		request.ibs = ibs;

		for (unsigned j = 0; j < cnt; ++j) {
			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
			ibs[j] = cs->ib;

			if (cs->is_chained) {
				*cs->ib_size_ptr -= 4;
				cs->is_chained = false;
			}
		}

		r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
		if (r) {
			if (r == -ENOMEM)
				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
			else
				fprintf(stderr, "amdgpu: The CS has been rejected, "
					"see dmesg for more information.\n");
		}

		amdgpu_bo_list_destroy(bo_list);

		if (r)
			return r;

		i += cnt;
	}
	if (fence) {
		fence->context = ctx->ctx;
		fence->ip_type = request.ip_type;
		fence->ip_instance = request.ip_instance;
		fence->ring = request.ring;
		fence->fence = request.seq_no;
	}
	ctx->last_seq_no = request.seq_no;

	return 0;
}

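/* Submission path for sysmem command streams: batch as many CSes as fit in
 * one IB, copy them into a freshly allocated GTT buffer, pad the result to
 * the required alignment and submit that buffer as a single IB. */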
static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
					       struct radeon_winsys_cs **cs_array,
					       unsigned cs_count,
					       struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
	struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request;
	uint32_t pad_word = 0xffff1000U;

	if (radv_amdgpu_winsys(ws)->family == FAMILY_SI)
		pad_word = 0x80000000;

	assert(cs_count);

	for (unsigned i = 0; i < cs_count;) {
		struct amdgpu_cs_ib_info ib = {0};
		struct radeon_winsys_bo *bo = NULL;
		uint32_t *ptr;
		unsigned cnt = 0;
		unsigned size = 0;

		while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
			size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
			++cnt;
		}

		assert(cnt);

		bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
		ptr = ws->buffer_map(bo);

		for (unsigned j = 0; j < cnt; ++j) {
			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
			memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
			ptr += cs->base.cdw;
		}

		while (!size || (size & 7)) {
			*ptr++ = pad_word;
			++size;
		}

		memset(&request, 0, sizeof(request));

		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
					       (struct radv_amdgpu_winsys_bo*)bo, &bo_list);
		if (r) {
			fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
			return r;
		}

		ib.size = size;
		ib.ib_mc_address = ws->buffer_get_va(bo);

		request.ip_type = AMDGPU_HW_IP_GFX;
		request.resources = bo_list;
		request.number_of_ibs = 1;
		request.ibs = &ib;

		r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
		if (r) {
			if (r == -ENOMEM)
				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
			else
				fprintf(stderr, "amdgpu: The CS has been rejected, "
					"see dmesg for more information.\n");
		}

		amdgpu_bo_list_destroy(bo_list);

		ws->buffer_destroy(bo);
		if (r)
			return r;

		i += cnt;
	}
	if (fence) {
		fence->context = ctx->ctx;
		fence->ip_type = request.ip_type;
		fence->ip_instance = request.ip_instance;
		fence->ring = request.ring;
		fence->fence = request.seq_no;
	}
	ctx->last_seq_no = request.seq_no;

	return 0;
}

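/* Top-level submit entry point: pick a submission path.  The chained path is
 * currently disabled by the "&& false" below. */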
static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
					struct radeon_winsys_cs **cs_array,
					unsigned cs_count,
					bool can_patch,
					struct radeon_winsys_fence *_fence)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
	if (!cs->ws->use_ib_bos) {
		return radv_amdgpu_winsys_cs_submit_sysmem(_ctx, cs_array,
							   cs_count, _fence);
	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
		return radv_amdgpu_winsys_cs_submit_chained(_ctx, cs_array,
							    cs_count, _fence);
	} else {
		return radv_amdgpu_winsys_cs_submit_fallback(_ctx, cs_array,
							     cs_count, _fence);
	}
}

static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
	int r;

	if (!ctx)
		return NULL;
	r = amdgpu_cs_ctx_create(ws->dev, &ctx->ctx);
	if (r) {
		fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r);
		goto error_create;
	}
	ctx->ws = ws;
	return (struct radeon_winsys_ctx *)ctx;
error_create:
	FREE(ctx);
	return NULL;
}

static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
	amdgpu_cs_ctx_free(ctx->ctx);
	FREE(ctx);
}

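/* Check whether the most recent submission on this context has completed,
 * waiting up to one second for it. */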
static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx)
{
	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;

	if (ctx->last_seq_no) {
		uint32_t expired;
		struct amdgpu_cs_fence fence;

		fence.context = ctx->ctx;
		fence.ip_type = RING_GFX;
		fence.ip_instance = 0;
		fence.ring = 0;
		fence.fence = ctx->last_seq_no;

		int ret = amdgpu_cs_query_fence_status(&fence, 1000000000ull, 0,
						       &expired);

		if (ret || !expired)
			return false;
	}

	return true;
}

void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
{
	ws->base.ctx_create = radv_amdgpu_ctx_create;
	ws->base.ctx_destroy = radv_amdgpu_ctx_destroy;
	ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle;
	ws->base.cs_create = radv_amdgpu_cs_create;
	ws->base.cs_destroy = radv_amdgpu_cs_destroy;
	ws->base.cs_grow = radv_amdgpu_cs_grow;
	ws->base.cs_finalize = radv_amdgpu_cs_finalize;
	ws->base.cs_reset = radv_amdgpu_cs_reset;
	ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
	ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
	ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
	ws->base.create_fence = radv_amdgpu_create_fence;
	ws->base.destroy_fence = radv_amdgpu_destroy_fence;
	ws->base.fence_wait = radv_amdgpu_fence_wait;
}