radv: add initial non-conformant radv vulkan driver
mesa.git: src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdlib.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <assert.h>

#include "amdgpu_id.h"
#include "radv_radeon_winsys.h"
#include "radv_amdgpu_cs.h"
#include "radv_amdgpu_bo.h"
#include "sid.h"

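/*
 * A command stream is recorded either directly into a CPU-mapped IB buffer
 * object (use_ib_bos, chained via INDIRECT_BUFFER packets when it grows), or
 * into plain malloc'ed memory that is copied into a fresh GTT BO at submit
 * time. The fields below track the current IB plus the buffer list that has
 * to be resident for the submission.
 */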
struct radv_amdgpu_cs {
    struct radeon_winsys_cs base;
    struct radv_amdgpu_winsys *ws;

    struct amdgpu_cs_ib_info ib; /* info for the first IB of the chain */

    struct radeon_winsys_bo *ib_buffer; /* current IB BO (NULL in sysmem mode) */
    uint8_t *ib_mapped;                 /* CPU mapping of ib_buffer */
    unsigned max_num_buffers;
    unsigned num_buffers;
    amdgpu_bo_handle *handles;          /* BOs referenced by this CS */
    uint8_t *priorities;                /* per-BO priority, parallel to handles */

    struct radeon_winsys_bo **old_ib_buffers; /* chained-away IBs, freed on reset */
    unsigned num_old_ib_buffers;
    unsigned max_num_old_ib_buffers;
    unsigned *ib_size_ptr;              /* where the current IB's size gets patched in */
    bool failed;
    bool is_chained;

    int buffer_hash_table[1024];        /* direct-mapped cache into handles[] */
};

static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_winsys_cs *base)
{
    return (struct radv_amdgpu_cs*)base;
}

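/* Winsys fences are thin wrappers around amdgpu_cs_fence: creation just
 * allocates the struct, and waiting defers to the libdrm fence-status
 * query, with an optional absolute timeout. */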
static struct radeon_winsys_fence *radv_amdgpu_create_fence(void)
{
    struct amdgpu_cs_fence *fence = calloc(1, sizeof(struct amdgpu_cs_fence));
    return (struct radeon_winsys_fence*)fence;
}

static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence)
{
    struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
    free(fence);
}

static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
                                   struct radeon_winsys_fence *_fence,
                                   bool absolute,
                                   uint64_t timeout)
{
    struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
    unsigned flags = absolute ? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE : 0;
    int r;
    uint32_t expired = 0;

    /* Now use the libdrm query. */
    r = amdgpu_cs_query_fence_status(fence,
                                     timeout,
                                     flags,
                                     &expired);

    if (r) {
        fprintf(stderr, "amdgpu: amdgpu_cs_query_fence_status failed.\n");
        return false;
    }

    return expired != 0;
}

static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);

    if (cs->ib_buffer)
        cs->ws->base.buffer_destroy(cs->ib_buffer);
    else
        free(cs->base.buf);

    for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
        cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

    free(cs->old_ib_buffers);
    free(cs->handles);
    free(cs->priorities);
    free(cs);
}

static bool radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
                                enum ring_type ring_type)
{
    for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
        cs->buffer_hash_table[i] = -1;

    return true;
}

static struct radeon_winsys_cs *
radv_amdgpu_cs_create(struct radeon_winsys *ws,
                      enum ring_type ring_type)
{
    struct radv_amdgpu_cs *cs;
    uint32_t ib_size = 20 * 1024 * 4;

    cs = calloc(1, sizeof(struct radv_amdgpu_cs));
    if (!cs)
        return NULL;

    cs->ws = radv_amdgpu_winsys(ws);
    radv_amdgpu_init_cs(cs, RING_GFX);

    if (cs->ws->use_ib_bos) {
        cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
                                          RADEON_DOMAIN_GTT,
                                          RADEON_FLAG_CPU_ACCESS);
        if (!cs->ib_buffer) {
            free(cs);
            return NULL;
        }

        cs->ib_mapped = ws->buffer_map(cs->ib_buffer);
        if (!cs->ib_mapped) {
            ws->buffer_destroy(cs->ib_buffer);
            free(cs);
            return NULL;
        }

        cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
        cs->base.buf = (uint32_t *)cs->ib_mapped;
        cs->base.max_dw = ib_size / 4 - 4;
        cs->ib_size_ptr = &cs->ib.size;
        cs->ib.size = 0;

        ws->cs_add_buffer(&cs->base, cs->ib_buffer, 8);
    } else {
        cs->base.buf = malloc(16384);
        cs->base.max_dw = 4096;
        if (!cs->base.buf) {
            free(cs);
            return NULL;
        }
    }

    return &cs->base;
}

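/*
 * Grow the command stream. In sysmem mode this is a plain realloc. In IB
 * mode we cannot move the buffer the GPU will read, so instead we allocate
 * a fresh IB, chain to it from the old one with an INDIRECT_BUFFER packet,
 * and keep the old BO alive (in old_ib_buffers) until the CS is reset.
 */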
static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size)
{
    struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
    uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);

    /* max that fits in the chain size field. */
    ib_size = MIN2(ib_size, 0xfffff);

    if (cs->failed) {
        cs->base.cdw = 0;
        return;
    }

    if (!cs->ws->use_ib_bos) {
        uint32_t *new_buf = realloc(cs->base.buf, ib_size);
        if (new_buf) {
            cs->base.buf = new_buf;
            cs->base.max_dw = ib_size / 4;
        } else {
            cs->failed = true;
            cs->base.cdw = 0;
        }
        return;
    }

    /* Pad with NOPs until the write pointer is 4 dwords short of an 8-dword
     * boundary, so the 4-dword chain packet below ends exactly on one. */
    while (!cs->base.cdw || (cs->base.cdw & 7) != 4)
        cs->base.buf[cs->base.cdw++] = 0xffff1000;

    *cs->ib_size_ptr |= cs->base.cdw + 4;

    if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) {
        cs->max_num_old_ib_buffers = MAX2(1, cs->max_num_old_ib_buffers * 2);
        cs->old_ib_buffers = realloc(cs->old_ib_buffers,
                                     cs->max_num_old_ib_buffers * sizeof(void*));
    }

    cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer;

    cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0,
                                               RADEON_DOMAIN_GTT,
                                               RADEON_FLAG_CPU_ACCESS);

    if (!cs->ib_buffer) {
        cs->base.cdw = 0;
        cs->failed = true;
        cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
        return;
    }

    cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer);
    if (!cs->ib_mapped) {
        cs->ws->base.buffer_destroy(cs->ib_buffer);
        cs->base.cdw = 0;
        cs->failed = true;
        cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
        return;
    }

    cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

    /* Chain to the new IB: address, then the size dword (patched later via
     * ib_size_ptr) with the CHAIN and VALID bits set. */
    cs->base.buf[cs->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
    cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
    cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va >> 32;
    cs->ib_size_ptr = cs->base.buf + cs->base.cdw;
    cs->base.buf[cs->base.cdw++] = S_3F2_CHAIN(1) | S_3F2_VALID(1);

    cs->base.buf = (uint32_t *)cs->ib_mapped;
    cs->base.cdw = 0;
    cs->base.max_dw = ib_size / 4 - 4;
}

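/* Finalizing pads the IB with NOPs to an 8-dword boundary and publishes the
 * final size through ib_size_ptr, which for a grown CS points into the chain
 * packet of the previous IB (hence the |=, which preserves the control bits
 * already stored there). */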
static bool radv_amdgpu_cs_finalize(struct radeon_winsys_cs *_cs)
{
    struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);

    if (cs->ws->use_ib_bos) {
        while (!cs->base.cdw || (cs->base.cdw & 7) != 0)
            cs->base.buf[cs->base.cdw++] = 0xffff1000;

        *cs->ib_size_ptr |= cs->base.cdw;

        cs->is_chained = false;
    }

    return !cs->failed;
}

static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
{
    struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
    cs->base.cdw = 0;
    cs->failed = false;

    for (unsigned i = 0; i < cs->num_buffers; ++i) {
        unsigned hash = ((uintptr_t)cs->handles[i] >> 6) &
                        (ARRAY_SIZE(cs->buffer_hash_table) - 1);
        cs->buffer_hash_table[hash] = -1;
    }

    cs->num_buffers = 0;

    if (cs->ws->use_ib_bos) {
        cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

        for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
            cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

        cs->num_old_ib_buffers = 0;
        cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
        cs->ib_size_ptr = &cs->ib.size;
        cs->ib.size = 0;
    }
}

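/* Buffer tracking uses a small direct-mapped cache: the BO handle pointer
 * (shifted to drop low allocation-granularity bits) indexes
 * buffer_hash_table, which memoizes a position in handles[]. On a miss we
 * fall back to a linear scan and re-seed the cache entry. */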
static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs,
                                      amdgpu_bo_handle bo)
{
    unsigned hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
    int index = cs->buffer_hash_table[hash];

    if (index == -1)
        return -1;

    if (cs->handles[index] == bo)
        return index;

    for (unsigned i = 0; i < cs->num_buffers; ++i) {
        if (cs->handles[i] == bo) {
            cs->buffer_hash_table[hash] = i;
            return i;
        }
    }

    return -1;
}

static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
                                               amdgpu_bo_handle bo,
                                               uint8_t priority)
{
    unsigned hash;
    int index = radv_amdgpu_cs_find_buffer(cs, bo);

    if (index != -1) {
        cs->priorities[index] = MAX2(cs->priorities[index], priority);
        return;
    }

    if (cs->num_buffers == cs->max_num_buffers) {
        unsigned new_count = MAX2(1, cs->max_num_buffers * 2);
        cs->handles = realloc(cs->handles, new_count * sizeof(amdgpu_bo_handle));
        cs->priorities = realloc(cs->priorities, new_count * sizeof(uint8_t));
        cs->max_num_buffers = new_count;
    }

    cs->handles[cs->num_buffers] = bo;
    cs->priorities[cs->num_buffers] = priority;

    hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
    cs->buffer_hash_table[hash] = cs->num_buffers;

    ++cs->num_buffers;
}

static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
                                      struct radeon_winsys_bo *_bo,
                                      uint8_t priority)
{
    struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

    radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
}

static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
                                             struct radeon_winsys_cs *_child)
{
    struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
    struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);

    for (unsigned i = 0; i < child->num_buffers; ++i) {
        radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i],
                                           child->priorities[i]);
    }

    if (parent->ws->use_ib_bos) {
        if (parent->base.cdw + 4 > parent->base.max_dw)
            radv_amdgpu_cs_grow(&parent->base, 4);

        /* Call the child IB without the CHAIN bit, so execution returns
         * to the parent afterwards. */
        parent->base.buf[parent->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
        parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address;
        parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address >> 32;
        parent->base.buf[parent->base.cdw++] = child->ib.size;
    } else {
        if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
            radv_amdgpu_cs_grow(&parent->base, child->base.cdw);

        memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
        parent->base.cdw += child->base.cdw;
    }
}

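/*
 * Build the amdgpu BO list for a submission. Three strategies:
 * - debug_all_bos: submit every BO the winsys knows about (simplifies
 *   debugging GPU hangs at the cost of larger lists),
 * - a single CS and no extra BO: reuse its handle/priority arrays directly,
 * - otherwise: merge the per-CS lists, de-duplicating handles and keeping
 *   the highest priority seen for each BO.
 */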
static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
                                      struct radeon_winsys_cs **cs_array,
                                      unsigned count,
                                      struct radv_amdgpu_winsys_bo *extra_bo,
                                      amdgpu_bo_list_handle *bo_list)
{
    int r;

    if (ws->debug_all_bos) {
        struct radv_amdgpu_winsys_bo *bo;
        amdgpu_bo_handle *handles;
        unsigned num = 0;

        pthread_mutex_lock(&ws->global_bo_list_lock);

        handles = malloc(sizeof(handles[0]) * ws->num_buffers);
        if (!handles) {
            pthread_mutex_unlock(&ws->global_bo_list_lock);
            return -ENOMEM;
        }

        LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) {
            assert(num < ws->num_buffers);
            handles[num++] = bo->bo;
        }

        r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
                                  handles, NULL,
                                  bo_list);
        free(handles);
        pthread_mutex_unlock(&ws->global_bo_list_lock);
    } else if (count == 1 && !extra_bo) {
        struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
        r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
                                  cs->priorities, bo_list);
    } else {
        unsigned total_buffer_count = !!extra_bo;
        unsigned unique_bo_count = !!extra_bo;

        for (unsigned i = 0; i < count; ++i) {
            struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
            total_buffer_count += cs->num_buffers;
        }

        amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count);
        uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count);
        if (!handles || !priorities) {
            free(handles);
            free(priorities);
            return -ENOMEM;
        }

        if (extra_bo) {
            handles[0] = extra_bo->bo;
            priorities[0] = 8;
        }

        for (unsigned i = 0; i < count; ++i) {
            struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
            for (unsigned j = 0; j < cs->num_buffers; ++j) {
                bool found = false;
                for (unsigned k = 0; k < unique_bo_count; ++k) {
                    if (handles[k] == cs->handles[j]) {
                        found = true;
                        priorities[k] = MAX2(priorities[k],
                                             cs->priorities[j]);
                        break;
                    }
                }
                if (!found) {
                    handles[unique_bo_count] = cs->handles[j];
                    priorities[unique_bo_count] = cs->priorities[j];
                    ++unique_bo_count;
                }
            }
        }

        r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
                                  priorities, bo_list);

        free(handles);
        free(priorities);
    }

    return r;
}

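/* Chained submission: walk the command streams back to front and patch each
 * one's tail with an INDIRECT_BUFFER packet that chains to its successor,
 * so the kernel only sees the first IB. The +/-4 dword size fixups account
 * for the chain packet itself. */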
static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
                                                struct radeon_winsys_cs **cs_array,
                                                unsigned cs_count,
                                                struct radeon_winsys_fence *_fence)
{
    int r;
    struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
    struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
    struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
    amdgpu_bo_list_handle bo_list;
    struct amdgpu_cs_request request = {0};

    for (unsigned i = cs_count; i--;) {
        struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);

        if (cs->is_chained) {
            *cs->ib_size_ptr -= 4;
            cs->is_chained = false;
        }

        if (i + 1 < cs_count) {
            struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]);
            assert(cs->base.cdw + 4 <= cs->base.max_dw);

            cs->is_chained = true;
            *cs->ib_size_ptr += 4;

            cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
            cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address;
            cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32;
            cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size;
        }
    }

    r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, &bo_list);
    if (r) {
        fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
        return r;
    }

    request.ip_type = AMDGPU_HW_IP_GFX;
    request.number_of_ibs = 1;
    request.ibs = &cs0->ib;
    request.resources = bo_list;

    r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
    if (r) {
        if (r == -ENOMEM)
            fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
        else
            fprintf(stderr, "amdgpu: The CS has been rejected, "
                            "see dmesg for more information.\n");
    }

    amdgpu_bo_list_destroy(bo_list);

    if (fence) {
        fence->context = ctx->ctx;
        fence->ip_type = request.ip_type;
        fence->ip_instance = request.ip_instance;
        fence->ring = request.ring;
        fence->fence = request.seq_no;
    }
    ctx->last_seq_no = request.seq_no;

    return r;
}

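/* Fallback submission: no chaining, so the command streams are passed as
 * individual IBs, in groups of at most AMDGPU_CS_MAX_IBS_PER_SUBMIT per
 * amdgpu_cs_submit call. */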
static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
                                                 struct radeon_winsys_cs **cs_array,
                                                 unsigned cs_count,
                                                 struct radeon_winsys_fence *_fence)
{
    int r;
    struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
    struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
    amdgpu_bo_list_handle bo_list;
    struct amdgpu_cs_request request;

    assert(cs_count);

    for (unsigned i = 0; i < cs_count;) {
        struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
        struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
        unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT, cs_count - i);

        memset(&request, 0, sizeof(request));

        r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, &bo_list);
        if (r) {
            fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
            return r;
        }

        request.ip_type = AMDGPU_HW_IP_GFX;
        request.resources = bo_list;
        request.number_of_ibs = cnt;
        request.ibs = ibs;

        for (unsigned j = 0; j < cnt; ++j) {
            struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);

            /* Drop any chain packet left over from an earlier chained
             * submission before snapshotting the IB info. */
            if (cs->is_chained) {
                *cs->ib_size_ptr -= 4;
                cs->is_chained = false;
            }

            ibs[j] = cs->ib;
        }

        r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
        if (r) {
            if (r == -ENOMEM)
                fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
            else
                fprintf(stderr, "amdgpu: The CS has been rejected, "
                                "see dmesg for more information.\n");
        }

        amdgpu_bo_list_destroy(bo_list);

        if (r)
            return r;

        i += cnt;
    }

    if (fence) {
        fence->context = ctx->ctx;
        fence->ip_type = request.ip_type;
        fence->ip_instance = request.ip_instance;
        fence->ring = request.ring;
        fence->fence = request.seq_no;
    }
    ctx->last_seq_no = request.seq_no;

    return 0;
}

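/* Sysmem submission: command streams recorded in malloc'ed memory are copied
 * into a freshly allocated GTT BO, padded with NOPs to an 8-dword boundary,
 * and submitted as a single IB. SI uses the type-2 NOP encoding (0x80000000)
 * for padding; later chips use the type-3 NOP 0xffff1000. */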
static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
                                               struct radeon_winsys_cs **cs_array,
                                               unsigned cs_count,
                                               struct radeon_winsys_fence *_fence)
{
    int r;
    struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
    struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
    struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
    struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws;
    amdgpu_bo_list_handle bo_list;
    struct amdgpu_cs_request request;
    uint32_t pad_word = 0xffff1000U;

    if (radv_amdgpu_winsys(ws)->family == FAMILY_SI)
        pad_word = 0x80000000;

    assert(cs_count);

    for (unsigned i = 0; i < cs_count;) {
        struct amdgpu_cs_ib_info ib = {0};
        struct radeon_winsys_bo *bo = NULL;
        uint32_t *ptr;
        unsigned cnt = 0;
        unsigned size = 0;

        /* Batch as many streams as fit below the maximum IB size (0xfffff
         * dwords), leaving room for up to 7 dwords of alignment padding. */
        while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
            size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
            ++cnt;
        }

        assert(cnt);

        bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
        ptr = ws->buffer_map(bo);

        for (unsigned j = 0; j < cnt; ++j) {
            struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
            memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
            ptr += cs->base.cdw;
        }

        while (!size || (size & 7)) {
            *ptr++ = pad_word;
            ++size;
        }

        memset(&request, 0, sizeof(request));

        r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
                                       (struct radv_amdgpu_winsys_bo*)bo, &bo_list);
        if (r) {
            fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
            return r;
        }

        ib.size = size;
        ib.ib_mc_address = ws->buffer_get_va(bo);

        request.ip_type = AMDGPU_HW_IP_GFX;
        request.resources = bo_list;
        request.number_of_ibs = 1;
        request.ibs = &ib;

        r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
        if (r) {
            if (r == -ENOMEM)
                fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
            else
                fprintf(stderr, "amdgpu: The CS has been rejected, "
                                "see dmesg for more information.\n");
        }

        amdgpu_bo_list_destroy(bo_list);
        ws->buffer_destroy(bo);

        if (r)
            return r;

        i += cnt;
    }

    if (fence) {
        fence->context = ctx->ctx;
        fence->ip_type = request.ip_type;
        fence->ip_instance = request.ip_instance;
        fence->ring = request.ring;
        fence->fence = request.seq_no;
    }
    ctx->last_seq_no = request.seq_no;

    return 0;
}

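/* Top-level submit: picks the sysmem path when IBs live in system memory,
 * otherwise the chained or fallback path. */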
static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
                                        struct radeon_winsys_cs **cs_array,
                                        unsigned cs_count,
                                        bool can_patch,
                                        struct radeon_winsys_fence *_fence)
{
    struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);

    if (!cs->ws->use_ib_bos) {
        return radv_amdgpu_winsys_cs_submit_sysmem(_ctx, cs_array,
                                                   cs_count, _fence);
    } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
        /* Chained submission is disabled for now (the "&& false" above);
         * everything goes through the fallback path instead. */
        return radv_amdgpu_winsys_cs_submit_chained(_ctx, cs_array,
                                                    cs_count, _fence);
    } else {
        return radv_amdgpu_winsys_cs_submit_fallback(_ctx, cs_array,
                                                     cs_count, _fence);
    }
}

static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
{
    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
    struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
    int r;

    if (!ctx)
        return NULL;

    r = amdgpu_cs_ctx_create(ws->dev, &ctx->ctx);
    if (r) {
        fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create failed. (%i)\n", r);
        goto error_create;
    }
    ctx->ws = ws;
    return (struct radeon_winsys_ctx *)ctx;

error_create:
    FREE(ctx);
    return NULL;
}

static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
    struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
    amdgpu_cs_ctx_free(ctx->ctx);
    FREE(ctx);
}

static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx)
{
    struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;

    if (ctx->last_seq_no) {
        uint32_t expired;
        struct amdgpu_cs_fence fence;

        fence.context = ctx->ctx;
        fence.ip_type = RING_GFX;
        fence.ip_instance = 0;
        fence.ring = 0;
        fence.fence = ctx->last_seq_no;

        /* Wait up to one second for the last submission to retire. */
        int ret = amdgpu_cs_query_fence_status(&fence, 1000000000ull, 0,
                                               &expired);

        if (ret || !expired)
            return false;
    }

    return true;
}

void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
{
    ws->base.ctx_create = radv_amdgpu_ctx_create;
    ws->base.ctx_destroy = radv_amdgpu_ctx_destroy;
    ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle;
    ws->base.cs_create = radv_amdgpu_cs_create;
    ws->base.cs_destroy = radv_amdgpu_cs_destroy;
    ws->base.cs_grow = radv_amdgpu_cs_grow;
    ws->base.cs_finalize = radv_amdgpu_cs_finalize;
    ws->base.cs_reset = radv_amdgpu_cs_reset;
    ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
    ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
    ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
    ws->base.create_fence = radv_amdgpu_create_fence;
    ws->base.destroy_fence = radv_amdgpu_destroy_fence;
    ws->base.fence_wait = radv_amdgpu_fence_wait;
}