radv/amdgpu: Add winsys implementation of virtual buffers.
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24
25 #include <stdlib.h>
26 #include <amdgpu.h>
27 #include <amdgpu_drm.h>
28 #include <assert.h>
29
30 #include "ac_debug.h"
31 #include "amdgpu_id.h"
32 #include "radv_radeon_winsys.h"
33 #include "radv_amdgpu_cs.h"
34 #include "radv_amdgpu_bo.h"
35 #include "sid.h"
36
37
38 enum {
39 VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024
40 };
41
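/*
 * A command stream is built directly into a CPU-mapped IB BO when the winsys
 * uses IB BOs; otherwise it is built in malloc'ed memory and copied into a BO
 * at submit time. "handles"/"priorities" track the unique BOs referenced by
 * the stream, deduplicated through a small direct-mapped hash table. Virtual
 * (sparse) buffers are tracked separately and only expanded into their
 * backing BOs when the BO list for a submission is created.
 */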
struct radv_amdgpu_cs {
        struct radeon_winsys_cs base;
        struct radv_amdgpu_winsys *ws;

        struct amdgpu_cs_ib_info ib;

        struct radeon_winsys_bo *ib_buffer;
        uint8_t *ib_mapped;
        unsigned max_num_buffers;
        unsigned num_buffers;
        amdgpu_bo_handle *handles;
        uint8_t *priorities;

        struct radeon_winsys_bo **old_ib_buffers;
        unsigned num_old_ib_buffers;
        unsigned max_num_old_ib_buffers;
        unsigned *ib_size_ptr;
        bool failed;
        bool is_chained;

        int buffer_hash_table[1024];
        unsigned hw_ip;

        unsigned num_virtual_buffers;
        unsigned max_num_virtual_buffers;
        struct radeon_winsys_bo **virtual_buffers;
        uint8_t *virtual_buffer_priorities;
        int *virtual_buffer_hash_table;
};

static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_winsys_cs *base)
{
        return (struct radv_amdgpu_cs*)base;
}

static int ring_to_hw_ip(enum ring_type ring)
{
        switch (ring) {
        case RING_GFX:
                return AMDGPU_HW_IP_GFX;
        case RING_DMA:
                return AMDGPU_HW_IP_DMA;
        case RING_COMPUTE:
                return AMDGPU_HW_IP_COMPUTE;
        default:
                unreachable("unsupported ring");
        }
}

static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
                                         struct amdgpu_cs_fence *fence,
                                         struct amdgpu_cs_request *req)
{
        fence->context = ctx->ctx;
        fence->ip_type = req->ip_type;
        fence->ip_instance = req->ip_instance;
        fence->ring = req->ring;
        fence->fence = req->seq_no;
}

static struct radeon_winsys_fence *radv_amdgpu_create_fence(void)
{
        struct amdgpu_cs_fence *fence = calloc(1, sizeof(struct amdgpu_cs_fence));
        return (struct radeon_winsys_fence*)fence;
}

static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence)
{
        struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
        free(fence);
}

static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
                                   struct radeon_winsys_fence *_fence,
                                   bool absolute,
                                   uint64_t timeout)
{
        struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
        unsigned flags = absolute ? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE : 0;
        int r;
        uint32_t expired = 0;

        /* Now use the libdrm query. */
        r = amdgpu_cs_query_fence_status(fence,
                                         timeout,
                                         flags,
                                         &expired);

        if (r) {
                fprintf(stderr, "amdgpu: radv_amdgpu_cs_query_fence_status failed.\n");
                return false;
        }

        if (expired)
                return true;

        return false;
}

static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
{
        struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);

        if (cs->ib_buffer)
                cs->ws->base.buffer_destroy(cs->ib_buffer);
        else
                free(cs->base.buf);

        for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
                cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

        free(cs->old_ib_buffers);
        free(cs->virtual_buffers);
        free(cs->virtual_buffer_priorities);
        free(cs->virtual_buffer_hash_table);
        free(cs->handles);
        free(cs->priorities);
        free(cs);
}

static bool radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
                                enum ring_type ring_type)
{
        for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
                cs->buffer_hash_table[i] = -1;

        cs->hw_ip = ring_to_hw_ip(ring_type);
        return true;
}

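/*
 * With IB BOs, the stream is written straight into a GTT, CPU-visible
 * buffer; four dwords of max_dw are held back so there is always room to
 * emit a chaining INDIRECT_BUFFER packet at the end. Without IB BOs, the
 * stream lives in plain user memory and is copied into a BO by the sysmem
 * submit path.
 */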
static struct radeon_winsys_cs *
radv_amdgpu_cs_create(struct radeon_winsys *ws,
                      enum ring_type ring_type)
{
        struct radv_amdgpu_cs *cs;
        uint32_t ib_size = 20 * 1024 * 4;
        cs = calloc(1, sizeof(struct radv_amdgpu_cs));
        if (!cs)
                return NULL;

        cs->ws = radv_amdgpu_winsys(ws);
        radv_amdgpu_init_cs(cs, ring_type);

        if (cs->ws->use_ib_bos) {
                cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
                                                  RADEON_DOMAIN_GTT,
                                                  RADEON_FLAG_CPU_ACCESS);
                if (!cs->ib_buffer) {
                        free(cs);
                        return NULL;
                }

                cs->ib_mapped = ws->buffer_map(cs->ib_buffer);
                if (!cs->ib_mapped) {
                        ws->buffer_destroy(cs->ib_buffer);
                        free(cs);
                        return NULL;
                }

                cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
                cs->base.buf = (uint32_t *)cs->ib_mapped;
                cs->base.max_dw = ib_size / 4 - 4;
                cs->ib_size_ptr = &cs->ib.size;
                cs->ib.size = 0;

                ws->cs_add_buffer(&cs->base, cs->ib_buffer, 8);
        } else {
                cs->base.buf = malloc(16384);
                cs->base.max_dw = 4096;
                if (!cs->base.buf) {
                        free(cs);
                        return NULL;
                }
        }

        return &cs->base;
}

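/*
 * Growing works in two modes. Without IB BOs the buffer is simply
 * realloc'ed, up to the 0xffff8-dword limit a single IB may have. With IB
 * BOs the current IB is padded with NOPs (0xffff1000) to an (8n + 4) dword
 * boundary, a fresh IB BO is allocated, and an INDIRECT_BUFFER_CIK packet
 * chains the old IB to the new one. The size dword of that packet is filled
 * in later through ib_size_ptr, once the size of the new IB is known at
 * finalize time (or at the next grow).
 */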
static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size)
{
        struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);

        if (cs->failed) {
                cs->base.cdw = 0;
                return;
        }

        if (!cs->ws->use_ib_bos) {
                const uint64_t limit_dws = 0xffff8;
                uint64_t ib_dws = MAX2(cs->base.cdw + min_size,
                                       MIN2(cs->base.max_dw * 2, limit_dws));

                /* The total ib size cannot exceed limit_dws dwords. */
                if (ib_dws > limit_dws) {
                        cs->failed = true;
                        cs->base.cdw = 0;
                        return;
                }

                uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
                if (new_buf) {
                        cs->base.buf = new_buf;
                        cs->base.max_dw = ib_dws;
                } else {
                        cs->failed = true;
                        cs->base.cdw = 0;
                }
                return;
        }

        uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);

        /* max that fits in the chain size field. */
        ib_size = MIN2(ib_size, 0xfffff);

        while (!cs->base.cdw || (cs->base.cdw & 7) != 4)
                cs->base.buf[cs->base.cdw++] = 0xffff1000;

        *cs->ib_size_ptr |= cs->base.cdw + 4;

        if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) {
                cs->max_num_old_ib_buffers = MAX2(1, cs->max_num_old_ib_buffers * 2);
                cs->old_ib_buffers = realloc(cs->old_ib_buffers,
                                             cs->max_num_old_ib_buffers * sizeof(void*));
        }

        cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer;

        cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0,
                                                   RADEON_DOMAIN_GTT,
                                                   RADEON_FLAG_CPU_ACCESS);

        if (!cs->ib_buffer) {
                cs->base.cdw = 0;
                cs->failed = true;
                cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
        }

        cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer);
        if (!cs->ib_mapped) {
                cs->ws->base.buffer_destroy(cs->ib_buffer);
                cs->base.cdw = 0;
                cs->failed = true;
                cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
        }

        cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

        cs->base.buf[cs->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
        cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
        cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va >> 32;
        cs->ib_size_ptr = cs->base.buf + cs->base.cdw;
        cs->base.buf[cs->base.cdw++] = S_3F2_CHAIN(1) | S_3F2_VALID(1);

        cs->base.buf = (uint32_t *)cs->ib_mapped;
        cs->base.cdw = 0;
        cs->base.max_dw = ib_size / 4 - 4;
}

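/*
 * Finalizing pads the IB to a multiple of 8 dwords and publishes its final
 * size through ib_size_ptr, which points either at ib.size or at the size
 * dword of the previous IB's chain packet.
 */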
static bool radv_amdgpu_cs_finalize(struct radeon_winsys_cs *_cs)
{
        struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);

        if (cs->ws->use_ib_bos) {
                while (!cs->base.cdw || (cs->base.cdw & 7) != 0)
                        cs->base.buf[cs->base.cdw++] = 0xffff1000;

                *cs->ib_size_ptr |= cs->base.cdw;

                cs->is_chained = false;
        }

        return !cs->failed;
}

static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
{
        struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
        cs->base.cdw = 0;
        cs->failed = false;

        for (unsigned i = 0; i < cs->num_buffers; ++i) {
                unsigned hash = ((uintptr_t)cs->handles[i] >> 6) &
                                (ARRAY_SIZE(cs->buffer_hash_table) - 1);
                cs->buffer_hash_table[hash] = -1;
        }

        for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
                unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
                cs->virtual_buffer_hash_table[hash] = -1;
        }

        cs->num_buffers = 0;
        cs->num_virtual_buffers = 0;

        if (cs->ws->use_ib_bos) {
                cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

                for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
                        cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

                cs->num_old_ib_buffers = 0;
                cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
                cs->ib_size_ptr = &cs->ib.size;
                cs->ib.size = 0;
        }
}

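/*
 * BO lookup uses a direct-mapped hash on the handle pointer; on a miss in
 * the hashed slot the buffer list is scanned linearly and the slot is
 * refreshed, so the table is a cache rather than an authoritative index.
 */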
static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs,
                                      amdgpu_bo_handle bo)
{
        unsigned hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
        int index = cs->buffer_hash_table[hash];

        if (index == -1)
                return -1;

        if (cs->handles[index] == bo)
                return index;

        for (unsigned i = 0; i < cs->num_buffers; ++i) {
                if (cs->handles[i] == bo) {
                        cs->buffer_hash_table[hash] = i;
                        return i;
                }
        }

        return -1;
}

static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
                                               amdgpu_bo_handle bo,
                                               uint8_t priority)
{
        unsigned hash;
        int index = radv_amdgpu_cs_find_buffer(cs, bo);

        if (index != -1) {
                cs->priorities[index] = MAX2(cs->priorities[index], priority);
                return;
        }

        if (cs->num_buffers == cs->max_num_buffers) {
                unsigned new_count = MAX2(1, cs->max_num_buffers * 2);
                cs->handles = realloc(cs->handles, new_count * sizeof(amdgpu_bo_handle));
                cs->priorities = realloc(cs->priorities, new_count * sizeof(uint8_t));
                cs->max_num_buffers = new_count;
        }

        cs->handles[cs->num_buffers] = bo;
        cs->priorities[cs->num_buffers] = priority;

        hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
        cs->buffer_hash_table[hash] = cs->num_buffers;

        ++cs->num_buffers;
}

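/*
 * Virtual buffers cannot be handed to the kernel directly: their backing
 * BOs may change between binds. They are therefore recorded by winsys BO
 * pointer here, with their own lazily allocated hash table, and expanded
 * into the current set of backing BOs in radv_amdgpu_create_bo_list().
 */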
static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_winsys_cs *_cs,
                                              struct radeon_winsys_bo *bo,
                                              uint8_t priority)
{
        struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
        unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);

        if (!cs->virtual_buffer_hash_table) {
                cs->virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
                for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
                        cs->virtual_buffer_hash_table[i] = -1;
        }

        if (cs->virtual_buffer_hash_table[hash] >= 0) {
                int idx = cs->virtual_buffer_hash_table[hash];
                if (cs->virtual_buffers[idx] == bo) {
                        cs->virtual_buffer_priorities[idx] = MAX2(cs->virtual_buffer_priorities[idx], priority);
                        return;
                }
                for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
                        if (cs->virtual_buffers[i] == bo) {
                                cs->virtual_buffer_priorities[i] = MAX2(cs->virtual_buffer_priorities[i], priority);
                                cs->virtual_buffer_hash_table[hash] = i;
                                return;
                        }
                }
        }

        if (cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
                cs->max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
                cs->virtual_buffers = realloc(cs->virtual_buffers, sizeof(struct radeon_winsys_bo*) * cs->max_num_virtual_buffers);
                cs->virtual_buffer_priorities = realloc(cs->virtual_buffer_priorities, sizeof(uint8_t) * cs->max_num_virtual_buffers);
        }

        cs->virtual_buffers[cs->num_virtual_buffers] = bo;
        cs->virtual_buffer_priorities[cs->num_virtual_buffers] = priority;

        cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
        ++cs->num_virtual_buffers;
}

static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
                                      struct radeon_winsys_bo *_bo,
                                      uint8_t priority)
{
        struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

        if (bo->is_virtual) {
                radv_amdgpu_cs_add_virtual_buffer(_cs, _bo, priority);
                return;
        }

        radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
}

static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
                                             struct radeon_winsys_cs *_child)
{
        struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
        struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);

        for (unsigned i = 0; i < child->num_buffers; ++i) {
                radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i],
                                                   child->priorities[i]);
        }

        for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
                radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i],
                                          child->virtual_buffer_priorities[i]);
        }

        if (parent->ws->use_ib_bos) {
                if (parent->base.cdw + 4 > parent->base.max_dw)
                        radv_amdgpu_cs_grow(&parent->base, 4);

                parent->base.buf[parent->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
                parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address;
                parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address >> 32;
                parent->base.buf[parent->base.cdw++] = child->ib.size;
        } else {
                if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
                        radv_amdgpu_cs_grow(&parent->base, child->base.cdw);

                memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
                parent->base.cdw += child->base.cdw;
        }
}

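/*
 * Three ways to build the kernel BO list: with debug_all_bos every BO the
 * winsys knows about goes in (useful for debugging eviction issues); a
 * single CS with no extras reuses its arrays directly; otherwise the lists
 * are merged, deduplicating by handle and keeping the highest priority,
 * with virtual buffers replaced by their current backing BOs.
 */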
static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
                                      struct radeon_winsys_cs **cs_array,
                                      unsigned count,
                                      struct radv_amdgpu_winsys_bo *extra_bo,
                                      struct radeon_winsys_cs *extra_cs,
                                      amdgpu_bo_list_handle *bo_list)
{
        int r;
        if (ws->debug_all_bos) {
                struct radv_amdgpu_winsys_bo *bo;
                amdgpu_bo_handle *handles;
                unsigned num = 0;

                pthread_mutex_lock(&ws->global_bo_list_lock);

                handles = malloc(sizeof(handles[0]) * ws->num_buffers);
                if (!handles) {
                        pthread_mutex_unlock(&ws->global_bo_list_lock);
                        return -ENOMEM;
                }

                LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) {
                        assert(num < ws->num_buffers);
                        handles[num++] = bo->bo;
                }

                r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
                                          handles, NULL,
                                          bo_list);
                free(handles);
                pthread_mutex_unlock(&ws->global_bo_list_lock);
        } else if (count == 1 && !extra_bo && !extra_cs &&
                   !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
                struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
                r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
                                          cs->priorities, bo_list);
        } else {
                unsigned total_buffer_count = !!extra_bo;
                unsigned unique_bo_count = !!extra_bo;
                for (unsigned i = 0; i < count; ++i) {
                        struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
                        total_buffer_count += cs->num_buffers;
                        for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
                                total_buffer_count += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
                }

                if (extra_cs) {
                        total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers;
                }

                amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count);
                uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count);
                if (!handles || !priorities) {
                        free(handles);
                        free(priorities);
                        return -ENOMEM;
                }

                if (extra_bo) {
                        handles[0] = extra_bo->bo;
                        priorities[0] = 8;
                }

                for (unsigned i = 0; i < count + !!extra_cs; ++i) {
                        struct radv_amdgpu_cs *cs;

                        if (i == count)
                                cs = (struct radv_amdgpu_cs*)extra_cs;
                        else
                                cs = (struct radv_amdgpu_cs*)cs_array[i];

                        if (!cs->num_buffers)
                                continue;

                        if (unique_bo_count == 0) {
                                memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle));
                                memcpy(priorities, cs->priorities, cs->num_buffers * sizeof(uint8_t));
                                unique_bo_count = cs->num_buffers;
                                continue;
                        }
                        unsigned unique_bo_so_far = unique_bo_count;
                        for (unsigned j = 0; j < cs->num_buffers; ++j) {
                                bool found = false;
                                for (unsigned k = 0; k < unique_bo_so_far; ++k) {
                                        if (handles[k] == cs->handles[j]) {
                                                found = true;
                                                priorities[k] = MAX2(priorities[k],
                                                                     cs->priorities[j]);
                                                break;
                                        }
                                }
                                if (!found) {
                                        handles[unique_bo_count] = cs->handles[j];
                                        priorities[unique_bo_count] = cs->priorities[j];
                                        ++unique_bo_count;
                                }
                        }
                        for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
                                struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
                                for (unsigned k = 0; k < virtual_bo->bo_count; ++k) {
                                        struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
                                        bool found = false;
                                        for (unsigned m = 0; m < unique_bo_count; ++m) {
                                                if (handles[m] == bo->bo) {
                                                        found = true;
                                                        priorities[m] = MAX2(priorities[m],
                                                                             cs->virtual_buffer_priorities[j]);
                                                        break;
                                                }
                                        }
                                        if (!found) {
                                                handles[unique_bo_count] = bo->bo;
                                                priorities[unique_bo_count] = cs->virtual_buffer_priorities[j];
                                                ++unique_bo_count;
                                        }
                                }
                        }
                }
                r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
                                          priorities, bo_list);

                free(handles);
                free(priorities);
        }

        return r;
}

static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
                                    struct amdgpu_cs_request *request)
{
        radv_amdgpu_request_to_fence(ctx,
                                     &ctx->last_submission[request->ip_type][request->ring],
                                     request);
}

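/*
 * Chained submission: each CS's tail gets an INDIRECT_BUFFER packet that
 * jumps to the next CS, so the kernel sees a single IB (plus an optional
 * preamble). The four extra dwords are accounted for via ib_size_ptr and
 * undone first in case the CS was already chained for a previous submit.
 */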
static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
                                                int queue_idx,
                                                struct radeon_winsys_cs **cs_array,
                                                unsigned cs_count,
                                                struct radeon_winsys_cs *initial_preamble_cs,
                                                struct radeon_winsys_cs *continue_preamble_cs,
                                                struct radeon_winsys_fence *_fence)
{
        int r;
        struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
        struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
        struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
        amdgpu_bo_list_handle bo_list;
        struct amdgpu_cs_request request = {0};
        struct amdgpu_cs_ib_info ibs[2];

        for (unsigned i = cs_count; i--;) {
                struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);

                if (cs->is_chained) {
                        *cs->ib_size_ptr -= 4;
                        cs->is_chained = false;
                }

                if (i + 1 < cs_count) {
                        struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]);
                        assert(cs->base.cdw + 4 <= cs->base.max_dw);

                        cs->is_chained = true;
                        *cs->ib_size_ptr += 4;

                        cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
                        cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address;
                        cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32;
                        cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size;
                }
        }

        r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs, &bo_list);
        if (r) {
                fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
                return r;
        }

        request.ip_type = cs0->hw_ip;
        request.ring = queue_idx;
        request.number_of_ibs = 1;
        request.ibs = &cs0->ib;
        request.resources = bo_list;

        if (initial_preamble_cs) {
                request.ibs = ibs;
                request.number_of_ibs = 2;
                ibs[1] = cs0->ib;
                ibs[0] = ((struct radv_amdgpu_cs*)initial_preamble_cs)->ib;
        }

        r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
        if (r) {
                if (r == -ENOMEM)
                        fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
                else
                        fprintf(stderr, "amdgpu: The CS has been rejected, "
                                        "see dmesg for more information.\n");
        }

        amdgpu_bo_list_destroy(bo_list);

        if (fence)
                radv_amdgpu_request_to_fence(ctx, fence, &request);

        radv_assign_last_submit(ctx, &request);

        return r;
}

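/*
 * Fallback submission: the CS array is submitted in groups of up to
 * AMDGPU_CS_MAX_IBS_PER_SUBMIT IBs per request, with the initial preamble
 * on the first request and the continue preamble on the following ones.
 */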
static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
                                                 int queue_idx,
                                                 struct radeon_winsys_cs **cs_array,
                                                 unsigned cs_count,
                                                 struct radeon_winsys_cs *initial_preamble_cs,
                                                 struct radeon_winsys_cs *continue_preamble_cs,
                                                 struct radeon_winsys_fence *_fence)
{
        int r;
        struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
        struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
        amdgpu_bo_list_handle bo_list;
        struct amdgpu_cs_request request;

        assert(cs_count);

        for (unsigned i = 0; i < cs_count;) {
                struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
                struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
                struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
                unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs,
                                    cs_count - i);

                memset(&request, 0, sizeof(request));

                r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL,
                                               preamble_cs, &bo_list);
                if (r) {
                        fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
                        return r;
                }

                request.ip_type = cs0->hw_ip;
                request.ring = queue_idx;
                request.resources = bo_list;
                request.number_of_ibs = cnt + !!preamble_cs;
                request.ibs = ibs;

                if (preamble_cs) {
                        ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;
                }

                for (unsigned j = 0; j < cnt; ++j) {
                        struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
                        ibs[j + !!preamble_cs] = cs->ib;

                        if (cs->is_chained) {
                                *cs->ib_size_ptr -= 4;
                                cs->is_chained = false;
                        }
                }

                r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
                if (r) {
                        if (r == -ENOMEM)
                                fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
                        else
                                fprintf(stderr, "amdgpu: The CS has been rejected, "
                                                "see dmesg for more information.\n");
                }

                amdgpu_bo_list_destroy(bo_list);

                if (r)
                        return r;

                i += cnt;
        }
        if (fence)
                radv_amdgpu_request_to_fence(ctx, fence, &request);

        radv_assign_last_submit(ctx, &request);

        return 0;
}

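/*
 * Sysmem submission (no IB BOs): as many user-memory CSes as fit under the
 * 0xffff8-dword IB limit are copied into a freshly allocated GTT BO, padded
 * to 8-dword alignment with a NOP (the type-2 NOP 0x80000000 on SI,
 * 0xffff1000 elsewhere), and submitted as a single IB.
 */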
static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
                                               int queue_idx,
                                               struct radeon_winsys_cs **cs_array,
                                               unsigned cs_count,
                                               struct radeon_winsys_cs *initial_preamble_cs,
                                               struct radeon_winsys_cs *continue_preamble_cs,
                                               struct radeon_winsys_fence *_fence)
{
        int r;
        struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
        struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
        struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
        struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws;
        amdgpu_bo_list_handle bo_list;
        struct amdgpu_cs_request request;
        uint32_t pad_word = 0xffff1000U;

        if (radv_amdgpu_winsys(ws)->family == FAMILY_SI)
                pad_word = 0x80000000;

        assert(cs_count);

        for (unsigned i = 0; i < cs_count;) {
                struct amdgpu_cs_ib_info ib = {0};
                struct radeon_winsys_bo *bo = NULL;
                struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
                uint32_t *ptr;
                unsigned cnt = 0;
                unsigned size = 0;

                if (preamble_cs)
                        size += preamble_cs->cdw;

                while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
                        size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
                        ++cnt;
                }

                assert(cnt);

                bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
                ptr = ws->buffer_map(bo);

                if (preamble_cs) {
                        memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
                        ptr += preamble_cs->cdw;
                }

                for (unsigned j = 0; j < cnt; ++j) {
                        struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
                        memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
                        ptr += cs->base.cdw;
                }

                while (!size || (size & 7)) {
                        *ptr++ = pad_word;
                        ++size;
                }

                memset(&request, 0, sizeof(request));

                r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
                                               (struct radv_amdgpu_winsys_bo*)bo,
                                               preamble_cs, &bo_list);
                if (r) {
                        fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
                        return r;
                }

                ib.size = size;
                ib.ib_mc_address = ws->buffer_get_va(bo);

                request.ip_type = cs0->hw_ip;
                request.ring = queue_idx;
                request.resources = bo_list;
                request.number_of_ibs = 1;
                request.ibs = &ib;

                r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
                if (r) {
                        if (r == -ENOMEM)
                                fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
                        else
                                fprintf(stderr, "amdgpu: The CS has been rejected, "
                                                "see dmesg for more information.\n");
                }

                amdgpu_bo_list_destroy(bo_list);

                ws->buffer_destroy(bo);
                if (r)
                        return r;

                i += cnt;
        }
        if (fence)
                radv_amdgpu_request_to_fence(ctx, fence, &request);

        radv_assign_last_submit(ctx, &request);

        return 0;
}

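/*
 * Top-level submit: waits on the given semaphores, picks a submission
 * strategy, then signals. Note that the chained path is currently disabled
 * by the "&& false" below, so the fallback path is always taken when IB
 * BOs are in use.
 */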
static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
                                        int queue_idx,
                                        struct radeon_winsys_cs **cs_array,
                                        unsigned cs_count,
                                        struct radeon_winsys_cs *initial_preamble_cs,
                                        struct radeon_winsys_cs *continue_preamble_cs,
                                        struct radeon_winsys_sem **wait_sem,
                                        unsigned wait_sem_count,
                                        struct radeon_winsys_sem **signal_sem,
                                        unsigned signal_sem_count,
                                        bool can_patch,
                                        struct radeon_winsys_fence *_fence)
{
        struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
        struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
        int ret;
        unsigned i;

        for (i = 0; i < wait_sem_count; i++) {
                amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)wait_sem[i];
                amdgpu_cs_wait_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
                                         sem);
        }
        if (!cs->ws->use_ib_bos) {
                ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
                                                          cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
        } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
                ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
                                                           cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
        } else {
                ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array,
                                                            cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
        }

        for (i = 0; i < signal_sem_count; i++) {
                amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)signal_sem[i];
                amdgpu_cs_signal_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
                                           sem);
        }
        return ret;
}

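/*
 * Translates an IB GPU address back to a CPU pointer by searching the
 * current IB BO and all old (chained) IB BOs; used by the IB dumper below
 * to follow chains. Note the "<=" so the current ib_buffer is visited
 * after the old ones.
 */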
static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
{
        struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
        void *ret = NULL;
        for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
                struct radv_amdgpu_winsys_bo *bo;

                bo = (struct radv_amdgpu_winsys_bo*)
                     (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
                if (addr >= bo->va && addr - bo->va < bo->size) {
                        if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
                                return (char *)ret + (addr - bo->va);
                }
        }
        return ret;
}

static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
                                       FILE* file,
                                       uint32_t trace_id)
{
        struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;

        ac_parse_ib(file,
                    radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address),
                    cs->ib.size, trace_id, "main IB", cs->ws->info.chip_class,
                    radv_amdgpu_winsys_get_cpu_addr, cs);
}

static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
{
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
        int r;

        if (!ctx)
                return NULL;
        r = amdgpu_cs_ctx_create(ws->dev, &ctx->ctx);
        if (r) {
                fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create failed. (%i)\n", r);
                goto error_create;
        }
        ctx->ws = ws;
        return (struct radeon_winsys_ctx *)ctx;
error_create:
        FREE(ctx);
        return NULL;
}

static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
        struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
        amdgpu_cs_ctx_free(ctx->ctx);
        FREE(ctx);
}

static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
                                      enum ring_type ring_type, int ring_index)
{
        struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
        int ip_type = ring_to_hw_ip(ring_type);

        if (ctx->last_submission[ip_type][ring_index].fence) {
                uint32_t expired;
                int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index],
                                                       1000000000ull, 0, &expired);

                if (ret || !expired)
                        return false;
        }

        return true;
}

static struct radeon_winsys_sem *radv_amdgpu_create_sem(struct radeon_winsys *_ws)
{
        int ret;
        amdgpu_semaphore_handle sem;

        ret = amdgpu_cs_create_semaphore(&sem);
        if (ret)
                return NULL;
        return (struct radeon_winsys_sem *)sem;
}

static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
{
        amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)_sem;
        amdgpu_cs_destroy_semaphore(sem);
}

void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
{
        ws->base.ctx_create = radv_amdgpu_ctx_create;
        ws->base.ctx_destroy = radv_amdgpu_ctx_destroy;
        ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle;
        ws->base.cs_create = radv_amdgpu_cs_create;
        ws->base.cs_destroy = radv_amdgpu_cs_destroy;
        ws->base.cs_grow = radv_amdgpu_cs_grow;
        ws->base.cs_finalize = radv_amdgpu_cs_finalize;
        ws->base.cs_reset = radv_amdgpu_cs_reset;
        ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
        ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
        ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
        ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
        ws->base.create_fence = radv_amdgpu_create_fence;
        ws->base.destroy_fence = radv_amdgpu_destroy_fence;
        ws->base.create_sem = radv_amdgpu_create_sem;
        ws->base.destroy_sem = radv_amdgpu_destroy_sem;
        ws->base.fence_wait = radv_amdgpu_fence_wait;
}
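
/*
 * Usage sketch (illustrative only, not part of this file): a caller holding
 * a radv_amdgpu_winsys would drive the functions installed above roughly as
 * follows; "ctx", "some_bo" and "fence" are assumed to come from the
 * corresponding ctx_create/buffer_create/create_fence calls.
 *
 *    struct radeon_winsys_cs *cs = ws->base.cs_create(&ws->base, RING_GFX);
 *    ws->base.cs_add_buffer(cs, some_bo, 8);
 *    ... emit PM4 packets into cs->buf, advancing cs->cdw ...
 *    if (ws->base.cs_finalize(cs))
 *            ws->base.cs_submit(ctx, 0, &cs, 1, NULL, NULL,
 *                               NULL, 0, NULL, 0, false, fence);
 *    ws->base.cs_destroy(cs);
 */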