winsys/amdgpu: add fence and buffer list logic for slab allocated buffers
[mesa.git] src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 */

#include "amdgpu_cs.h"
#include "os/os_time.h"
#include <stdio.h>
#include <amdgpu_drm.h>

#include "amd/common/sid.h"

/* FENCES */

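/* Create a fence in the "submission in progress" state. The sequence number
 * is filled in later by amdgpu_fence_submitted once the IB has actually been
 * handed to the kernel. */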
static struct pipe_fence_handle *
amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
                    unsigned ip_instance, unsigned ring)
{
   struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);

   fence->reference.count = 1;
   fence->ctx = ctx;
   fence->fence.context = ctx->ctx;
   fence->fence.ip_type = ip_type;
   fence->fence.ip_instance = ip_instance;
   fence->fence.ring = ring;
   fence->submission_in_progress = true;
   p_atomic_inc(&ctx->refcount);
   return (struct pipe_fence_handle *)fence;
}

static void amdgpu_fence_submitted(struct pipe_fence_handle *fence,
                                   struct amdgpu_cs_request* request,
                                   uint64_t *user_fence_cpu_address)
{
   struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;

   rfence->fence.fence = request->seq_no;
   rfence->user_fence_cpu_address = user_fence_cpu_address;
   rfence->submission_in_progress = false;
}

static void amdgpu_fence_signalled(struct pipe_fence_handle *fence)
{
   struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;

   rfence->signalled = true;
   rfence->submission_in_progress = false;
}

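/* Wait for a fence to signal. The user fence memory is checked first to
 * avoid a kernel round-trip; only when that is inconclusive do we fall back
 * to the amdgpu_cs_query_fence_status ioctl. */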
bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
                       bool absolute)
{
   struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
   uint32_t expired;
   int64_t abs_timeout;
   uint64_t *user_fence_cpu;
   int r;

   if (rfence->signalled)
      return true;

   if (absolute)
      abs_timeout = timeout;
   else
      abs_timeout = os_time_get_absolute_timeout(timeout);

   /* The fence might not have a number assigned if its IB is being
    * submitted in the other thread right now. Wait until the submission
    * is done. */
   if (!os_wait_until_zero_abs_timeout(&rfence->submission_in_progress,
                                       abs_timeout))
      return false;

   user_fence_cpu = rfence->user_fence_cpu_address;
   if (user_fence_cpu) {
      if (*user_fence_cpu >= rfence->fence.fence) {
         rfence->signalled = true;
         return true;
      }

      /* No timeout, just query: no need for the ioctl. */
      if (!absolute && !timeout)
         return false;
   }

   /* Now use the libdrm query. */
   r = amdgpu_cs_query_fence_status(&rfence->fence,
                                    abs_timeout,
                                    AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE,
                                    &expired);
   if (r) {
      fprintf(stderr, "amdgpu: amdgpu_cs_query_fence_status failed.\n");
      return false;
   }

   if (expired) {
      /* This variable can only transition from false to true, so it doesn't
       * matter if threads race for it. */
      rfence->signalled = true;
      return true;
   }
   return false;
}

static bool amdgpu_fence_wait_rel_timeout(struct radeon_winsys *rws,
                                          struct pipe_fence_handle *fence,
                                          uint64_t timeout)
{
   return amdgpu_fence_wait(fence, timeout, false);
}

static struct pipe_fence_handle *
amdgpu_cs_get_next_fence(struct radeon_winsys_cs *rcs)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);
   struct pipe_fence_handle *fence = NULL;

   if (cs->next_fence) {
      amdgpu_fence_reference(&fence, cs->next_fence);
      return fence;
   }

   fence = amdgpu_fence_create(cs->ctx,
                               cs->csc->request.ip_type,
                               cs->csc->request.ip_instance,
                               cs->csc->request.ring);
   if (!fence)
      return NULL;

   amdgpu_fence_reference(&cs->next_fence, fence);
   return fence;
}

/* CONTEXTS */

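/* Create a submission context together with a small GTT buffer that holds
 * the user fence values the GPU writes at the end of each submission. */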
static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws)
{
   struct amdgpu_ctx *ctx = CALLOC_STRUCT(amdgpu_ctx);
   int r;
   struct amdgpu_bo_alloc_request alloc_buffer = {};
   amdgpu_bo_handle buf_handle;

   if (!ctx)
      return NULL;

   ctx->ws = amdgpu_winsys(ws);
   ctx->refcount = 1;

   r = amdgpu_cs_ctx_create(ctx->ws->dev, &ctx->ctx);
   if (r) {
      fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r);
      goto error_create;
   }

   alloc_buffer.alloc_size = ctx->ws->info.gart_page_size;
   alloc_buffer.phys_alignment = ctx->ws->info.gart_page_size;
   alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;

   r = amdgpu_bo_alloc(ctx->ws->dev, &alloc_buffer, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: amdgpu_bo_alloc failed. (%i)\n", r);
      goto error_user_fence_alloc;
   }

   r = amdgpu_bo_cpu_map(buf_handle, (void**)&ctx->user_fence_cpu_address_base);
   if (r) {
      fprintf(stderr, "amdgpu: amdgpu_bo_cpu_map failed. (%i)\n", r);
      goto error_user_fence_map;
   }

   memset(ctx->user_fence_cpu_address_base, 0, alloc_buffer.alloc_size);
   ctx->user_fence_bo = buf_handle;

   return (struct radeon_winsys_ctx*)ctx;

error_user_fence_map:
   amdgpu_bo_free(buf_handle);
error_user_fence_alloc:
   amdgpu_cs_ctx_free(ctx->ctx);
error_create:
   FREE(ctx);
   return NULL;
}

static void amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
   amdgpu_ctx_unref((struct amdgpu_ctx*)rwctx);
}

static enum pipe_reset_status
amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx)
{
   struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
   uint32_t result, hangs;
   int r;

   r = amdgpu_cs_query_reset_state(ctx->ctx, &result, &hangs);
   if (r) {
      fprintf(stderr, "amdgpu: amdgpu_cs_query_reset_state failed. (%i)\n", r);
      return PIPE_NO_RESET;
   }

   switch (result) {
   case AMDGPU_CTX_GUILTY_RESET:
      return PIPE_GUILTY_CONTEXT_RESET;
   case AMDGPU_CTX_INNOCENT_RESET:
      return PIPE_INNOCENT_CONTEXT_RESET;
   case AMDGPU_CTX_UNKNOWN_RESET:
      return PIPE_UNKNOWN_CONTEXT_RESET;
   case AMDGPU_CTX_NO_RESET:
   default:
      return PIPE_NO_RESET;
   }
}

/* COMMAND SUBMISSION */

static bool amdgpu_cs_has_user_fence(struct amdgpu_cs_context *cs)
{
   return cs->request.ip_type != AMDGPU_HW_IP_UVD &&
          cs->request.ip_type != AMDGPU_HW_IP_VCE;
}

static bool amdgpu_cs_has_chaining(struct amdgpu_cs *cs)
{
   return cs->ctx->ws->info.chip_class >= CIK &&
          cs->ring_type == RING_GFX;
}

static unsigned amdgpu_cs_epilog_dws(enum ring_type ring_type)
{
   if (ring_type == RING_GFX)
      return 4; /* for chaining */

   return 0;
}
int amdgpu_lookup_buffer(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo)
{
   unsigned hash = bo->unique_id & (ARRAY_SIZE(cs->buffer_indices_hashlist)-1);
   int i = cs->buffer_indices_hashlist[hash];
   struct amdgpu_cs_buffer *buffers;
   int num_buffers;

   if (bo->bo) {
      buffers = cs->real_buffers;
      num_buffers = cs->num_real_buffers;
   } else {
      buffers = cs->slab_buffers;
      num_buffers = cs->num_slab_buffers;
   }

   /* Fast path: either nothing is cached for this hash (-1), or the cached
    * index already points at this BO. */
   if (i < 0 || (i < num_buffers && buffers[i].bo == bo))
      return i;

   /* Hash collision, look for the BO in the list of buffers linearly. */
   for (i = num_buffers - 1; i >= 0; i--) {
      if (buffers[i].bo == bo) {
         /* Put this buffer in the hash list.
          * This will prevent additional hash collisions if there are
          * several consecutive lookup_buffer calls for the same buffer.
          *
          * Example: Assuming buffers A,B,C collide in the hash list,
          * the following sequence of buffers:
          *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
          * will collide here: ^ and here:   ^,
          * meaning that we should get very few collisions in the end. */
         cs->buffer_indices_hashlist[hash] = i;
         return i;
      }
   }
   return -1;
}

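/* Add a real (non-slab) buffer to the current CS, growing the buffer,
 * handle and flag arrays if needed, and return its index in the real
 * buffer list. */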
static int
amdgpu_lookup_or_add_real_buffer(struct amdgpu_cs *acs, struct amdgpu_winsys_bo *bo)
{
   struct amdgpu_cs_context *cs = acs->csc;
   struct amdgpu_cs_buffer *buffer;
   unsigned hash;
   int idx = amdgpu_lookup_buffer(cs, bo);

   if (idx >= 0)
      return idx;

   /* New buffer, check if the backing array is large enough. */
   if (cs->num_real_buffers >= cs->max_real_buffers) {
      unsigned new_max =
         MAX2(cs->max_real_buffers + 16, (unsigned)(cs->max_real_buffers * 1.3));
      struct amdgpu_cs_buffer *new_buffers;
      amdgpu_bo_handle *new_handles;
      uint8_t *new_flags;

      new_buffers = MALLOC(new_max * sizeof(*new_buffers));
      new_handles = MALLOC(new_max * sizeof(*new_handles));
      new_flags = MALLOC(new_max * sizeof(*new_flags));

      if (!new_buffers || !new_handles || !new_flags) {
         fprintf(stderr, "amdgpu_lookup_or_add_buffer: allocation failed\n");
         FREE(new_buffers);
         FREE(new_handles);
         FREE(new_flags);
         return -1;
      }

      memcpy(new_buffers, cs->real_buffers, cs->num_real_buffers * sizeof(*new_buffers));
      memcpy(new_handles, cs->handles, cs->num_real_buffers * sizeof(*new_handles));
      memcpy(new_flags, cs->flags, cs->num_real_buffers * sizeof(*new_flags));

      FREE(cs->real_buffers);
      FREE(cs->handles);
      FREE(cs->flags);

      cs->max_real_buffers = new_max;
      cs->real_buffers = new_buffers;
      cs->handles = new_handles;
      cs->flags = new_flags;
   }

   idx = cs->num_real_buffers;
   buffer = &cs->real_buffers[idx];

   memset(buffer, 0, sizeof(*buffer));
   amdgpu_winsys_bo_reference(&buffer->bo, bo);
   cs->handles[idx] = bo->bo;
   cs->flags[idx] = 0;
   p_atomic_inc(&bo->num_cs_references);
   cs->num_real_buffers++;

   hash = bo->unique_id & (ARRAY_SIZE(cs->buffer_indices_hashlist)-1);
   cs->buffer_indices_hashlist[hash] = idx;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      acs->main.base.used_vram += bo->base.size;
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      acs->main.base.used_gart += bo->base.size;

   return idx;
}

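/* Add a slab-allocated buffer to the slab buffer list. The backing real
 * buffer is added to the real buffer list as well, since only real buffers
 * can be passed to the kernel in the BO list. */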
static int amdgpu_lookup_or_add_slab_buffer(struct amdgpu_cs *acs,
                                            struct amdgpu_winsys_bo *bo)
{
   struct amdgpu_cs_context *cs = acs->csc;
   struct amdgpu_cs_buffer *buffer;
   unsigned hash;
   int idx = amdgpu_lookup_buffer(cs, bo);
   int real_idx;

   if (idx >= 0)
      return idx;

   real_idx = amdgpu_lookup_or_add_real_buffer(acs, bo->u.slab.real);
   if (real_idx < 0)
      return -1;

   /* New buffer, check if the backing array is large enough. */
   if (cs->num_slab_buffers >= cs->max_slab_buffers) {
      unsigned new_max =
         MAX2(cs->max_slab_buffers + 16, (unsigned)(cs->max_slab_buffers * 1.3));
      struct amdgpu_cs_buffer *new_buffers;

      new_buffers = REALLOC(cs->slab_buffers,
                            cs->max_slab_buffers * sizeof(*new_buffers),
                            new_max * sizeof(*new_buffers));
      if (!new_buffers) {
         fprintf(stderr, "amdgpu_lookup_or_add_slab_buffer: allocation failed\n");
         return -1;
      }

      cs->max_slab_buffers = new_max;
      cs->slab_buffers = new_buffers;
   }

   idx = cs->num_slab_buffers;
   buffer = &cs->slab_buffers[idx];

   memset(buffer, 0, sizeof(*buffer));
   amdgpu_winsys_bo_reference(&buffer->bo, bo);
   buffer->u.slab.real_idx = real_idx;
   p_atomic_inc(&bo->num_cs_references);
   cs->num_slab_buffers++;

   hash = bo->unique_id & (ARRAY_SIZE(cs->buffer_indices_hashlist)-1);
   cs->buffer_indices_hashlist[hash] = idx;

   return idx;
}

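/* Add a buffer to the CS with the given usage and priority. The returned
 * index always refers to the real buffer list; for slab entries it is the
 * index of the backing real buffer, while the usage is recorded on the slab
 * entry itself. */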
static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                     struct pb_buffer *buf,
                                     enum radeon_bo_usage usage,
                                     enum radeon_bo_domain domains,
                                     enum radeon_bo_priority priority)
{
   /* Don't use the "domains" parameter. Amdgpu doesn't support changing
    * the buffer placement during command submission.
    */
   struct amdgpu_cs *acs = amdgpu_cs(rcs);
   struct amdgpu_cs_context *cs = acs->csc;
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_cs_buffer *buffer;
   int index;

   if (!bo->bo) {
      index = amdgpu_lookup_or_add_slab_buffer(acs, bo);
      if (index < 0)
         return 0;

      buffer = &cs->slab_buffers[index];
      buffer->usage |= usage;

      usage &= ~RADEON_USAGE_SYNCHRONIZED;
      index = buffer->u.slab.real_idx;
   } else {
      index = amdgpu_lookup_or_add_real_buffer(acs, bo);
      if (index < 0)
         return 0;
   }

   buffer = &cs->real_buffers[index];
   buffer->u.real.priority_usage |= 1llu << priority;
   buffer->usage |= usage;
   cs->flags[index] = MAX2(cs->flags[index], priority / 4);
   return index;
}

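/* Allocate a new backing buffer for an IB, map it for CPU writes and make
 * it the current big IB buffer. */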
static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
{
   struct pb_buffer *pb;
   uint8_t *mapped;
   unsigned buffer_size;

   /* Always create a buffer that is at least as large as the maximum seen IB
    * size, aligned to a power of two (and multiplied by 4 to reduce internal
    * fragmentation if chaining is not available). Limit to 512k dwords, which
    * is the largest power of two that fits into the size field of the
    * INDIRECT_BUFFER packet.
    */
   if (amdgpu_cs_has_chaining(amdgpu_cs_from_ib(ib)))
      buffer_size = 4 * util_next_power_of_two(ib->max_ib_size);
   else
      buffer_size = 4 * util_next_power_of_two(4 * ib->max_ib_size);

   buffer_size = MIN2(buffer_size, 4 * 512 * 1024);

   switch (ib->ib_type) {
   case IB_CONST_PREAMBLE:
      buffer_size = MAX2(buffer_size, 4 * 1024);
      break;
   case IB_CONST:
      buffer_size = MAX2(buffer_size, 16 * 1024 * 4);
      break;
   case IB_MAIN:
      buffer_size = MAX2(buffer_size, 8 * 1024 * 4);
      break;
   default:
      unreachable("unhandled IB type");
   }

   pb = ws->base.buffer_create(&ws->base, buffer_size,
                               ws->info.gart_page_size,
                               RADEON_DOMAIN_GTT,
                               RADEON_FLAG_CPU_ACCESS);
   if (!pb)
      return false;

   mapped = ws->base.buffer_map(pb, NULL, PIPE_TRANSFER_WRITE);
   if (!mapped) {
      pb_reference(&pb, NULL);
      return false;
   }

   pb_reference(&ib->big_ib_buffer, pb);
   pb_reference(&pb, NULL);

   ib->ib_mapped = mapped;
   ib->used_ib_space = 0;

   return true;
}

static unsigned amdgpu_ib_max_submit_dwords(enum ib_type ib_type)
{
   switch (ib_type) {
   case IB_MAIN:
      /* Smaller submits means the GPU gets busy sooner and there is less
       * waiting for buffers and fences. Proof:
       *   http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
       */
      return 20 * 1024;
   case IB_CONST_PREAMBLE:
   case IB_CONST:
      /* There isn't really any reason to limit CE IB size beyond the natural
       * limit implied by the main IB, except perhaps GTT size. Just return
       * an extremely large value that we never get anywhere close to.
       */
      return 16 * 1024 * 1024;
   default:
      unreachable("bad ib_type");
   }
}

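/* Set up a fresh IB of the given type, allocating a new backing buffer if
 * the current one doesn't have enough free space left, and register that
 * buffer with the CS. */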
static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_cs *cs,
                              enum ib_type ib_type)
{
   struct amdgpu_winsys *aws = (struct amdgpu_winsys*)ws;
   /* Small IBs are better than big IBs, because the GPU goes idle quicker
    * and there is less waiting for buffers and fences. Proof:
    *   http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
    */
   struct amdgpu_ib *ib = NULL;
   struct amdgpu_cs_ib_info *info = &cs->csc->ib[ib_type];
   unsigned ib_size = 0;

   switch (ib_type) {
   case IB_CONST_PREAMBLE:
      ib = &cs->const_preamble_ib;
      ib_size = 256 * 4;
      break;
   case IB_CONST:
      ib = &cs->const_ib;
      ib_size = 8 * 1024 * 4;
      break;
   case IB_MAIN:
      ib = &cs->main;
      ib_size = 4 * 1024 * 4;
      break;
   default:
      unreachable("unhandled IB type");
   }

   if (!amdgpu_cs_has_chaining(cs)) {
      ib_size = MAX2(ib_size,
                     4 * MIN2(util_next_power_of_two(ib->max_ib_size),
                              amdgpu_ib_max_submit_dwords(ib_type)));
   }

   ib->max_ib_size = ib->max_ib_size - ib->max_ib_size / 32;

   ib->base.prev_dw = 0;
   ib->base.num_prev = 0;
   ib->base.current.cdw = 0;
   ib->base.current.buf = NULL;

   /* Allocate a new buffer for IBs if the current buffer is all used. */
   if (!ib->big_ib_buffer ||
       ib->used_ib_space + ib_size > ib->big_ib_buffer->size) {
      if (!amdgpu_ib_new_buffer(aws, ib))
         return false;
   }

   info->ib_mc_address = amdgpu_winsys_bo(ib->big_ib_buffer)->va +
                         ib->used_ib_space;
   info->size = 0;
   ib->ptr_ib_size = &info->size;

   amdgpu_cs_add_buffer(&cs->main.base, ib->big_ib_buffer,
                        RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);

   ib->base.current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space);

   ib_size = ib->big_ib_buffer->size - ib->used_ib_space;
   ib->base.current.max_dw = ib_size / 4 - amdgpu_cs_epilog_dws(cs->ring_type);
   return true;
}

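/* Finish the current IB: write the number of dwords into the location that
 * holds its size (either the request's ib_info or the size dword of a
 * chaining INDIRECT_BUFFER packet) and update the running size maximum. */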
static void amdgpu_ib_finalize(struct amdgpu_ib *ib)
{
   *ib->ptr_ib_size |= ib->base.current.cdw;
   ib->used_ib_space += ib->base.current.cdw * 4;
   ib->max_ib_size = MAX2(ib->max_ib_size, ib->base.prev_dw + ib->base.current.cdw);
}

static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs,
                                   enum ring_type ring_type)
{
   int i;

   switch (ring_type) {
   case RING_DMA:
      cs->request.ip_type = AMDGPU_HW_IP_DMA;
      break;

   case RING_UVD:
      cs->request.ip_type = AMDGPU_HW_IP_UVD;
      break;

   case RING_VCE:
      cs->request.ip_type = AMDGPU_HW_IP_VCE;
      break;

   case RING_COMPUTE:
      cs->request.ip_type = AMDGPU_HW_IP_COMPUTE;
      break;

   default:
   case RING_GFX:
      cs->request.ip_type = AMDGPU_HW_IP_GFX;
      break;
   }

   for (i = 0; i < ARRAY_SIZE(cs->buffer_indices_hashlist); i++) {
      cs->buffer_indices_hashlist[i] = -1;
   }

   cs->request.number_of_ibs = 1;
   cs->request.ibs = &cs->ib[IB_MAIN];

   cs->ib[IB_CONST].flags = AMDGPU_IB_FLAG_CE;
   cs->ib[IB_CONST_PREAMBLE].flags = AMDGPU_IB_FLAG_CE |
                                     AMDGPU_IB_FLAG_PREAMBLE;

   return true;
}

static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)
{
   unsigned i;

   for (i = 0; i < cs->num_real_buffers; i++) {
      p_atomic_dec(&cs->real_buffers[i].bo->num_cs_references);
      amdgpu_winsys_bo_reference(&cs->real_buffers[i].bo, NULL);
   }
   for (i = 0; i < cs->num_slab_buffers; i++) {
      p_atomic_dec(&cs->slab_buffers[i].bo->num_cs_references);
      amdgpu_winsys_bo_reference(&cs->slab_buffers[i].bo, NULL);
   }

   cs->num_real_buffers = 0;
   cs->num_slab_buffers = 0;
   amdgpu_fence_reference(&cs->fence, NULL);

   for (i = 0; i < ARRAY_SIZE(cs->buffer_indices_hashlist); i++) {
      cs->buffer_indices_hashlist[i] = -1;
   }
}

static void amdgpu_destroy_cs_context(struct amdgpu_cs_context *cs)
{
   amdgpu_cs_context_cleanup(cs);
   FREE(cs->flags);
   FREE(cs->real_buffers);
   FREE(cs->handles);
   FREE(cs->slab_buffers);
   FREE(cs->request.dependencies);
}


static struct radeon_winsys_cs *
amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
                 enum ring_type ring_type,
                 void (*flush)(void *ctx, unsigned flags,
                               struct pipe_fence_handle **fence),
                 void *flush_ctx)
{
   struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
   struct amdgpu_cs *cs;

   cs = CALLOC_STRUCT(amdgpu_cs);
   if (!cs) {
      return NULL;
   }

   util_queue_fence_init(&cs->flush_completed);

   cs->ctx = ctx;
   cs->flush_cs = flush;
   cs->flush_data = flush_ctx;
   cs->ring_type = ring_type;

   cs->main.ib_type = IB_MAIN;
   cs->const_ib.ib_type = IB_CONST;
   cs->const_preamble_ib.ib_type = IB_CONST_PREAMBLE;

   if (!amdgpu_init_cs_context(&cs->csc1, ring_type)) {
      FREE(cs);
      return NULL;
   }

   if (!amdgpu_init_cs_context(&cs->csc2, ring_type)) {
      amdgpu_destroy_cs_context(&cs->csc1);
      FREE(cs);
      return NULL;
   }

   /* Set the first submission context as current. */
   cs->csc = &cs->csc1;
   cs->cst = &cs->csc2;

   if (!amdgpu_get_new_ib(&ctx->ws->base, cs, IB_MAIN)) {
      amdgpu_destroy_cs_context(&cs->csc2);
      amdgpu_destroy_cs_context(&cs->csc1);
      FREE(cs);
      return NULL;
   }

   p_atomic_inc(&ctx->ws->num_cs);
   return &cs->main.base;
}

static struct radeon_winsys_cs *
amdgpu_cs_add_const_ib(struct radeon_winsys_cs *rcs)
{
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
   struct amdgpu_winsys *ws = cs->ctx->ws;

   /* only one const IB can be added */
   if (cs->ring_type != RING_GFX || cs->const_ib.ib_mapped)
      return NULL;

   if (!amdgpu_get_new_ib(&ws->base, cs, IB_CONST))
      return NULL;

   cs->csc->request.number_of_ibs = 2;
   cs->csc->request.ibs = &cs->csc->ib[IB_CONST];

   cs->cst->request.number_of_ibs = 2;
   cs->cst->request.ibs = &cs->cst->ib[IB_CONST];

   return &cs->const_ib.base;
}

static struct radeon_winsys_cs *
amdgpu_cs_add_const_preamble_ib(struct radeon_winsys_cs *rcs)
{
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
   struct amdgpu_winsys *ws = cs->ctx->ws;

   /* only one const preamble IB can be added and only when the const IB has
    * also been mapped */
   if (cs->ring_type != RING_GFX || !cs->const_ib.ib_mapped ||
       cs->const_preamble_ib.ib_mapped)
      return NULL;

   if (!amdgpu_get_new_ib(&ws->base, cs, IB_CONST_PREAMBLE))
      return NULL;

   cs->csc->request.number_of_ibs = 3;
   cs->csc->request.ibs = &cs->csc->ib[IB_CONST_PREAMBLE];

   cs->cst->request.number_of_ibs = 3;
   cs->cst->request.ibs = &cs->cst->ib[IB_CONST_PREAMBLE];

   return &cs->const_preamble_ib.base;
}

static bool amdgpu_cs_validate(struct radeon_winsys_cs *rcs)
{
   return true;
}

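/* Check that at least "dw" more dwords can be written to the current IB.
 * If the current chunk is too small and chaining is supported, terminate it
 * with an INDIRECT_BUFFER packet that points to a newly allocated chunk. */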
static bool amdgpu_cs_check_space(struct radeon_winsys_cs *rcs, unsigned dw)
{
   struct amdgpu_ib *ib = amdgpu_ib(rcs);
   struct amdgpu_cs *cs = amdgpu_cs_from_ib(ib);
   unsigned requested_size = rcs->prev_dw + rcs->current.cdw + dw;
   uint64_t va;
   uint32_t *new_ptr_ib_size;

   assert(rcs->current.cdw <= rcs->current.max_dw);

   if (requested_size > amdgpu_ib_max_submit_dwords(ib->ib_type))
      return false;

   ib->max_ib_size = MAX2(ib->max_ib_size, requested_size);

   if (rcs->current.max_dw - rcs->current.cdw >= dw)
      return true;

   if (!amdgpu_cs_has_chaining(cs))
      return false;

   /* Allocate a new chunk */
   if (rcs->num_prev >= rcs->max_prev) {
      unsigned new_max_prev = MAX2(1, 2 * rcs->max_prev);
      struct radeon_winsys_cs_chunk *new_prev;

      new_prev = REALLOC(rcs->prev,
                         sizeof(*new_prev) * rcs->max_prev,
                         sizeof(*new_prev) * new_max_prev);
      if (!new_prev)
         return false;

      rcs->prev = new_prev;
      rcs->max_prev = new_max_prev;
   }

   if (!amdgpu_ib_new_buffer(cs->ctx->ws, ib))
      return false;

   assert(ib->used_ib_space == 0);
   va = amdgpu_winsys_bo(ib->big_ib_buffer)->va;

   /* This space was originally reserved. */
   rcs->current.max_dw += 4;
   assert(ib->used_ib_space + 4 * rcs->current.max_dw <= ib->big_ib_buffer->size);

   /* Pad with NOPs and add INDIRECT_BUFFER packet */
   while ((rcs->current.cdw & 7) != 4)
      radeon_emit(rcs, 0xffff1000); /* type3 nop packet */

   radeon_emit(rcs, PKT3(ib->ib_type == IB_MAIN ? PKT3_INDIRECT_BUFFER_CIK
                                                : PKT3_INDIRECT_BUFFER_CONST, 2, 0));
   radeon_emit(rcs, va);
   radeon_emit(rcs, va >> 32);
   new_ptr_ib_size = &rcs->current.buf[rcs->current.cdw];
   radeon_emit(rcs, S_3F2_CHAIN(1) | S_3F2_VALID(1));

   assert((rcs->current.cdw & 7) == 0);
   assert(rcs->current.cdw <= rcs->current.max_dw);

   *ib->ptr_ib_size |= rcs->current.cdw;
   ib->ptr_ib_size = new_ptr_ib_size;

   /* Hook up the new chunk */
   rcs->prev[rcs->num_prev].buf = rcs->current.buf;
   rcs->prev[rcs->num_prev].cdw = rcs->current.cdw;
   rcs->prev[rcs->num_prev].max_dw = rcs->current.cdw; /* no modifications */
   rcs->num_prev++;

   ib->base.prev_dw += ib->base.current.cdw;
   ib->base.current.cdw = 0;

   ib->base.current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space);
   ib->base.current.max_dw = ib->big_ib_buffer->size / 4 - amdgpu_cs_epilog_dws(cs->ring_type);

   amdgpu_cs_add_buffer(&cs->main.base, ib->big_ib_buffer,
                        RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);

   return true;
}

static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
                                          struct radeon_bo_list_item *list)
{
   struct amdgpu_cs_context *cs = amdgpu_cs(rcs)->csc;
   int i;

   if (list) {
      for (i = 0; i < cs->num_real_buffers; i++) {
         list[i].bo_size = cs->real_buffers[i].bo->base.size;
         list[i].vm_address = cs->real_buffers[i].bo->va;
         list[i].priority_usage = cs->real_buffers[i].u.real.priority_usage;
      }
   }
   return cs->num_real_buffers;
}

DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false)

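/* Prune the buffer's fence list, dropping fences that belong to the current
 * queue or have already signalled, and add the remaining fences as
 * dependencies of the current submission when the buffer is used with
 * synchronized access. */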
static void amdgpu_add_fence_dependency(struct amdgpu_cs *acs,
                                        struct amdgpu_cs_buffer *buffer)
{
   struct amdgpu_cs_context *cs = acs->csc;
   struct amdgpu_winsys_bo *bo = buffer->bo;
   struct amdgpu_cs_fence *dep;
   unsigned new_num_fences = 0;

   for (unsigned j = 0; j < bo->num_fences; ++j) {
      struct amdgpu_fence *bo_fence = (void *)bo->fences[j];
      unsigned idx;

      if (bo_fence->ctx == acs->ctx &&
          bo_fence->fence.ip_type == cs->request.ip_type &&
          bo_fence->fence.ip_instance == cs->request.ip_instance &&
          bo_fence->fence.ring == cs->request.ring)
         continue;

      if (amdgpu_fence_wait((void *)bo_fence, 0, false))
         continue;

      amdgpu_fence_reference(&bo->fences[new_num_fences], bo->fences[j]);
      new_num_fences++;

      if (!(buffer->usage & RADEON_USAGE_SYNCHRONIZED))
         continue;

      if (bo_fence->submission_in_progress)
         os_wait_until_zero(&bo_fence->submission_in_progress,
                            PIPE_TIMEOUT_INFINITE);

      idx = cs->request.number_of_dependencies++;
      if (idx >= cs->max_dependencies) {
         unsigned size;

         cs->max_dependencies = idx + 8;
         size = cs->max_dependencies * sizeof(struct amdgpu_cs_fence);
         cs->request.dependencies = realloc(cs->request.dependencies, size);
      }

      dep = &cs->request.dependencies[idx];
      memcpy(dep, &bo_fence->fence, sizeof(*dep));
   }

   for (unsigned j = new_num_fences; j < bo->num_fences; ++j)
      amdgpu_fence_reference(&bo->fences[j], NULL);

   bo->num_fences = new_num_fences;
}

/* Since the kernel driver doesn't synchronize execution between different
 * rings automatically, we have to add fence dependencies manually.
 */
static void amdgpu_add_fence_dependencies(struct amdgpu_cs *acs)
{
   struct amdgpu_cs_context *cs = acs->csc;
   int i;

   cs->request.number_of_dependencies = 0;

   for (i = 0; i < cs->num_real_buffers; i++)
      amdgpu_add_fence_dependency(acs, &cs->real_buffers[i]);
   for (i = 0; i < cs->num_slab_buffers; i++)
      amdgpu_add_fence_dependency(acs, &cs->slab_buffers[i]);
}

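/* Append a fence to the buffer's fence list, growing the array as needed.
 * If growing the array fails, the most recent existing fence is dropped so
 * that the new (more recent) fence can still be recorded. */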
static void amdgpu_add_fence(struct amdgpu_winsys_bo *bo,
                             struct pipe_fence_handle *fence)
{
   if (bo->num_fences >= bo->max_fences) {
      unsigned new_max_fences = MAX2(1, bo->max_fences * 2);
      struct pipe_fence_handle **new_fences =
         REALLOC(bo->fences,
                 bo->num_fences * sizeof(*new_fences),
                 new_max_fences * sizeof(*new_fences));
      if (new_fences) {
         bo->fences = new_fences;
         bo->max_fences = new_max_fences;
      } else {
         fprintf(stderr, "amdgpu_add_fence: allocation failure, dropping fence\n");
         if (!bo->num_fences)
            return;

         bo->num_fences--; /* prefer to keep a more recent fence if possible */
         amdgpu_fence_reference(&bo->fences[bo->num_fences], NULL);
      }
   }

   bo->fences[bo->num_fences] = NULL;
   amdgpu_fence_reference(&bo->fences[bo->num_fences], fence);
   bo->num_fences++;
}

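/* The backend of cs_flush: build the kernel BO list and submit the IBs.
 * This runs either synchronously or on the winsys CS submission thread. */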
void amdgpu_cs_submit_ib(void *job, int thread_index)
{
   struct amdgpu_cs *acs = (struct amdgpu_cs*)job;
   struct amdgpu_winsys *ws = acs->ctx->ws;
   struct amdgpu_cs_context *cs = acs->cst;
   int i, r;

   cs->request.fence_info.handle = NULL;
   if (amdgpu_cs_has_user_fence(cs)) {
      cs->request.fence_info.handle = acs->ctx->user_fence_bo;
      cs->request.fence_info.offset = acs->ring_type;
   }

   /* Create the buffer list.
    * Use a buffer list containing all allocated buffers if requested.
    */
   if (debug_get_option_all_bos()) {
      struct amdgpu_winsys_bo *bo;
      amdgpu_bo_handle *handles;
      unsigned num = 0;

      pipe_mutex_lock(ws->global_bo_list_lock);

      handles = malloc(sizeof(handles[0]) * ws->num_buffers);
      if (!handles) {
         pipe_mutex_unlock(ws->global_bo_list_lock);
         amdgpu_cs_context_cleanup(cs);
         cs->error_code = -ENOMEM;
         return;
      }

      LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, u.real.global_list_item) {
         assert(num < ws->num_buffers);
         handles[num++] = bo->bo;
      }

      r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
                                handles, NULL,
                                &cs->request.resources);
      free(handles);
      pipe_mutex_unlock(ws->global_bo_list_lock);
   } else {
      r = amdgpu_bo_list_create(ws->dev, cs->num_real_buffers,
                                cs->handles, cs->flags,
                                &cs->request.resources);
   }

   if (r) {
      fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r);
      cs->request.resources = NULL;
      amdgpu_fence_signalled(cs->fence);
      cs->error_code = r;
      goto cleanup;
   }

   r = amdgpu_cs_submit(acs->ctx->ctx, 0, &cs->request, 1);
   cs->error_code = r;
   if (r) {
      if (r == -ENOMEM)
         fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
      else
         fprintf(stderr, "amdgpu: The CS has been rejected, "
                 "see dmesg for more information (%i).\n", r);

      amdgpu_fence_signalled(cs->fence);
   } else {
      /* Success. */
      uint64_t *user_fence = NULL;
      if (amdgpu_cs_has_user_fence(cs))
         user_fence = acs->ctx->user_fence_cpu_address_base +
                      cs->request.fence_info.offset;
      amdgpu_fence_submitted(cs->fence, &cs->request, user_fence);
   }

   /* Cleanup. */
   if (cs->request.resources)
      amdgpu_bo_list_destroy(cs->request.resources);

cleanup:
   for (i = 0; i < cs->num_real_buffers; i++)
      p_atomic_dec(&cs->real_buffers[i].bo->num_active_ioctls);
   for (i = 0; i < cs->num_slab_buffers; i++)
      p_atomic_dec(&cs->slab_buffers[i].bo->num_active_ioctls);

   amdgpu_cs_context_cleanup(cs);
}

/* Make sure the previous submission is completed. */
void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);
   struct amdgpu_winsys *ws = cs->ctx->ws;

   /* Wait for any pending ioctl of this CS to complete. */
   if (util_queue_is_initialized(&ws->cs_queue))
      util_queue_job_wait(&cs->flush_completed);
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)

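/* Flush the command stream: pad the IBs to the required alignment, attach
 * a fence and the buffer-list fences, hand the submission to
 * amdgpu_cs_submit_ib (asynchronously if requested), and set up fresh IBs
 * for the next batch of commands. */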
static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
                           unsigned flags,
                           struct pipe_fence_handle **fence)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);
   struct amdgpu_winsys *ws = cs->ctx->ws;
   int error_code = 0;

   rcs->current.max_dw += amdgpu_cs_epilog_dws(cs->ring_type);

   switch (cs->ring_type) {
   case RING_DMA:
      /* pad DMA ring to 8 DWs */
      if (ws->info.chip_class <= SI) {
         while (rcs->current.cdw & 7)
            radeon_emit(rcs, 0xf0000000); /* NOP packet */
      } else {
         while (rcs->current.cdw & 7)
            radeon_emit(rcs, 0x00000000); /* NOP packet */
      }
      break;
   case RING_GFX:
      /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */
      if (ws->info.gfx_ib_pad_with_type2) {
         while (rcs->current.cdw & 7)
            radeon_emit(rcs, 0x80000000); /* type2 nop packet */
      } else {
         while (rcs->current.cdw & 7)
            radeon_emit(rcs, 0xffff1000); /* type3 nop packet */
      }

      /* Also pad the const IB. */
      if (cs->const_ib.ib_mapped)
         while (!cs->const_ib.base.current.cdw || (cs->const_ib.base.current.cdw & 7))
            radeon_emit(&cs->const_ib.base, 0xffff1000); /* type3 nop packet */

      if (cs->const_preamble_ib.ib_mapped)
         while (!cs->const_preamble_ib.base.current.cdw || (cs->const_preamble_ib.base.current.cdw & 7))
            radeon_emit(&cs->const_preamble_ib.base, 0xffff1000);
      break;
   case RING_UVD:
      while (rcs->current.cdw & 15)
         radeon_emit(rcs, 0x80000000); /* type2 nop packet */
      break;
   default:
      break;
   }

   if (rcs->current.cdw > rcs->current.max_dw) {
      fprintf(stderr, "amdgpu: command stream overflowed\n");
   }

   /* Submit only if the CS is not empty, hasn't overflowed, and noop mode
    * is off. */
   if (radeon_emitted(&cs->main.base, 0) &&
       cs->main.base.current.cdw <= cs->main.base.current.max_dw &&
       !debug_get_option_noop()) {
      struct amdgpu_cs_context *cur = cs->csc;
      unsigned i, num_buffers;

      /* Set IB sizes. */
      amdgpu_ib_finalize(&cs->main);

      if (cs->const_ib.ib_mapped)
         amdgpu_ib_finalize(&cs->const_ib);

      if (cs->const_preamble_ib.ib_mapped)
         amdgpu_ib_finalize(&cs->const_preamble_ib);

      /* Create a fence. */
      amdgpu_fence_reference(&cur->fence, NULL);
      if (cs->next_fence) {
         /* just move the reference */
         cur->fence = cs->next_fence;
         cs->next_fence = NULL;
      } else {
         cur->fence = amdgpu_fence_create(cs->ctx,
                                          cur->request.ip_type,
                                          cur->request.ip_instance,
                                          cur->request.ring);
      }
      if (fence)
         amdgpu_fence_reference(fence, cur->fence);

      /* Prepare buffers. */
      pipe_mutex_lock(ws->bo_fence_lock);
      amdgpu_add_fence_dependencies(cs);

      num_buffers = cur->num_real_buffers;
      for (i = 0; i < num_buffers; i++) {
         struct amdgpu_winsys_bo *bo = cur->real_buffers[i].bo;
         p_atomic_inc(&bo->num_active_ioctls);
         amdgpu_add_fence(bo, cur->fence);
      }

      num_buffers = cur->num_slab_buffers;
      for (i = 0; i < num_buffers; i++) {
         struct amdgpu_winsys_bo *bo = cur->slab_buffers[i].bo;
         p_atomic_inc(&bo->num_active_ioctls);
         amdgpu_add_fence(bo, cur->fence);
      }
      pipe_mutex_unlock(ws->bo_fence_lock);

      amdgpu_cs_sync_flush(rcs);

      /* Swap command streams. "cst" is going to be submitted. */
      cs->csc = cs->cst;
      cs->cst = cur;

      /* Submit. */
      if ((flags & RADEON_FLUSH_ASYNC) &&
          util_queue_is_initialized(&ws->cs_queue)) {
         util_queue_add_job(&ws->cs_queue, cs, &cs->flush_completed,
                            amdgpu_cs_submit_ib, NULL);
      } else {
         amdgpu_cs_submit_ib(cs, 0);
         error_code = cs->cst->error_code;
      }
   } else {
      amdgpu_cs_context_cleanup(cs->csc);
   }

   amdgpu_get_new_ib(&ws->base, cs, IB_MAIN);
   if (cs->const_ib.ib_mapped)
      amdgpu_get_new_ib(&ws->base, cs, IB_CONST);
   if (cs->const_preamble_ib.ib_mapped)
      amdgpu_get_new_ib(&ws->base, cs, IB_CONST_PREAMBLE);

   cs->main.base.used_gart = 0;
   cs->main.base.used_vram = 0;

   ws->num_cs_flushes++;
   return error_code;
}

static void amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);

   amdgpu_cs_sync_flush(rcs);
   util_queue_fence_destroy(&cs->flush_completed);
   p_atomic_dec(&cs->ctx->ws->num_cs);
   pb_reference(&cs->main.big_ib_buffer, NULL);
   FREE(cs->main.base.prev);
   pb_reference(&cs->const_ib.big_ib_buffer, NULL);
   FREE(cs->const_ib.base.prev);
   pb_reference(&cs->const_preamble_ib.big_ib_buffer, NULL);
   FREE(cs->const_preamble_ib.base.prev);
   amdgpu_destroy_cs_context(&cs->csc1);
   amdgpu_destroy_cs_context(&cs->csc2);
   amdgpu_fence_reference(&cs->next_fence, NULL);
   FREE(cs);
}

static bool amdgpu_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                    struct pb_buffer *_buf,
                                    enum radeon_bo_usage usage)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)_buf;

   return amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, usage);
}

void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
{
   ws->base.ctx_create = amdgpu_ctx_create;
   ws->base.ctx_destroy = amdgpu_ctx_destroy;
   ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
   ws->base.cs_create = amdgpu_cs_create;
   ws->base.cs_add_const_ib = amdgpu_cs_add_const_ib;
   ws->base.cs_add_const_preamble_ib = amdgpu_cs_add_const_preamble_ib;
   ws->base.cs_destroy = amdgpu_cs_destroy;
   ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
   ws->base.cs_validate = amdgpu_cs_validate;
   ws->base.cs_check_space = amdgpu_cs_check_space;
   ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list;
   ws->base.cs_flush = amdgpu_cs_flush;
   ws->base.cs_get_next_fence = amdgpu_cs_get_next_fence;
   ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
   ws->base.cs_sync_flush = amdgpu_cs_sync_flush;
   ws->base.fence_wait = amdgpu_fence_wait_rel_timeout;
   ws->base.fence_reference = amdgpu_fence_reference;
}