radeonsi: remove 'Authors:' comments
[mesa.git] / src / gallium / drivers / radeon / r600_pipe_common.c
1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "r600_pipe_common.h"
25 #include "r600_cs.h"
26 #include "tgsi/tgsi_parse.h"
27 #include "util/list.h"
28 #include "util/u_draw_quad.h"
29 #include "util/u_memory.h"
30 #include "util/u_format_s3tc.h"
31 #include "util/u_upload_mgr.h"
32 #include "os/os_time.h"
33 #include "vl/vl_decoder.h"
34 #include "vl/vl_video_buffer.h"
35 #include "radeon/radeon_video.h"
36 #include "amd/common/sid.h"
37 #include <inttypes.h>
38 #include <sys/utsname.h>
39 #include <libsync.h>
40
41 #include <llvm-c/TargetMachine.h>
42
43
44 struct r600_multi_fence {
45 struct pipe_reference reference;
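	/* A fence for each ring; either one may be NULL. */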
46 struct pipe_fence_handle *gfx;
47 struct pipe_fence_handle *sdma;
48
49 /* If the context wasn't flushed at fence creation, this is non-NULL. */
50 struct {
51 struct r600_common_context *ctx;
52 unsigned ib_index;
53 } gfx_unflushed;
54 };
55
56 /*
57 * shader binary helpers.
58 */
59 void si_radeon_shader_binary_init(struct ac_shader_binary *b)
60 {
61 memset(b, 0, sizeof(*b));
62 }
63
64 void si_radeon_shader_binary_clean(struct ac_shader_binary *b)
65 {
66 if (!b)
67 return;
68 FREE(b->code);
69 FREE(b->config);
70 FREE(b->rodata);
71 FREE(b->global_symbol_offsets);
72 FREE(b->relocs);
73 FREE(b->disasm_string);
74 FREE(b->llvm_ir_string);
75 }
76
77 /*
78 * pipe_context
79 */
80
81 /**
82 * Write an EOP event.
83 *
84 * \param event EVENT_TYPE_*
85 * \param event_flags Optional cache flush flags (TC)
86 * \param data_sel 1 = fence, 3 = timestamp
87 * \param buf Buffer
88 * \param va GPU address
 89  * \param new_fence    Fence value to write for this event.
 90  * \param query_type   PIPE_QUERY_* (used to decide whether the GFX9 ZPASS_DONE workaround is needed)
91 */
92 void si_gfx_write_event_eop(struct r600_common_context *ctx,
93 unsigned event, unsigned event_flags,
94 unsigned data_sel,
95 struct r600_resource *buf, uint64_t va,
96 uint32_t new_fence, unsigned query_type)
97 {
98 struct radeon_winsys_cs *cs = ctx->gfx.cs;
99 unsigned op = EVENT_TYPE(event) |
100 EVENT_INDEX(5) |
101 event_flags;
102 unsigned sel = EOP_DATA_SEL(data_sel);
103
104 /* Wait for write confirmation before writing data, but don't send
105 * an interrupt. */
106 if (data_sel != EOP_DATA_SEL_DISCARD)
107 sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
108
109 if (ctx->chip_class >= GFX9) {
110 /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
111 * counters) must immediately precede every timestamp event to
112 * prevent a GPU hang on GFX9.
113 *
114 * Occlusion queries don't need to do it here, because they
115 * always do ZPASS_DONE before the timestamp.
116 */
117 if (ctx->chip_class == GFX9 &&
118 query_type != PIPE_QUERY_OCCLUSION_COUNTER &&
119 query_type != PIPE_QUERY_OCCLUSION_PREDICATE &&
120 query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
121 struct r600_resource *scratch = ctx->eop_bug_scratch;
122
123 assert(16 * ctx->screen->info.num_render_backends <=
124 scratch->b.b.width0);
125 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
126 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
127 radeon_emit(cs, scratch->gpu_address);
128 radeon_emit(cs, scratch->gpu_address >> 32);
129
130 radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
131 RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
132 }
133
134 radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0));
135 radeon_emit(cs, op);
136 radeon_emit(cs, sel);
137 radeon_emit(cs, va); /* address lo */
138 radeon_emit(cs, va >> 32); /* address hi */
139 radeon_emit(cs, new_fence); /* immediate data lo */
140 radeon_emit(cs, 0); /* immediate data hi */
141 radeon_emit(cs, 0); /* unused */
142 } else {
143 if (ctx->chip_class == CIK ||
144 ctx->chip_class == VI) {
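			/* Note: this 'va' shadows the caller's va; the first (dummy)
			 * EOP event below targets the scratch buffer, while the real
			 * EOP event emitted after this block uses the caller's address. */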
145 struct r600_resource *scratch = ctx->eop_bug_scratch;
146 uint64_t va = scratch->gpu_address;
147
148 /* Two EOP events are required to make all engines go idle
149 * (and optional cache flushes executed) before the timestamp
150 * is written.
151 */
152 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
153 radeon_emit(cs, op);
154 radeon_emit(cs, va);
155 radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
156 radeon_emit(cs, 0); /* immediate data */
157 radeon_emit(cs, 0); /* unused */
158
159 radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
160 RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
161 }
162
163 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
164 radeon_emit(cs, op);
165 radeon_emit(cs, va);
166 radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
167 radeon_emit(cs, new_fence); /* immediate data */
168 radeon_emit(cs, 0); /* unused */
169 }
170
171 if (buf) {
172 radeon_add_to_buffer_list(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE,
173 RADEON_PRIO_QUERY);
174 }
175 }
176
177 unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen)
178 {
179 unsigned dwords = 6;
180
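	/* si_gfx_write_event_eop emits two EOP packets on CIK & VI (bug
	 * workaround), hence twice the dword count. */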
181 if (screen->chip_class == CIK ||
182 screen->chip_class == VI)
183 dwords *= 2;
184
185 if (!screen->info.has_virtual_memory)
186 dwords += 2;
187
188 return dwords;
189 }
190
191 void si_gfx_wait_fence(struct r600_common_context *ctx,
192 uint64_t va, uint32_t ref, uint32_t mask)
193 {
194 struct radeon_winsys_cs *cs = ctx->gfx.cs;
195
196 radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
197 radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
198 radeon_emit(cs, va);
199 radeon_emit(cs, va >> 32);
200 radeon_emit(cs, ref); /* reference value */
201 radeon_emit(cs, mask); /* mask */
202 radeon_emit(cs, 4); /* poll interval */
203 }
204
205 static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
206 {
207 struct radeon_winsys_cs *cs = rctx->dma.cs;
208
 209 	/* A NOP packet waits for idle; the encoding differs between SI DMA and CIK+ SDMA. */
210 if (rctx->chip_class >= CIK)
211 radeon_emit(cs, 0x00000000); /* NOP */
212 else
213 radeon_emit(cs, 0xf0000000); /* NOP */
214 }
215
216 void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
217 struct r600_resource *dst, struct r600_resource *src)
218 {
219 uint64_t vram = ctx->dma.cs->used_vram;
220 uint64_t gtt = ctx->dma.cs->used_gart;
221
222 if (dst) {
223 vram += dst->vram_usage;
224 gtt += dst->gart_usage;
225 }
226 if (src) {
227 vram += src->vram_usage;
228 gtt += src->gart_usage;
229 }
230
231 /* Flush the GFX IB if DMA depends on it. */
232 if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
233 ((dst &&
234 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf,
235 RADEON_USAGE_READWRITE)) ||
236 (src &&
237 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf,
238 RADEON_USAGE_WRITE))))
239 ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
240
241 /* Flush if there's not enough space, or if the memory usage per IB
242 * is too large.
243 *
244 * IBs using too little memory are limited by the IB submission overhead.
245 * IBs using too much memory are limited by the kernel/TTM overhead.
246 * Too long IBs create CPU-GPU pipeline bubbles and add latency.
247 *
248 * This heuristic makes sure that DMA requests are executed
249 * very soon after the call is made and lowers memory usage.
250 * It improves texture upload performance by keeping the DMA
251 * engine busy while uploads are being submitted.
252 */
253 num_dw++; /* for emit_wait_idle below */
254 if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
255 ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 ||
256 !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) {
257 ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
258 assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
259 }
260
261 /* Wait for idle if either buffer has been used in the IB before to
262 * prevent read-after-write hazards.
263 */
264 if ((dst &&
265 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf,
266 RADEON_USAGE_READWRITE)) ||
267 (src &&
268 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf,
269 RADEON_USAGE_WRITE)))
270 r600_dma_emit_wait_idle(ctx);
271
 272 	/* With GPUVM, add the buffers to the buffer list once here.
 273 	 * Without GPUVM, the CS checker needs 2 entries in the buffer list
 274 	 * per packet, which the packet emitters have to add manually. */
275 if (ctx->screen->info.has_virtual_memory) {
276 if (dst)
277 radeon_add_to_buffer_list(ctx, &ctx->dma, dst,
278 RADEON_USAGE_WRITE,
279 RADEON_PRIO_SDMA_BUFFER);
280 if (src)
281 radeon_add_to_buffer_list(ctx, &ctx->dma, src,
282 RADEON_USAGE_READ,
283 RADEON_PRIO_SDMA_BUFFER);
284 }
285
 286 	/* This function is called before every DMA call, so count the call here. */
287 ctx->num_dma_calls++;
288 }
289
290 static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
291 {
292 }
293
294 void si_preflush_suspend_features(struct r600_common_context *ctx)
295 {
296 /* suspend queries */
297 if (!LIST_IS_EMPTY(&ctx->active_queries))
298 si_suspend_queries(ctx);
299 }
300
301 void si_postflush_resume_features(struct r600_common_context *ctx)
302 {
303 /* resume queries */
304 if (!LIST_IS_EMPTY(&ctx->active_queries))
305 si_resume_queries(ctx);
306 }
307
308 static void r600_add_fence_dependency(struct r600_common_context *rctx,
309 struct pipe_fence_handle *fence)
310 {
311 struct radeon_winsys *ws = rctx->ws;
312
313 if (rctx->dma.cs)
314 ws->cs_add_fence_dependency(rctx->dma.cs, fence);
315 ws->cs_add_fence_dependency(rctx->gfx.cs, fence);
316 }
317
318 static void r600_fence_server_sync(struct pipe_context *ctx,
319 struct pipe_fence_handle *fence)
320 {
321 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
322 struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
323
324 /* Only amdgpu needs to handle fence dependencies (for fence imports).
325 * radeon synchronizes all rings by default and will not implement
326 * fence imports.
327 */
328 if (rctx->screen->info.drm_major == 2)
329 return;
330
331 /* Only imported fences need to be handled by fence_server_sync,
332 * because the winsys handles synchronizations automatically for BOs
333 * within the process.
334 *
335 * Simply skip unflushed fences here, and the winsys will drop no-op
336 * dependencies (i.e. dependencies within the same ring).
337 */
338 if (rfence->gfx_unflushed.ctx)
339 return;
340
 341 	/* Unflushed commands will not start execution before this
 342 	 * fence dependency is signalled.
343 *
344 * Should we flush the context to allow more GPU parallelism?
345 */
346 if (rfence->sdma)
347 r600_add_fence_dependency(rctx, rfence->sdma);
348 if (rfence->gfx)
349 r600_add_fence_dependency(rctx, rfence->gfx);
350 }
351
352 static void r600_create_fence_fd(struct pipe_context *ctx,
353 struct pipe_fence_handle **pfence, int fd)
354 {
355 struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
356 struct radeon_winsys *ws = rscreen->ws;
357 struct r600_multi_fence *rfence;
358
359 *pfence = NULL;
360
361 if (!rscreen->info.has_sync_file)
362 return;
363
364 rfence = CALLOC_STRUCT(r600_multi_fence);
365 if (!rfence)
366 return;
367
368 pipe_reference_init(&rfence->reference, 1);
369 rfence->gfx = ws->fence_import_sync_file(ws, fd);
370 if (!rfence->gfx) {
371 FREE(rfence);
372 return;
373 }
374
375 *pfence = (struct pipe_fence_handle*)rfence;
376 }
377
378 static int r600_fence_get_fd(struct pipe_screen *screen,
379 struct pipe_fence_handle *fence)
380 {
381 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
382 struct radeon_winsys *ws = rscreen->ws;
383 struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
384 int gfx_fd = -1, sdma_fd = -1;
385
386 if (!rscreen->info.has_sync_file)
387 return -1;
388
389 /* Deferred fences aren't supported. */
390 assert(!rfence->gfx_unflushed.ctx);
391 if (rfence->gfx_unflushed.ctx)
392 return -1;
393
394 if (rfence->sdma) {
395 sdma_fd = ws->fence_export_sync_file(ws, rfence->sdma);
396 if (sdma_fd == -1)
397 return -1;
398 }
399 if (rfence->gfx) {
400 gfx_fd = ws->fence_export_sync_file(ws, rfence->gfx);
401 if (gfx_fd == -1) {
402 if (sdma_fd != -1)
403 close(sdma_fd);
404 return -1;
405 }
406 }
407
408 /* If we don't have FDs at this point, it means we don't have fences
409 * either. */
410 if (sdma_fd == -1)
411 return gfx_fd;
412 if (gfx_fd == -1)
413 return sdma_fd;
414
415 /* Get a fence that will be a combination of both fences. */
416 sync_accumulate("radeonsi", &gfx_fd, sdma_fd);
417 close(sdma_fd);
418 return gfx_fd;
419 }
420
421 static void r600_flush_from_st(struct pipe_context *ctx,
422 struct pipe_fence_handle **fence,
423 unsigned flags)
424 {
425 struct pipe_screen *screen = ctx->screen;
426 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
427 struct radeon_winsys *ws = rctx->ws;
428 struct pipe_fence_handle *gfx_fence = NULL;
429 struct pipe_fence_handle *sdma_fence = NULL;
430 bool deferred_fence = false;
431 unsigned rflags = RADEON_FLUSH_ASYNC;
432
433 if (flags & PIPE_FLUSH_END_OF_FRAME)
434 rflags |= RADEON_FLUSH_END_OF_FRAME;
435
436 /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */
437 if (rctx->dma.cs)
438 rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
439
440 if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) {
441 if (fence)
442 ws->fence_reference(&gfx_fence, rctx->last_gfx_fence);
443 if (!(flags & PIPE_FLUSH_DEFERRED))
444 ws->cs_sync_flush(rctx->gfx.cs);
445 } else {
446 /* Instead of flushing, create a deferred fence. Constraints:
447 * - The state tracker must allow a deferred flush.
448 * - The state tracker must request a fence.
449 * - fence_get_fd is not allowed.
450 * Thread safety in fence_finish must be ensured by the state tracker.
451 */
452 if (flags & PIPE_FLUSH_DEFERRED &&
453 !(flags & PIPE_FLUSH_FENCE_FD) &&
454 fence) {
455 gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
456 deferred_fence = true;
457 } else {
458 rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
459 }
460 }
461
462 /* Both engines can signal out of order, so we need to keep both fences. */
463 if (fence) {
464 struct r600_multi_fence *multi_fence =
465 CALLOC_STRUCT(r600_multi_fence);
466 if (!multi_fence) {
467 ws->fence_reference(&sdma_fence, NULL);
468 ws->fence_reference(&gfx_fence, NULL);
469 goto finish;
470 }
471
472 multi_fence->reference.count = 1;
473 /* If both fences are NULL, fence_finish will always return true. */
474 multi_fence->gfx = gfx_fence;
475 multi_fence->sdma = sdma_fence;
476
477 if (deferred_fence) {
478 multi_fence->gfx_unflushed.ctx = rctx;
479 multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes;
480 }
481
482 screen->fence_reference(screen, fence, NULL);
483 *fence = (struct pipe_fence_handle*)multi_fence;
484 }
485 finish:
486 if (!(flags & PIPE_FLUSH_DEFERRED)) {
487 if (rctx->dma.cs)
488 ws->cs_sync_flush(rctx->dma.cs);
489 ws->cs_sync_flush(rctx->gfx.cs);
490 }
491 }
492
493 static void r600_flush_dma_ring(void *ctx, unsigned flags,
494 struct pipe_fence_handle **fence)
495 {
496 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
497 struct radeon_winsys_cs *cs = rctx->dma.cs;
498 struct radeon_saved_cs saved;
499 bool check_vm =
500 (rctx->screen->debug_flags & DBG(CHECK_VM)) &&
501 rctx->check_vm_faults;
502
503 if (!radeon_emitted(cs, 0)) {
504 if (fence)
505 rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
506 return;
507 }
508
509 if (check_vm)
510 si_save_cs(rctx->ws, cs, &saved, true);
511
512 rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
513 if (fence)
514 rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
515
516 if (check_vm) {
 517 		/* Use a conservative timeout of 800 ms, after which we stop
 518 		 * waiting and assume the GPU is hung.
519 */
520 rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000);
521
522 rctx->check_vm_faults(rctx, &saved, RING_DMA);
523 si_clear_saved_cs(&saved);
524 }
525 }
526
527 /**
528 * Store a linearized copy of all chunks of \p cs together with the buffer
529 * list in \p saved.
530 */
531 void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
532 struct radeon_saved_cs *saved, bool get_buffer_list)
533 {
534 uint32_t *buf;
535 unsigned i;
536
537 /* Save the IB chunks. */
538 saved->num_dw = cs->prev_dw + cs->current.cdw;
539 saved->ib = MALLOC(4 * saved->num_dw);
540 if (!saved->ib)
541 goto oom;
542
543 buf = saved->ib;
544 for (i = 0; i < cs->num_prev; ++i) {
545 memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
546 buf += cs->prev[i].cdw;
547 }
548 memcpy(buf, cs->current.buf, cs->current.cdw * 4);
549
550 if (!get_buffer_list)
551 return;
552
553 /* Save the buffer list. */
554 saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
555 saved->bo_list = CALLOC(saved->bo_count,
556 sizeof(saved->bo_list[0]));
557 if (!saved->bo_list) {
558 FREE(saved->ib);
559 goto oom;
560 }
561 ws->cs_get_buffer_list(cs, saved->bo_list);
562
563 return;
564
565 oom:
566 fprintf(stderr, "%s: out of memory\n", __func__);
567 memset(saved, 0, sizeof(*saved));
568 }
569
570 void si_clear_saved_cs(struct radeon_saved_cs *saved)
571 {
572 FREE(saved->ib);
573 FREE(saved->bo_list);
574
575 memset(saved, 0, sizeof(*saved));
576 }
577
578 static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
579 {
580 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
581 unsigned latest = rctx->ws->query_value(rctx->ws,
582 RADEON_GPU_RESET_COUNTER);
583
584 if (rctx->gpu_reset_counter == latest)
585 return PIPE_NO_RESET;
586
587 rctx->gpu_reset_counter = latest;
588 return PIPE_UNKNOWN_CONTEXT_RESET;
589 }
590
591 static void r600_set_debug_callback(struct pipe_context *ctx,
592 const struct pipe_debug_callback *cb)
593 {
594 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
595
596 if (cb)
597 rctx->debug = *cb;
598 else
599 memset(&rctx->debug, 0, sizeof(rctx->debug));
600 }
601
602 static void r600_set_device_reset_callback(struct pipe_context *ctx,
603 const struct pipe_device_reset_callback *cb)
604 {
605 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
606
607 if (cb)
608 rctx->device_reset_callback = *cb;
609 else
610 memset(&rctx->device_reset_callback, 0,
611 sizeof(rctx->device_reset_callback));
612 }
613
614 bool si_check_device_reset(struct r600_common_context *rctx)
615 {
616 enum pipe_reset_status status;
617
618 if (!rctx->device_reset_callback.reset)
619 return false;
620
621 if (!rctx->b.get_device_reset_status)
622 return false;
623
624 status = rctx->b.get_device_reset_status(&rctx->b);
625 if (status == PIPE_NO_RESET)
626 return false;
627
628 rctx->device_reset_callback.reset(rctx->device_reset_callback.data, status);
629 return true;
630 }
631
632 static void r600_dma_clear_buffer_fallback(struct pipe_context *ctx,
633 struct pipe_resource *dst,
634 uint64_t offset, uint64_t size,
635 unsigned value)
636 {
637 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
638
639 rctx->clear_buffer(ctx, dst, offset, size, value, R600_COHERENCY_NONE);
640 }
641
642 static bool r600_resource_commit(struct pipe_context *pctx,
643 struct pipe_resource *resource,
644 unsigned level, struct pipe_box *box,
645 bool commit)
646 {
647 struct r600_common_context *ctx = (struct r600_common_context *)pctx;
648 struct r600_resource *res = r600_resource(resource);
649
650 /*
651 * Since buffer commitment changes cannot be pipelined, we need to
652 * (a) flush any pending commands that refer to the buffer we're about
653 * to change, and
654 * (b) wait for threaded submit to finish, including those that were
655 * triggered by some other, earlier operation.
656 */
657 if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
658 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
659 res->buf, RADEON_USAGE_READWRITE)) {
660 ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
661 }
662 if (radeon_emitted(ctx->dma.cs, 0) &&
663 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
664 res->buf, RADEON_USAGE_READWRITE)) {
665 ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
666 }
667
 668 	if (ctx->dma.cs)
 		ctx->ws->cs_sync_flush(ctx->dma.cs);
669 ctx->ws->cs_sync_flush(ctx->gfx.cs);
670
671 assert(resource->target == PIPE_BUFFER);
672
673 return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
674 }
675
676 bool si_common_context_init(struct r600_common_context *rctx,
677 struct r600_common_screen *rscreen,
678 unsigned context_flags)
679 {
680 slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers);
681 slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers);
682
683 rctx->screen = rscreen;
684 rctx->ws = rscreen->ws;
685 rctx->family = rscreen->family;
686 rctx->chip_class = rscreen->chip_class;
687
688 rctx->b.invalidate_resource = si_invalidate_resource;
689 rctx->b.resource_commit = r600_resource_commit;
690 rctx->b.transfer_map = u_transfer_map_vtbl;
691 rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
692 rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
693 rctx->b.texture_subdata = u_default_texture_subdata;
694 rctx->b.memory_barrier = r600_memory_barrier;
695 rctx->b.flush = r600_flush_from_st;
696 rctx->b.set_debug_callback = r600_set_debug_callback;
697 rctx->b.create_fence_fd = r600_create_fence_fd;
698 rctx->b.fence_server_sync = r600_fence_server_sync;
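	/* Fallback; the radeonsi SDMA code may override this with an SDMA-based clear. */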
699 rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback;
700 rctx->b.buffer_subdata = si_buffer_subdata;
701
702 if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
703 rctx->b.get_device_reset_status = r600_get_reset_status;
704 rctx->gpu_reset_counter =
705 rctx->ws->query_value(rctx->ws,
706 RADEON_GPU_RESET_COUNTER);
707 }
708
709 rctx->b.set_device_reset_callback = r600_set_device_reset_callback;
710
711 si_init_context_texture_functions(rctx);
712 si_init_query_functions(rctx);
713
714 if (rctx->chip_class == CIK ||
715 rctx->chip_class == VI ||
716 rctx->chip_class == GFX9) {
717 rctx->eop_bug_scratch = (struct r600_resource*)
718 pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
719 16 * rscreen->info.num_render_backends);
720 if (!rctx->eop_bug_scratch)
721 return false;
722 }
723
724 rctx->allocator_zeroed_memory =
725 u_suballocator_create(&rctx->b, rscreen->info.gart_page_size,
726 0, PIPE_USAGE_DEFAULT, 0, true);
727 if (!rctx->allocator_zeroed_memory)
728 return false;
729
730 rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
731 0, PIPE_USAGE_STREAM);
732 if (!rctx->b.stream_uploader)
733 return false;
734
735 rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
736 0, PIPE_USAGE_DEFAULT);
737 if (!rctx->b.const_uploader)
738 return false;
739
740 rctx->ctx = rctx->ws->ctx_create(rctx->ws);
741 if (!rctx->ctx)
742 return false;
743
744 if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
745 rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
746 r600_flush_dma_ring,
747 rctx);
748 rctx->dma.flush = r600_flush_dma_ring;
749 }
750
751 return true;
752 }
753
754 void si_common_context_cleanup(struct r600_common_context *rctx)
755 {
 756 	unsigned i, j;
757
758 /* Release DCC stats. */
759 for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) {
760 assert(!rctx->dcc_stats[i].query_active);
761
762 for (j = 0; j < ARRAY_SIZE(rctx->dcc_stats[i].ps_stats); j++)
763 if (rctx->dcc_stats[i].ps_stats[j])
764 rctx->b.destroy_query(&rctx->b,
765 rctx->dcc_stats[i].ps_stats[j]);
766
767 r600_texture_reference(&rctx->dcc_stats[i].tex, NULL);
768 }
769
770 if (rctx->query_result_shader)
771 rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader);
772
773 if (rctx->gfx.cs)
774 rctx->ws->cs_destroy(rctx->gfx.cs);
775 if (rctx->dma.cs)
776 rctx->ws->cs_destroy(rctx->dma.cs);
777 if (rctx->ctx)
778 rctx->ws->ctx_destroy(rctx->ctx);
779
780 if (rctx->b.stream_uploader)
781 u_upload_destroy(rctx->b.stream_uploader);
782 if (rctx->b.const_uploader)
783 u_upload_destroy(rctx->b.const_uploader);
784
785 slab_destroy_child(&rctx->pool_transfers);
786 slab_destroy_child(&rctx->pool_transfers_unsync);
787
788 if (rctx->allocator_zeroed_memory) {
789 u_suballocator_destroy(rctx->allocator_zeroed_memory);
790 }
791 rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL);
792 rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL);
793 r600_resource_reference(&rctx->eop_bug_scratch, NULL);
794 }
795
796 /*
797 * pipe_screen
798 */
799
800 static const struct debug_named_value common_debug_options[] = {
801 /* logging */
802 { "tex", DBG(TEX), "Print texture info" },
803 { "nir", DBG(NIR), "Enable experimental NIR shaders" },
804 { "compute", DBG(COMPUTE), "Print compute info" },
805 { "vm", DBG(VM), "Print virtual addresses when creating resources" },
806 { "info", DBG(INFO), "Print driver information" },
807
808 /* shaders */
809 { "vs", DBG(VS), "Print vertex shaders" },
810 { "gs", DBG(GS), "Print geometry shaders" },
811 { "ps", DBG(PS), "Print pixel shaders" },
812 { "cs", DBG(CS), "Print compute shaders" },
813 { "tcs", DBG(TCS), "Print tessellation control shaders" },
814 { "tes", DBG(TES), "Print tessellation evaluation shaders" },
815 { "noir", DBG(NO_IR), "Don't print the LLVM IR"},
816 { "notgsi", DBG(NO_TGSI), "Don't print the TGSI"},
817 { "noasm", DBG(NO_ASM), "Don't print disassembled shaders"},
818 { "preoptir", DBG(PREOPT_IR), "Print the LLVM IR before initial optimizations" },
819 { "checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR" },
820 { "nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized shader variants." },
821
822 { "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
823 { "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
824 { "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
825 { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },
826
827 /* features */
828 { "nodma", DBG(NO_ASYNC_DMA), "Disable asynchronous DMA" },
829 { "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" },
830 /* GL uses the word INVALIDATE, gallium uses the word DISCARD */
831 { "noinvalrange", DBG(NO_DISCARD_RANGE), "Disable handling of INVALIDATE_RANGE map flags" },
832 { "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" },
833 { "notiling", DBG(NO_TILING), "Disable tiling" },
834 { "switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on end-of-packet." },
835 { "forcedma", DBG(FORCE_DMA), "Use asynchronous DMA for all operations when possible." },
836 { "precompile", DBG(PRECOMPILE), "Compile one shader variant at shader creation." },
837 { "nowc", DBG(NO_WC), "Disable GTT write combining" },
838 { "check_vm", DBG(CHECK_VM), "Check VM faults and dump debug info." },
839 { "nodcc", DBG(NO_DCC), "Disable DCC." },
840 { "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." },
841 { "norbplus", DBG(NO_RB_PLUS), "Disable RB+." },
842 { "sisched", DBG(SI_SCHED), "Enable LLVM SI Machine Instruction Scheduler." },
843 { "mono", DBG(MONOLITHIC_SHADERS), "Use old-style monolithic shaders compiled on demand" },
844 { "unsafemath", DBG(UNSAFE_MATH), "Enable unsafe math shader optimizations" },
845 { "nodccfb", DBG(NO_DCC_FB), "Disable separate DCC on the main framebuffer" },
846 { "nodpbb", DBG(NO_DPBB), "Disable DPBB." },
847 { "nodfsm", DBG(NO_DFSM), "Disable DFSM." },
848 { "dpbb", DBG(DPBB), "Enable DPBB." },
849 { "dfsm", DBG(DFSM), "Enable DFSM." },
850 { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" },
851
852 DEBUG_NAMED_VALUE_END /* must be last */
853 };
854
855 static const char* r600_get_vendor(struct pipe_screen* pscreen)
856 {
857 return "X.Org";
858 }
859
860 static const char* r600_get_device_vendor(struct pipe_screen* pscreen)
861 {
862 return "AMD";
863 }
864
865 static const char *r600_get_marketing_name(struct radeon_winsys *ws)
866 {
867 if (!ws->get_chip_name)
868 return NULL;
869 return ws->get_chip_name(ws);
870 }
871
872 static const char *r600_get_family_name(const struct r600_common_screen *rscreen)
873 {
874 switch (rscreen->info.family) {
875 case CHIP_TAHITI: return "AMD TAHITI";
876 case CHIP_PITCAIRN: return "AMD PITCAIRN";
877 case CHIP_VERDE: return "AMD CAPE VERDE";
878 case CHIP_OLAND: return "AMD OLAND";
879 case CHIP_HAINAN: return "AMD HAINAN";
880 case CHIP_BONAIRE: return "AMD BONAIRE";
881 case CHIP_KAVERI: return "AMD KAVERI";
882 case CHIP_KABINI: return "AMD KABINI";
883 case CHIP_HAWAII: return "AMD HAWAII";
884 case CHIP_MULLINS: return "AMD MULLINS";
885 case CHIP_TONGA: return "AMD TONGA";
886 case CHIP_ICELAND: return "AMD ICELAND";
887 case CHIP_CARRIZO: return "AMD CARRIZO";
888 case CHIP_FIJI: return "AMD FIJI";
889 case CHIP_POLARIS10: return "AMD POLARIS10";
890 case CHIP_POLARIS11: return "AMD POLARIS11";
891 case CHIP_POLARIS12: return "AMD POLARIS12";
892 case CHIP_STONEY: return "AMD STONEY";
893 case CHIP_VEGA10: return "AMD VEGA10";
894 case CHIP_RAVEN: return "AMD RAVEN";
895 default: return "AMD unknown";
896 }
897 }
898
899 static void r600_disk_cache_create(struct r600_common_screen *rscreen)
900 {
901 /* Don't use the cache if shader dumping is enabled. */
902 if (rscreen->debug_flags & DBG_ALL_SHADERS)
903 return;
904
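	/* Key the cache on both the Mesa and LLVM build timestamps, since
	 * both affect the generated shader binaries. */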
905 uint32_t mesa_timestamp;
906 if (disk_cache_get_function_timestamp(r600_disk_cache_create,
907 &mesa_timestamp)) {
908 char *timestamp_str;
909 int res = -1;
910 uint32_t llvm_timestamp;
911
912 if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
913 &llvm_timestamp)) {
914 res = asprintf(&timestamp_str, "%u_%u",
915 mesa_timestamp, llvm_timestamp);
916 }
917
918 if (res != -1) {
919 /* These flags affect shader compilation. */
920 uint64_t shader_debug_flags =
921 rscreen->debug_flags &
922 (DBG(FS_CORRECT_DERIVS_AFTER_KILL) |
923 DBG(SI_SCHED) |
924 DBG(UNSAFE_MATH));
925
926 rscreen->disk_shader_cache =
927 disk_cache_create(r600_get_family_name(rscreen),
928 timestamp_str,
929 shader_debug_flags);
930 free(timestamp_str);
931 }
932 }
933 }
934
935 static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen)
936 {
937 struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
938 return rscreen->disk_shader_cache;
939 }
940
941 static const char* r600_get_name(struct pipe_screen* pscreen)
942 {
943 struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
944
945 return rscreen->renderer_string;
946 }
947
948 static float r600_get_paramf(struct pipe_screen* pscreen,
949 enum pipe_capf param)
950 {
951 switch (param) {
952 case PIPE_CAPF_MAX_LINE_WIDTH:
953 case PIPE_CAPF_MAX_LINE_WIDTH_AA:
954 case PIPE_CAPF_MAX_POINT_WIDTH:
955 case PIPE_CAPF_MAX_POINT_WIDTH_AA:
956 return 8192.0f;
957 case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
958 return 16.0f;
959 case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
960 return 16.0f;
961 case PIPE_CAPF_GUARD_BAND_LEFT:
962 case PIPE_CAPF_GUARD_BAND_TOP:
963 case PIPE_CAPF_GUARD_BAND_RIGHT:
964 case PIPE_CAPF_GUARD_BAND_BOTTOM:
965 return 0.0f;
966 }
967 return 0.0f;
968 }
969
970 static int r600_get_video_param(struct pipe_screen *screen,
971 enum pipe_video_profile profile,
972 enum pipe_video_entrypoint entrypoint,
973 enum pipe_video_cap param)
974 {
975 switch (param) {
976 case PIPE_VIDEO_CAP_SUPPORTED:
977 return vl_profile_supported(screen, profile, entrypoint);
978 case PIPE_VIDEO_CAP_NPOT_TEXTURES:
979 return 1;
980 case PIPE_VIDEO_CAP_MAX_WIDTH:
981 case PIPE_VIDEO_CAP_MAX_HEIGHT:
982 return vl_video_buffer_max_size(screen);
983 case PIPE_VIDEO_CAP_PREFERED_FORMAT:
984 return PIPE_FORMAT_NV12;
985 case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
986 return false;
987 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
988 return false;
989 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
990 return true;
991 case PIPE_VIDEO_CAP_MAX_LEVEL:
992 return vl_level_supported(screen, profile);
993 default:
994 return 0;
995 }
996 }
997
998 const char *si_get_llvm_processor_name(enum radeon_family family)
999 {
1000 switch (family) {
1001 case CHIP_TAHITI: return "tahiti";
1002 case CHIP_PITCAIRN: return "pitcairn";
1003 case CHIP_VERDE: return "verde";
1004 case CHIP_OLAND: return "oland";
1005 case CHIP_HAINAN: return "hainan";
1006 case CHIP_BONAIRE: return "bonaire";
1007 case CHIP_KABINI: return "kabini";
1008 case CHIP_KAVERI: return "kaveri";
1009 case CHIP_HAWAII: return "hawaii";
1010 case CHIP_MULLINS:
1011 return "mullins";
1012 case CHIP_TONGA: return "tonga";
1013 case CHIP_ICELAND: return "iceland";
1014 case CHIP_CARRIZO: return "carrizo";
1015 case CHIP_FIJI:
1016 return "fiji";
1017 case CHIP_STONEY:
1018 return "stoney";
1019 case CHIP_POLARIS10:
1020 return "polaris10";
1021 case CHIP_POLARIS11:
1022 case CHIP_POLARIS12: /* same as polaris11 */
1023 return "polaris11";
1024 case CHIP_VEGA10:
1025 case CHIP_RAVEN:
1026 return "gfx900";
1027 default:
1028 return "";
1029 }
1030 }
1031
1032 static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
1033 enum pipe_shader_ir ir_type)
1034 {
1035 if (ir_type != PIPE_SHADER_IR_TGSI)
1036 return 256;
1037
1038 /* Only 16 waves per thread-group on gfx9. */
1039 if (screen->chip_class >= GFX9)
1040 return 1024;
1041
1042 /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
1043 * round number.
1044 */
1045 return 2048;
1046 }
1047
1048 static int r600_get_compute_param(struct pipe_screen *screen,
1049 enum pipe_shader_ir ir_type,
1050 enum pipe_compute_cap param,
1051 void *ret)
1052 {
1053 struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
1054
1055 //TODO: select these params by asic
1056 switch (param) {
1057 case PIPE_COMPUTE_CAP_IR_TARGET: {
1058 const char *gpu;
1059 const char *triple;
1060
1061 if (HAVE_LLVM < 0x0400)
1062 triple = "amdgcn--";
1063 else
1064 triple = "amdgcn-mesa-mesa3d";
1065
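		/* The returned target string has the form "<processor>-<triple>". */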
1066 gpu = si_get_llvm_processor_name(rscreen->family);
1067 if (ret) {
1068 sprintf(ret, "%s-%s", gpu, triple);
1069 }
 1070 		/* +2 for the dash and the terminating NUL byte */
1071 return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
1072 }
1073 case PIPE_COMPUTE_CAP_GRID_DIMENSION:
1074 if (ret) {
1075 uint64_t *grid_dimension = ret;
1076 grid_dimension[0] = 3;
1077 }
1078 return 1 * sizeof(uint64_t);
1079
1080 case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
1081 if (ret) {
1082 uint64_t *grid_size = ret;
1083 grid_size[0] = 65535;
1084 grid_size[1] = 65535;
1085 grid_size[2] = 65535;
1086 }
1087 return 3 * sizeof(uint64_t) ;
1088
1089 case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
1090 if (ret) {
1091 uint64_t *block_size = ret;
1092 unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
1093 block_size[0] = threads_per_block;
1094 block_size[1] = threads_per_block;
1095 block_size[2] = threads_per_block;
1096 }
1097 return 3 * sizeof(uint64_t);
1098
1099 case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
1100 if (ret) {
1101 uint64_t *max_threads_per_block = ret;
1102 *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
1103 }
1104 return sizeof(uint64_t);
1105 case PIPE_COMPUTE_CAP_ADDRESS_BITS:
1106 if (ret) {
1107 uint32_t *address_bits = ret;
1108 address_bits[0] = 64;
1109 }
1110 return 1 * sizeof(uint32_t);
1111
1112 case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
1113 if (ret) {
1114 uint64_t *max_global_size = ret;
1115 uint64_t max_mem_alloc_size;
1116
1117 r600_get_compute_param(screen, ir_type,
1118 PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
1119 &max_mem_alloc_size);
1120
1121 /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
1122 * 1/4 of the MAX_GLOBAL_SIZE. Since the
1123 * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
1124 * make sure we never report more than
1125 * 4 * MAX_MEM_ALLOC_SIZE.
1126 */
1127 *max_global_size = MIN2(4 * max_mem_alloc_size,
1128 MAX2(rscreen->info.gart_size,
1129 rscreen->info.vram_size));
1130 }
1131 return sizeof(uint64_t);
1132
1133 case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
1134 if (ret) {
1135 uint64_t *max_local_size = ret;
1136 /* Value reported by the closed source driver. */
1137 *max_local_size = 32768;
1138 }
1139 return sizeof(uint64_t);
1140
1141 case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
1142 if (ret) {
1143 uint64_t *max_input_size = ret;
1144 /* Value reported by the closed source driver. */
1145 *max_input_size = 1024;
1146 }
1147 return sizeof(uint64_t);
1148
1149 case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
1150 if (ret) {
1151 uint64_t *max_mem_alloc_size = ret;
1152
1153 *max_mem_alloc_size = rscreen->info.max_alloc_size;
1154 }
1155 return sizeof(uint64_t);
1156
1157 case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
1158 if (ret) {
1159 uint32_t *max_clock_frequency = ret;
1160 *max_clock_frequency = rscreen->info.max_shader_clock;
1161 }
1162 return sizeof(uint32_t);
1163
1164 case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
1165 if (ret) {
1166 uint32_t *max_compute_units = ret;
1167 *max_compute_units = rscreen->info.num_good_compute_units;
1168 }
1169 return sizeof(uint32_t);
1170
1171 case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
1172 if (ret) {
1173 uint32_t *images_supported = ret;
1174 *images_supported = 0;
1175 }
1176 return sizeof(uint32_t);
1177 case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
1178 break; /* unused */
1179 case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
1180 if (ret) {
1181 uint32_t *subgroup_size = ret;
1182 *subgroup_size = 64;
1183 }
1184 return sizeof(uint32_t);
1185 case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
1186 if (ret) {
1187 uint64_t *max_variable_threads_per_block = ret;
1188 if (ir_type == PIPE_SHADER_IR_TGSI)
1189 *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
1190 else
1191 *max_variable_threads_per_block = 0;
1192 }
1193 return sizeof(uint64_t);
1194 }
1195
1196 fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
1197 return 0;
1198 }
1199
1200 static uint64_t r600_get_timestamp(struct pipe_screen *screen)
1201 {
1202 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
1203
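	/* Convert the GPU clock counter to nanoseconds; clock_crystal_freq is in kHz. */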
1204 return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) /
1205 rscreen->info.clock_crystal_freq;
1206 }
1207
1208 static void r600_fence_reference(struct pipe_screen *screen,
1209 struct pipe_fence_handle **dst,
1210 struct pipe_fence_handle *src)
1211 {
1212 struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws;
1213 struct r600_multi_fence **rdst = (struct r600_multi_fence **)dst;
1214 struct r600_multi_fence *rsrc = (struct r600_multi_fence *)src;
1215
1216 if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) {
1217 ws->fence_reference(&(*rdst)->gfx, NULL);
1218 ws->fence_reference(&(*rdst)->sdma, NULL);
1219 FREE(*rdst);
1220 }
1221 *rdst = rsrc;
1222 }
1223
1224 static boolean r600_fence_finish(struct pipe_screen *screen,
1225 struct pipe_context *ctx,
1226 struct pipe_fence_handle *fence,
1227 uint64_t timeout)
1228 {
1229 struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
1230 struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
1231 struct r600_common_context *rctx;
1232 int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
1233
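	/* If this is a threaded context, sync it and get the wrapped driver
	 * context; it's only needed to flush a deferred gfx fence below. */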
1234 ctx = threaded_context_unwrap_sync(ctx);
1235 rctx = ctx ? (struct r600_common_context*)ctx : NULL;
1236
1237 if (rfence->sdma) {
1238 if (!rws->fence_wait(rws, rfence->sdma, timeout))
1239 return false;
1240
1241 /* Recompute the timeout after waiting. */
1242 if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
1243 int64_t time = os_time_get_nano();
1244 timeout = abs_timeout > time ? abs_timeout - time : 0;
1245 }
1246 }
1247
1248 if (!rfence->gfx)
1249 return true;
1250
1251 /* Flush the gfx IB if it hasn't been flushed yet. */
1252 if (rctx &&
1253 rfence->gfx_unflushed.ctx == rctx &&
1254 rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) {
1255 rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);
1256 rfence->gfx_unflushed.ctx = NULL;
1257
1258 if (!timeout)
1259 return false;
1260
1261 /* Recompute the timeout after all that. */
1262 if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
1263 int64_t time = os_time_get_nano();
1264 timeout = abs_timeout > time ? abs_timeout - time : 0;
1265 }
1266 }
1267
1268 return rws->fence_wait(rws, rfence->gfx, timeout);
1269 }
1270
1271 static void r600_query_memory_info(struct pipe_screen *screen,
1272 struct pipe_memory_info *info)
1273 {
1274 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
1275 struct radeon_winsys *ws = rscreen->ws;
1276 unsigned vram_usage, gtt_usage;
1277
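	/* pipe_memory_info sizes are reported in kilobytes. */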
1278 info->total_device_memory = rscreen->info.vram_size / 1024;
1279 info->total_staging_memory = rscreen->info.gart_size / 1024;
1280
1281 /* The real TTM memory usage is somewhat random, because:
1282 *
1283 * 1) TTM delays freeing memory, because it can only free it after
1284 * fences expire.
1285 *
1286 * 2) The memory usage can be really low if big VRAM evictions are
1287 * taking place, but the real usage is well above the size of VRAM.
1288 *
1289 * Instead, return statistics of this process.
1290 */
1291 vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
1292 gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
1293
1294 info->avail_device_memory =
1295 vram_usage <= info->total_device_memory ?
1296 info->total_device_memory - vram_usage : 0;
1297 info->avail_staging_memory =
1298 gtt_usage <= info->total_staging_memory ?
1299 info->total_staging_memory - gtt_usage : 0;
1300
1301 info->device_memory_evicted =
1302 ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
1303
1304 if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4)
1305 info->nr_device_memory_evictions =
1306 ws->query_value(ws, RADEON_NUM_EVICTIONS);
1307 else
1308 /* Just return the number of evicted 64KB pages. */
1309 info->nr_device_memory_evictions = info->device_memory_evicted / 64;
1310 }
1311
1312 struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
1313 const struct pipe_resource *templ)
1314 {
1315 if (templ->target == PIPE_BUFFER) {
1316 return si_buffer_create(screen, templ, 256);
1317 } else {
1318 return si_texture_create(screen, templ);
1319 }
1320 }
1321
1322 bool si_common_screen_init(struct r600_common_screen *rscreen,
1323 struct radeon_winsys *ws)
1324 {
1325 char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
1326 struct utsname uname_data;
1327 const char *chip_name;
1328
1329 ws->query_info(ws, &rscreen->info);
1330 rscreen->ws = ws;
1331
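	/* If a marketing name is available, also show the family name,
	 * skipping its "AMD " prefix (hence the "+ 4"). */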
1332 if ((chip_name = r600_get_marketing_name(ws)))
1333 snprintf(family_name, sizeof(family_name), "%s / ",
1334 r600_get_family_name(rscreen) + 4);
1335 else
1336 chip_name = r600_get_family_name(rscreen);
1337
1338 if (uname(&uname_data) == 0)
1339 snprintf(kernel_version, sizeof(kernel_version),
1340 " / %s", uname_data.release);
1341
1342 if (HAVE_LLVM > 0) {
1343 snprintf(llvm_string, sizeof(llvm_string),
1344 ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
1345 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
1346 }
1347
1348 snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
1349 "%s (%sDRM %i.%i.%i%s%s)",
1350 chip_name, family_name, rscreen->info.drm_major,
1351 rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
1352 kernel_version, llvm_string);
1353
1354 rscreen->b.get_name = r600_get_name;
1355 rscreen->b.get_vendor = r600_get_vendor;
1356 rscreen->b.get_device_vendor = r600_get_device_vendor;
1357 rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache;
1358 rscreen->b.get_compute_param = r600_get_compute_param;
1359 rscreen->b.get_paramf = r600_get_paramf;
1360 rscreen->b.get_timestamp = r600_get_timestamp;
1361 rscreen->b.fence_finish = r600_fence_finish;
1362 rscreen->b.fence_reference = r600_fence_reference;
1363 rscreen->b.resource_destroy = u_resource_destroy_vtbl;
1364 rscreen->b.resource_from_user_memory = si_buffer_from_user_memory;
1365 rscreen->b.query_memory_info = r600_query_memory_info;
1366 rscreen->b.fence_get_fd = r600_fence_get_fd;
1367
1368 if (rscreen->info.has_hw_decode) {
1369 rscreen->b.get_video_param = si_vid_get_video_param;
1370 rscreen->b.is_video_format_supported = si_vid_is_format_supported;
1371 } else {
1372 rscreen->b.get_video_param = r600_get_video_param;
1373 rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
1374 }
1375
1376 si_init_screen_texture_functions(rscreen);
1377 si_init_screen_query_functions(rscreen);
1378
1379 rscreen->family = rscreen->info.family;
1380 rscreen->chip_class = rscreen->info.chip_class;
1381 rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
1382 rscreen->has_rbplus = false;
1383 rscreen->rbplus_allowed = false;
1384
1385 r600_disk_cache_create(rscreen);
1386
1387 slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64);
1388
1389 rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
1390 if (rscreen->force_aniso >= 0) {
1391 printf("radeon: Forcing anisotropy filter to %ix\n",
1392 /* round down to a power of two */
1393 1 << util_logbase2(rscreen->force_aniso));
1394 }
1395
1396 (void) mtx_init(&rscreen->aux_context_lock, mtx_plain);
1397 (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain);
1398
1399 if (rscreen->debug_flags & DBG(INFO)) {
1400 printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
1401 rscreen->info.pci_domain, rscreen->info.pci_bus,
1402 rscreen->info.pci_dev, rscreen->info.pci_func);
1403 printf("pci_id = 0x%x\n", rscreen->info.pci_id);
1404 printf("family = %i (%s)\n", rscreen->info.family,
1405 r600_get_family_name(rscreen));
1406 printf("chip_class = %i\n", rscreen->info.chip_class);
1407 printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size);
1408 printf("gart_page_size = %u\n", rscreen->info.gart_page_size);
1409 printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
1410 printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
1411 printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
1412 printf("max_alloc_size = %i MB\n",
1413 (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
1414 printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size);
1415 printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram);
1416 printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
1417 printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
1418 printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode);
1419 printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings);
1420 printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings);
1421 printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version);
1422 printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version);
1423 printf("me_fw_version = %i\n", rscreen->info.me_fw_version);
1424 printf("me_fw_feature = %i\n", rscreen->info.me_fw_feature);
1425 printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version);
1426 printf("pfp_fw_feature = %i\n", rscreen->info.pfp_fw_feature);
1427 printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version);
1428 printf("ce_fw_feature = %i\n", rscreen->info.ce_fw_feature);
1429 printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
1430 printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
1431 printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size);
1432 printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
1433 rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
1434 printf("has_userptr = %i\n", rscreen->info.has_userptr);
1435 printf("has_syncobj = %u\n", rscreen->info.has_syncobj);
1436 printf("has_sync_file = %u\n", rscreen->info.has_sync_file);
1437
1438 printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
1439 printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
1440 printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
1441 printf("max_se = %i\n", rscreen->info.max_se);
1442 printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
1443
1444 printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
1445 printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
1446 printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
1447 printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
1448 printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
1449 printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
1450 printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask);
1451 printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
1452 }
1453 return true;
1454 }
1455
1456 void si_destroy_common_screen(struct r600_common_screen *rscreen)
1457 {
1458 si_perfcounters_destroy(rscreen);
1459 si_gpu_load_kill_thread(rscreen);
1460
1461 mtx_destroy(&rscreen->gpu_load_mutex);
1462 mtx_destroy(&rscreen->aux_context_lock);
1463 rscreen->aux_context->destroy(rscreen->aux_context);
1464
1465 slab_destroy_parent(&rscreen->pool_transfers);
1466
1467 disk_cache_destroy(rscreen->disk_shader_cache);
1468 rscreen->ws->destroy(rscreen->ws);
1469 FREE(rscreen);
1470 }
1471
1472 bool si_can_dump_shader(struct r600_common_screen *rscreen,
1473 unsigned processor)
1474 {
1475 return rscreen->debug_flags & (1 << processor);
1476 }
1477
1478 bool si_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
1479 {
1480 return (rscreen->debug_flags & DBG(CHECK_IR)) ||
1481 si_can_dump_shader(rscreen, processor);
1482 }
1483
1484 void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
1485 uint64_t offset, uint64_t size, unsigned value)
1486 {
1487 struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
1488
1489 mtx_lock(&rscreen->aux_context_lock);
1490 rctx->dma_clear_buffer(&rctx->b, dst, offset, size, value);
1491 rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
1492 mtx_unlock(&rscreen->aux_context_lock);
1493 }