radeonsi: move r600_test_dma.c into si_test_dma.c
[mesa.git] / src / gallium / drivers / radeon / r600_pipe_common.c
1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "r600_pipe_common.h"
25 #include "r600_cs.h"
26 #include "tgsi/tgsi_parse.h"
27 #include "util/list.h"
28 #include "util/u_draw_quad.h"
29 #include "util/u_memory.h"
30 #include "util/u_format_s3tc.h"
31 #include "util/u_upload_mgr.h"
32 #include "util/os_time.h"
33 #include "vl/vl_decoder.h"
34 #include "vl/vl_video_buffer.h"
35 #include "radeon/radeon_video.h"
36 #include "amd/common/ac_llvm_util.h"
37 #include "amd/common/sid.h"
38 #include <inttypes.h>
39 #include <sys/utsname.h>
40
41 #include <llvm-c/TargetMachine.h>
42
43
44 /*
45 * shader binary helpers.
46 */
47 void si_radeon_shader_binary_init(struct ac_shader_binary *b)
48 {
49 memset(b, 0, sizeof(*b));
50 }
51
52 void si_radeon_shader_binary_clean(struct ac_shader_binary *b)
53 {
54 if (!b)
55 return;
56 FREE(b->code);
57 FREE(b->config);
58 FREE(b->rodata);
59 FREE(b->global_symbol_offsets);
60 FREE(b->relocs);
61 FREE(b->disasm_string);
62 FREE(b->llvm_ir_string);
63 }
64
65 /*
66 * pipe_context
67 */
68
69 /**
70 * Write an EOP event.
71 *
72 * \param event EVENT_TYPE_*
73 * \param event_flags Optional cache flush flags (TC)
74 * \param data_sel 1 = fence, 3 = timestamp
75 * \param buf Buffer
76 * \param va GPU address
77 * \param new_fence Fence value to write for this event.
78 * \param query_type PIPE_QUERY_* (only used to decide whether the GFX9 EOP bug workaround is needed; occlusion queries skip it)
79 */
80 void si_gfx_write_event_eop(struct r600_common_context *ctx,
81 unsigned event, unsigned event_flags,
82 unsigned data_sel,
83 struct r600_resource *buf, uint64_t va,
84 uint32_t new_fence, unsigned query_type)
85 {
86 struct radeon_winsys_cs *cs = ctx->gfx.cs;
87 unsigned op = EVENT_TYPE(event) |
88 EVENT_INDEX(5) |
89 event_flags;
90 unsigned sel = EOP_DATA_SEL(data_sel);
91
92 /* Wait for write confirmation before writing data, but don't send
93 * an interrupt. */
94 if (data_sel != EOP_DATA_SEL_DISCARD)
95 sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
96
97 if (ctx->chip_class >= GFX9) {
98 /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
99 * counters) must immediately precede every timestamp event to
100 * prevent a GPU hang on GFX9.
101 *
102 * Occlusion queries don't need to do it here, because they
103 * always do ZPASS_DONE before the timestamp.
104 */
105 if (ctx->chip_class == GFX9 &&
106 query_type != PIPE_QUERY_OCCLUSION_COUNTER &&
107 query_type != PIPE_QUERY_OCCLUSION_PREDICATE &&
108 query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
109 struct r600_resource *scratch = ctx->eop_bug_scratch;
110
111 assert(16 * ctx->screen->info.num_render_backends <=
112 scratch->b.b.width0);
113 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
114 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
115 radeon_emit(cs, scratch->gpu_address);
116 radeon_emit(cs, scratch->gpu_address >> 32);
117
118 radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
119 RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
120 }
121
122 radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0));
123 radeon_emit(cs, op);
124 radeon_emit(cs, sel);
125 radeon_emit(cs, va); /* address lo */
126 radeon_emit(cs, va >> 32); /* address hi */
127 radeon_emit(cs, new_fence); /* immediate data lo */
128 radeon_emit(cs, 0); /* immediate data hi */
129 radeon_emit(cs, 0); /* unused */
130 } else {
131 if (ctx->chip_class == CIK ||
132 ctx->chip_class == VI) {
133 struct r600_resource *scratch = ctx->eop_bug_scratch;
134 uint64_t va = scratch->gpu_address;
135
136 /* Two EOP events are required to make all engines go idle
137 * (and optional cache flushes executed) before the timestamp
138 * is written.
139 */
140 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
141 radeon_emit(cs, op);
142 radeon_emit(cs, va);
143 radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
144 radeon_emit(cs, 0); /* immediate data */
145 radeon_emit(cs, 0); /* unused */
146
147 radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
148 RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
149 }
150
151 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
152 radeon_emit(cs, op);
153 radeon_emit(cs, va);
154 radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
155 radeon_emit(cs, new_fence); /* immediate data */
156 radeon_emit(cs, 0); /* unused */
157 }
158
159 if (buf) {
160 radeon_add_to_buffer_list(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE,
161 RADEON_PRIO_QUERY);
162 }
163 }
164
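/**
 * Return the number of dwords to reserve for a GFX fence write: 6 for one
 * EOP packet, doubled on CIK/VI (where si_gfx_write_event_eop emits two EOP
 * events), plus 2 when virtual memory is not supported.
 */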
165 unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen)
166 {
167 unsigned dwords = 6;
168
169 if (screen->chip_class == CIK ||
170 screen->chip_class == VI)
171 dwords *= 2;
172
173 if (!screen->info.has_virtual_memory)
174 dwords += 2;
175
176 return dwords;
177 }
178
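/**
 * Emit a WAIT_REG_MEM packet that makes the CP poll the 32-bit value at
 * \p va until (value & mask) == ref, using a poll interval of 4.
 */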
179 void si_gfx_wait_fence(struct r600_common_context *ctx,
180 uint64_t va, uint32_t ref, uint32_t mask)
181 {
182 struct radeon_winsys_cs *cs = ctx->gfx.cs;
183
184 radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
185 radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
186 radeon_emit(cs, va);
187 radeon_emit(cs, va >> 32);
188 radeon_emit(cs, ref); /* reference value */
189 radeon_emit(cs, mask); /* mask */
190 radeon_emit(cs, 4); /* poll interval */
191 }
192
193 static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
194 {
195 struct radeon_winsys_cs *cs = rctx->dma.cs;
196
197 /* NOP waits for idle; the packet encoding differs between the SI DMA and the CIK+ SDMA engine. */
198 if (rctx->chip_class >= CIK)
199 radeon_emit(cs, 0x00000000); /* CIK+ SDMA NOP */
200 else
201 radeon_emit(cs, 0xf0000000); /* SI DMA NOP */
202 }
203
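/**
 * Prepare the SDMA IB for a packet of \p num_dw dwords that reads \p src
 * and writes \p dst:
 * - flush the GFX IB first if the DMA transfer would depend on it,
 * - flush the DMA IB if it is out of space or its memory usage is too high,
 * - emit a wait-for-idle if either buffer was already used in this DMA IB,
 * - add both buffers to the DMA buffer list (when GPUVM is available).
 */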
204 void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
205 struct r600_resource *dst, struct r600_resource *src)
206 {
207 uint64_t vram = ctx->dma.cs->used_vram;
208 uint64_t gtt = ctx->dma.cs->used_gart;
209
210 if (dst) {
211 vram += dst->vram_usage;
212 gtt += dst->gart_usage;
213 }
214 if (src) {
215 vram += src->vram_usage;
216 gtt += src->gart_usage;
217 }
218
219 /* Flush the GFX IB if DMA depends on it. */
220 if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
221 ((dst &&
222 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf,
223 RADEON_USAGE_READWRITE)) ||
224 (src &&
225 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf,
226 RADEON_USAGE_WRITE))))
227 ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
228
229 /* Flush if there's not enough space, or if the memory usage per IB
230 * is too large.
231 *
232 * IBs using too little memory are limited by the IB submission overhead.
233 * IBs using too much memory are limited by the kernel/TTM overhead.
234 * Too long IBs create CPU-GPU pipeline bubbles and add latency.
235 *
236 * This heuristic makes sure that DMA requests are executed
237 * very soon after the call is made and lowers memory usage.
238 * It improves texture upload performance by keeping the DMA
239 * engine busy while uploads are being submitted.
240 */
241 num_dw++; /* for emit_wait_idle below */
242 if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
243 ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 ||
244 !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) {
245 ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
246 assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
247 }
248
249 /* Wait for idle if either buffer has already been used in this IB,
250 * to prevent read-after-write hazards.
251 */
252 if ((dst &&
253 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf,
254 RADEON_USAGE_READWRITE)) ||
255 (src &&
256 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf,
257 RADEON_USAGE_WRITE)))
258 r600_dma_emit_wait_idle(ctx);
259
260 /* With GPUVM, add the buffers to the buffer list here; without GPUVM,
261 * the CS checker needs 2 buffer-list entries per packet, which each
262 * packet emitter adds manually instead. */
263 if (ctx->screen->info.has_virtual_memory) {
264 if (dst)
265 radeon_add_to_buffer_list(ctx, &ctx->dma, dst,
266 RADEON_USAGE_WRITE,
267 RADEON_PRIO_SDMA_BUFFER);
268 if (src)
269 radeon_add_to_buffer_list(ctx, &ctx->dma, src,
270 RADEON_USAGE_READ,
271 RADEON_PRIO_SDMA_BUFFER);
272 }
273
274 /* This function is called before every DMA call, so count them here. */
275 ctx->num_dma_calls++;
276 }
277
278 static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
279 {
280 }
281
282 void si_preflush_suspend_features(struct r600_common_context *ctx)
283 {
284 /* suspend queries */
285 if (!LIST_IS_EMPTY(&ctx->active_queries))
286 si_suspend_queries(ctx);
287 }
288
289 void si_postflush_resume_features(struct r600_common_context *ctx)
290 {
291 /* resume queries */
292 if (!LIST_IS_EMPTY(&ctx->active_queries))
293 si_resume_queries(ctx);
294 }
295
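/* Flush callback for the SDMA IB: submits the IB if it contains any packets,
 * returns the resulting fence, and, with R600_DEBUG=check_vm, saves the IB
 * and passes it to the VM fault checker after waiting for the submission. */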
296 static void r600_flush_dma_ring(void *ctx, unsigned flags,
297 struct pipe_fence_handle **fence)
298 {
299 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
300 struct radeon_winsys_cs *cs = rctx->dma.cs;
301 struct radeon_saved_cs saved;
302 bool check_vm =
303 (rctx->screen->debug_flags & DBG(CHECK_VM)) &&
304 rctx->check_vm_faults;
305
306 if (!radeon_emitted(cs, 0)) {
307 if (fence)
308 rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
309 return;
310 }
311
312 if (check_vm)
313 si_save_cs(rctx->ws, cs, &saved, true);
314
315 rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
316 if (fence)
317 rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
318
319 if (check_vm) {
320 /* Use a conservative timeout of 800 ms; when it expires, stop waiting
321 * and assume the GPU is hung.
322 */
323 rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000);
324
325 rctx->check_vm_faults(rctx, &saved, RING_DMA);
326 si_clear_saved_cs(&saved);
327 }
328 }
329
330 /**
331 * Store a linearized copy of all chunks of \p cs together with the buffer
332 * list in \p saved.
333 */
334 void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
335 struct radeon_saved_cs *saved, bool get_buffer_list)
336 {
337 uint32_t *buf;
338 unsigned i;
339
340 /* Save the IB chunks. */
341 saved->num_dw = cs->prev_dw + cs->current.cdw;
342 saved->ib = MALLOC(4 * saved->num_dw);
343 if (!saved->ib)
344 goto oom;
345
346 buf = saved->ib;
347 for (i = 0; i < cs->num_prev; ++i) {
348 memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
349 buf += cs->prev[i].cdw;
350 }
351 memcpy(buf, cs->current.buf, cs->current.cdw * 4);
352
353 if (!get_buffer_list)
354 return;
355
356 /* Save the buffer list. */
357 saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
358 saved->bo_list = CALLOC(saved->bo_count,
359 sizeof(saved->bo_list[0]));
360 if (!saved->bo_list) {
361 FREE(saved->ib);
362 goto oom;
363 }
364 ws->cs_get_buffer_list(cs, saved->bo_list);
365
366 return;
367
368 oom:
369 fprintf(stderr, "%s: out of memory\n", __func__);
370 memset(saved, 0, sizeof(*saved));
371 }
372
373 void si_clear_saved_cs(struct radeon_saved_cs *saved)
374 {
375 FREE(saved->ib);
376 FREE(saved->bo_list);
377
378 memset(saved, 0, sizeof(*saved));
379 }
380
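/* Report a reset of unknown guilt whenever the kernel's GPU reset counter
 * has changed since this context last sampled it. */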
381 static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
382 {
383 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
384 unsigned latest = rctx->ws->query_value(rctx->ws,
385 RADEON_GPU_RESET_COUNTER);
386
387 if (rctx->gpu_reset_counter == latest)
388 return PIPE_NO_RESET;
389
390 rctx->gpu_reset_counter = latest;
391 return PIPE_UNKNOWN_CONTEXT_RESET;
392 }
393
394 static void r600_set_device_reset_callback(struct pipe_context *ctx,
395 const struct pipe_device_reset_callback *cb)
396 {
397 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
398
399 if (cb)
400 rctx->device_reset_callback = *cb;
401 else
402 memset(&rctx->device_reset_callback, 0,
403 sizeof(rctx->device_reset_callback));
404 }
405
406 bool si_check_device_reset(struct r600_common_context *rctx)
407 {
408 enum pipe_reset_status status;
409
410 if (!rctx->device_reset_callback.reset)
411 return false;
412
413 if (!rctx->b.get_device_reset_status)
414 return false;
415
416 status = rctx->b.get_device_reset_status(&rctx->b);
417 if (status == PIPE_NO_RESET)
418 return false;
419
420 rctx->device_reset_callback.reset(rctx->device_reset_callback.data, status);
421 return true;
422 }
423
424 static void r600_dma_clear_buffer_fallback(struct pipe_context *ctx,
425 struct pipe_resource *dst,
426 uint64_t offset, uint64_t size,
427 unsigned value)
428 {
429 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
430
431 rctx->clear_buffer(ctx, dst, offset, size, value, R600_COHERENCY_NONE);
432 }
433
434 static bool r600_resource_commit(struct pipe_context *pctx,
435 struct pipe_resource *resource,
436 unsigned level, struct pipe_box *box,
437 bool commit)
438 {
439 struct r600_common_context *ctx = (struct r600_common_context *)pctx;
440 struct r600_resource *res = r600_resource(resource);
441
442 /*
443 * Since buffer commitment changes cannot be pipelined, we need to
444 * (a) flush any pending commands that refer to the buffer we're about
445 * to change, and
446 * (b) wait for threaded submit to finish, including those that were
447 * triggered by some other, earlier operation.
448 */
449 if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
450 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
451 res->buf, RADEON_USAGE_READWRITE)) {
452 ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
453 }
454 if (radeon_emitted(ctx->dma.cs, 0) &&
455 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
456 res->buf, RADEON_USAGE_READWRITE)) {
457 ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
458 }
459
460 ctx->ws->cs_sync_flush(ctx->dma.cs);
461 ctx->ws->cs_sync_flush(ctx->gfx.cs);
462
463 assert(resource->target == PIPE_BUFFER);
464
465 return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
466 }
467
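/**
 * Initialize the driver-independent parts of the context: transfer pools,
 * common pipe_context callbacks, GPU reset status queries, the EOP-bug
 * scratch buffer (CIK/VI/GFX9), the zeroed-memory suballocator, the
 * uploaders, the winsys context and, unless disabled, the SDMA IB.
 */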
468 bool si_common_context_init(struct r600_common_context *rctx,
469 struct r600_common_screen *rscreen,
470 unsigned context_flags)
471 {
472 slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers);
473 slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers);
474
475 rctx->screen = rscreen;
476 rctx->ws = rscreen->ws;
477 rctx->family = rscreen->family;
478 rctx->chip_class = rscreen->chip_class;
479
480 rctx->b.invalidate_resource = si_invalidate_resource;
481 rctx->b.resource_commit = r600_resource_commit;
482 rctx->b.transfer_map = u_transfer_map_vtbl;
483 rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
484 rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
485 rctx->b.texture_subdata = u_default_texture_subdata;
486 rctx->b.memory_barrier = r600_memory_barrier;
487 rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback;
488 rctx->b.buffer_subdata = si_buffer_subdata;
489
490 if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
491 rctx->b.get_device_reset_status = r600_get_reset_status;
492 rctx->gpu_reset_counter =
493 rctx->ws->query_value(rctx->ws,
494 RADEON_GPU_RESET_COUNTER);
495 }
496
497 rctx->b.set_device_reset_callback = r600_set_device_reset_callback;
498
499 si_init_context_texture_functions(rctx);
500 si_init_query_functions(rctx);
501
502 if (rctx->chip_class == CIK ||
503 rctx->chip_class == VI ||
504 rctx->chip_class == GFX9) {
505 rctx->eop_bug_scratch = (struct r600_resource*)
506 pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
507 16 * rscreen->info.num_render_backends);
508 if (!rctx->eop_bug_scratch)
509 return false;
510 }
511
512 rctx->allocator_zeroed_memory =
513 u_suballocator_create(&rctx->b, rscreen->info.gart_page_size,
514 0, PIPE_USAGE_DEFAULT, 0, true);
515 if (!rctx->allocator_zeroed_memory)
516 return false;
517
518 rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
519 0, PIPE_USAGE_STREAM);
520 if (!rctx->b.stream_uploader)
521 return false;
522
523 rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
524 0, PIPE_USAGE_DEFAULT);
525 if (!rctx->b.const_uploader)
526 return false;
527
528 rctx->ctx = rctx->ws->ctx_create(rctx->ws);
529 if (!rctx->ctx)
530 return false;
531
532 if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
533 rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
534 r600_flush_dma_ring,
535 rctx);
536 rctx->dma.flush = r600_flush_dma_ring;
537 }
538
539 return true;
540 }
541
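/* Release everything set up by si_common_context_init() plus the per-context
 * DCC statistics queries and the query result shader. */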
542 void si_common_context_cleanup(struct r600_common_context *rctx)
543 {
544 unsigned i,j;
545
546 /* Release DCC stats. */
547 for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) {
548 assert(!rctx->dcc_stats[i].query_active);
549
550 for (j = 0; j < ARRAY_SIZE(rctx->dcc_stats[i].ps_stats); j++)
551 if (rctx->dcc_stats[i].ps_stats[j])
552 rctx->b.destroy_query(&rctx->b,
553 rctx->dcc_stats[i].ps_stats[j]);
554
555 r600_texture_reference(&rctx->dcc_stats[i].tex, NULL);
556 }
557
558 if (rctx->query_result_shader)
559 rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader);
560
561 if (rctx->gfx.cs)
562 rctx->ws->cs_destroy(rctx->gfx.cs);
563 if (rctx->dma.cs)
564 rctx->ws->cs_destroy(rctx->dma.cs);
565 if (rctx->ctx)
566 rctx->ws->ctx_destroy(rctx->ctx);
567
568 if (rctx->b.stream_uploader)
569 u_upload_destroy(rctx->b.stream_uploader);
570 if (rctx->b.const_uploader)
571 u_upload_destroy(rctx->b.const_uploader);
572
573 slab_destroy_child(&rctx->pool_transfers);
574 slab_destroy_child(&rctx->pool_transfers_unsync);
575
576 if (rctx->allocator_zeroed_memory) {
577 u_suballocator_destroy(rctx->allocator_zeroed_memory);
578 }
579 rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL);
580 rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL);
581 r600_resource_reference(&rctx->eop_bug_scratch, NULL);
582 }
583
584 /*
585 * pipe_screen
586 */
587
588 static const struct debug_named_value common_debug_options[] = {
589 /* logging */
590 { "tex", DBG(TEX), "Print texture info" },
591 { "nir", DBG(NIR), "Enable experimental NIR shaders" },
592 { "compute", DBG(COMPUTE), "Print compute info" },
593 { "vm", DBG(VM), "Print virtual addresses when creating resources" },
594 { "info", DBG(INFO), "Print driver information" },
595
596 /* shaders */
597 { "vs", DBG(VS), "Print vertex shaders" },
598 { "gs", DBG(GS), "Print geometry shaders" },
599 { "ps", DBG(PS), "Print pixel shaders" },
600 { "cs", DBG(CS), "Print compute shaders" },
601 { "tcs", DBG(TCS), "Print tessellation control shaders" },
602 { "tes", DBG(TES), "Print tessellation evaluation shaders" },
603 { "noir", DBG(NO_IR), "Don't print the LLVM IR"},
604 { "notgsi", DBG(NO_TGSI), "Don't print the TGSI"},
605 { "noasm", DBG(NO_ASM), "Don't print disassembled shaders"},
606 { "preoptir", DBG(PREOPT_IR), "Print the LLVM IR before initial optimizations" },
607 { "checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR" },
608 { "nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized shader variants." },
609
610 { "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
611 { "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
612 { "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
613 { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },
614
615 /* features */
616 { "nodma", DBG(NO_ASYNC_DMA), "Disable asynchronous DMA" },
617 { "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" },
618 { "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" },
619 { "notiling", DBG(NO_TILING), "Disable tiling" },
620 { "switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on end-of-packet." },
621 { "forcedma", DBG(FORCE_DMA), "Use asynchronous DMA for all operations when possible." },
622 { "precompile", DBG(PRECOMPILE), "Compile one shader variant at shader creation." },
623 { "nowc", DBG(NO_WC), "Disable GTT write combining" },
624 { "check_vm", DBG(CHECK_VM), "Check VM faults and dump debug info." },
625 { "nodcc", DBG(NO_DCC), "Disable DCC." },
626 { "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." },
627 { "norbplus", DBG(NO_RB_PLUS), "Disable RB+." },
628 { "sisched", DBG(SI_SCHED), "Enable LLVM SI Machine Instruction Scheduler." },
629 { "mono", DBG(MONOLITHIC_SHADERS), "Use old-style monolithic shaders compiled on demand" },
630 { "unsafemath", DBG(UNSAFE_MATH), "Enable unsafe math shader optimizations" },
631 { "nodccfb", DBG(NO_DCC_FB), "Disable separate DCC on the main framebuffer" },
632 { "nodccmsaa", DBG(NO_DCC_MSAA), "Disable DCC for MSAA" },
633 { "dccmsaa", DBG(DCC_MSAA), "Enable DCC for MSAA" },
634 { "nodpbb", DBG(NO_DPBB), "Disable DPBB." },
635 { "nodfsm", DBG(NO_DFSM), "Disable DFSM." },
636 { "dpbb", DBG(DPBB), "Enable DPBB." },
637 { "dfsm", DBG(DFSM), "Enable DFSM." },
638 { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" },
639 { "reserve_vmid", DBG(RESERVE_VMID), "Force VMID reservation per context." },
640
641 DEBUG_NAMED_VALUE_END /* must be last */
642 };
643
644 static const char* r600_get_vendor(struct pipe_screen* pscreen)
645 {
646 return "X.Org";
647 }
648
649 static const char* r600_get_device_vendor(struct pipe_screen* pscreen)
650 {
651 return "AMD";
652 }
653
654 static const char *r600_get_marketing_name(struct radeon_winsys *ws)
655 {
656 if (!ws->get_chip_name)
657 return NULL;
658 return ws->get_chip_name(ws);
659 }
660
661 static const char *r600_get_family_name(const struct r600_common_screen *rscreen)
662 {
663 switch (rscreen->info.family) {
664 case CHIP_TAHITI: return "AMD TAHITI";
665 case CHIP_PITCAIRN: return "AMD PITCAIRN";
666 case CHIP_VERDE: return "AMD CAPE VERDE";
667 case CHIP_OLAND: return "AMD OLAND";
668 case CHIP_HAINAN: return "AMD HAINAN";
669 case CHIP_BONAIRE: return "AMD BONAIRE";
670 case CHIP_KAVERI: return "AMD KAVERI";
671 case CHIP_KABINI: return "AMD KABINI";
672 case CHIP_HAWAII: return "AMD HAWAII";
673 case CHIP_MULLINS: return "AMD MULLINS";
674 case CHIP_TONGA: return "AMD TONGA";
675 case CHIP_ICELAND: return "AMD ICELAND";
676 case CHIP_CARRIZO: return "AMD CARRIZO";
677 case CHIP_FIJI: return "AMD FIJI";
678 case CHIP_POLARIS10: return "AMD POLARIS10";
679 case CHIP_POLARIS11: return "AMD POLARIS11";
680 case CHIP_POLARIS12: return "AMD POLARIS12";
681 case CHIP_STONEY: return "AMD STONEY";
682 case CHIP_VEGA10: return "AMD VEGA10";
683 case CHIP_RAVEN: return "AMD RAVEN";
684 default: return "AMD unknown";
685 }
686 }
687
688 static void r600_disk_cache_create(struct r600_common_screen *rscreen)
689 {
690 /* Don't use the cache if shader dumping is enabled. */
691 if (rscreen->debug_flags & DBG_ALL_SHADERS)
692 return;
693
694 /* TODO: remove this once gallium supports a nir cache */
695 if (rscreen->debug_flags & DBG(NIR))
696 return;
697
698 uint32_t mesa_timestamp;
699 if (disk_cache_get_function_timestamp(r600_disk_cache_create,
700 &mesa_timestamp)) {
701 char *timestamp_str;
702 int res = -1;
703 uint32_t llvm_timestamp;
704
705 if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
706 &llvm_timestamp)) {
707 res = asprintf(&timestamp_str, "%u_%u",
708 mesa_timestamp, llvm_timestamp);
709 }
710
711 if (res != -1) {
712 /* These flags affect shader compilation. */
713 uint64_t shader_debug_flags =
714 rscreen->debug_flags &
715 (DBG(FS_CORRECT_DERIVS_AFTER_KILL) |
716 DBG(SI_SCHED) |
717 DBG(UNSAFE_MATH));
718
719 rscreen->disk_shader_cache =
720 disk_cache_create(r600_get_family_name(rscreen),
721 timestamp_str,
722 shader_debug_flags);
723 free(timestamp_str);
724 }
725 }
726 }
727
728 static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen)
729 {
730 struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
731 return rscreen->disk_shader_cache;
732 }
733
734 static const char* r600_get_name(struct pipe_screen* pscreen)
735 {
736 struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
737
738 return rscreen->renderer_string;
739 }
740
741 static float r600_get_paramf(struct pipe_screen* pscreen,
742 enum pipe_capf param)
743 {
744 switch (param) {
745 case PIPE_CAPF_MAX_LINE_WIDTH:
746 case PIPE_CAPF_MAX_LINE_WIDTH_AA:
747 case PIPE_CAPF_MAX_POINT_WIDTH:
748 case PIPE_CAPF_MAX_POINT_WIDTH_AA:
749 return 8192.0f;
750 case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
751 return 16.0f;
752 case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
753 return 16.0f;
754 case PIPE_CAPF_GUARD_BAND_LEFT:
755 case PIPE_CAPF_GUARD_BAND_TOP:
756 case PIPE_CAPF_GUARD_BAND_RIGHT:
757 case PIPE_CAPF_GUARD_BAND_BOTTOM:
758 return 0.0f;
759 }
760 return 0.0f;
761 }
762
763 static int r600_get_video_param(struct pipe_screen *screen,
764 enum pipe_video_profile profile,
765 enum pipe_video_entrypoint entrypoint,
766 enum pipe_video_cap param)
767 {
768 switch (param) {
769 case PIPE_VIDEO_CAP_SUPPORTED:
770 return vl_profile_supported(screen, profile, entrypoint);
771 case PIPE_VIDEO_CAP_NPOT_TEXTURES:
772 return 1;
773 case PIPE_VIDEO_CAP_MAX_WIDTH:
774 case PIPE_VIDEO_CAP_MAX_HEIGHT:
775 return vl_video_buffer_max_size(screen);
776 case PIPE_VIDEO_CAP_PREFERED_FORMAT:
777 return PIPE_FORMAT_NV12;
778 case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
779 return false;
780 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
781 return false;
782 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
783 return true;
784 case PIPE_VIDEO_CAP_MAX_LEVEL:
785 return vl_level_supported(screen, profile);
786 default:
787 return 0;
788 }
789 }
790
791 static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
792 enum pipe_shader_ir ir_type)
793 {
794 if (ir_type != PIPE_SHADER_IR_TGSI)
795 return 256;
796
797 /* Only 16 waves per thread-group on gfx9: 16 waves * 64 threads = 1024. */
798 if (screen->chip_class >= GFX9)
799 return 1024;
800
801 /* Up to 40 waves per thread-group on GCN < gfx9 (2560 threads), but
802 * expose a nice round number instead.
803 */
804 return 2048;
805 }
806
807 static int r600_get_compute_param(struct pipe_screen *screen,
808 enum pipe_shader_ir ir_type,
809 enum pipe_compute_cap param,
810 void *ret)
811 {
812 struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
813
814 //TODO: select these params by asic
815 switch (param) {
816 case PIPE_COMPUTE_CAP_IR_TARGET: {
817 const char *gpu;
818 const char *triple;
819
820 if (HAVE_LLVM < 0x0400)
821 triple = "amdgcn--";
822 else
823 triple = "amdgcn-mesa-mesa3d";
824
825 gpu = ac_get_llvm_processor_name(rscreen->family);
826 if (ret) {
827 sprintf(ret, "%s-%s", gpu, triple);
828 }
829 /* +2 for the dash and the terminating NUL byte */
830 return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
831 }
832 case PIPE_COMPUTE_CAP_GRID_DIMENSION:
833 if (ret) {
834 uint64_t *grid_dimension = ret;
835 grid_dimension[0] = 3;
836 }
837 return 1 * sizeof(uint64_t);
838
839 case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
840 if (ret) {
841 uint64_t *grid_size = ret;
842 grid_size[0] = 65535;
843 grid_size[1] = 65535;
844 grid_size[2] = 65535;
845 }
846 return 3 * sizeof(uint64_t);
847
848 case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
849 if (ret) {
850 uint64_t *block_size = ret;
851 unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
852 block_size[0] = threads_per_block;
853 block_size[1] = threads_per_block;
854 block_size[2] = threads_per_block;
855 }
856 return 3 * sizeof(uint64_t);
857
858 case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
859 if (ret) {
860 uint64_t *max_threads_per_block = ret;
861 *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
862 }
863 return sizeof(uint64_t);
864 case PIPE_COMPUTE_CAP_ADDRESS_BITS:
865 if (ret) {
866 uint32_t *address_bits = ret;
867 address_bits[0] = 64;
868 }
869 return 1 * sizeof(uint32_t);
870
871 case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
872 if (ret) {
873 uint64_t *max_global_size = ret;
874 uint64_t max_mem_alloc_size;
875
876 r600_get_compute_param(screen, ir_type,
877 PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
878 &max_mem_alloc_size);
879
880 /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
881 * 1/4 of the MAX_GLOBAL_SIZE. Since the
882 * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
883 * make sure we never report more than
884 * 4 * MAX_MEM_ALLOC_SIZE.
885 */
886 *max_global_size = MIN2(4 * max_mem_alloc_size,
887 MAX2(rscreen->info.gart_size,
888 rscreen->info.vram_size));
889 }
890 return sizeof(uint64_t);
891
892 case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
893 if (ret) {
894 uint64_t *max_local_size = ret;
895 /* Value reported by the closed source driver. */
896 *max_local_size = 32768;
897 }
898 return sizeof(uint64_t);
899
900 case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
901 if (ret) {
902 uint64_t *max_input_size = ret;
903 /* Value reported by the closed source driver. */
904 *max_input_size = 1024;
905 }
906 return sizeof(uint64_t);
907
908 case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
909 if (ret) {
910 uint64_t *max_mem_alloc_size = ret;
911
912 *max_mem_alloc_size = rscreen->info.max_alloc_size;
913 }
914 return sizeof(uint64_t);
915
916 case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
917 if (ret) {
918 uint32_t *max_clock_frequency = ret;
919 *max_clock_frequency = rscreen->info.max_shader_clock;
920 }
921 return sizeof(uint32_t);
922
923 case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
924 if (ret) {
925 uint32_t *max_compute_units = ret;
926 *max_compute_units = rscreen->info.num_good_compute_units;
927 }
928 return sizeof(uint32_t);
929
930 case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
931 if (ret) {
932 uint32_t *images_supported = ret;
933 *images_supported = 0;
934 }
935 return sizeof(uint32_t);
936 case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
937 break; /* unused */
938 case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
939 if (ret) {
940 uint32_t *subgroup_size = ret;
941 *subgroup_size = 64;
942 }
943 return sizeof(uint32_t);
944 case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
945 if (ret) {
946 uint64_t *max_variable_threads_per_block = ret;
947 if (ir_type == PIPE_SHADER_IR_TGSI)
948 *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
949 else
950 *max_variable_threads_per_block = 0;
951 }
952 return sizeof(uint64_t);
953 }
954
955 fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
956 return 0;
957 }
958
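/* pipe_screen::get_timestamp returns nanoseconds; clock_crystal_freq is in
 * kHz, so ticks * 1000000 / freq converts GPU counter ticks to ns. */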
959 static uint64_t r600_get_timestamp(struct pipe_screen *screen)
960 {
961 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
962
963 return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) /
964 rscreen->info.clock_crystal_freq;
965 }
966
967 static void r600_query_memory_info(struct pipe_screen *screen,
968 struct pipe_memory_info *info)
969 {
970 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
971 struct radeon_winsys *ws = rscreen->ws;
972 unsigned vram_usage, gtt_usage;
973
974 info->total_device_memory = rscreen->info.vram_size / 1024;
975 info->total_staging_memory = rscreen->info.gart_size / 1024;
976
977 /* The real TTM memory usage is somewhat random, because:
978 *
979 * 1) TTM delays freeing memory, because it can only free it after
980 * fences expire.
981 *
982 * 2) The memory usage can be really low if big VRAM evictions are
983 * taking place, but the real usage is well above the size of VRAM.
984 *
985 * Instead, return statistics of this process.
986 */
987 vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
988 gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
989
990 info->avail_device_memory =
991 vram_usage <= info->total_device_memory ?
992 info->total_device_memory - vram_usage : 0;
993 info->avail_staging_memory =
994 gtt_usage <= info->total_staging_memory ?
995 info->total_staging_memory - gtt_usage : 0;
996
997 info->device_memory_evicted =
998 ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
999
1000 if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4)
1001 info->nr_device_memory_evictions =
1002 ws->query_value(ws, RADEON_NUM_EVICTIONS);
1003 else
1004 /* Just return the number of evicted 64KB pages. */
1005 info->nr_device_memory_evictions = info->device_memory_evicted / 64;
1006 }
1007
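/* Common resource creation entry point: buffers are created with a 256-byte
 * alignment, everything else goes through the texture path. */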
1008 struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
1009 const struct pipe_resource *templ)
1010 {
1011 if (templ->target == PIPE_BUFFER) {
1012 return si_buffer_create(screen, templ, 256);
1013 } else {
1014 return si_texture_create(screen, templ);
1015 }
1016 }
1017
1018 bool si_common_screen_init(struct r600_common_screen *rscreen,
1019 struct radeon_winsys *ws)
1020 {
1021 char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
1022 struct utsname uname_data;
1023 const char *chip_name;
1024
1025 ws->query_info(ws, &rscreen->info);
1026 rscreen->ws = ws;
1027
1028 if ((chip_name = r600_get_marketing_name(ws)))
1029 snprintf(family_name, sizeof(family_name), "%s / ",
1030 r600_get_family_name(rscreen) + 4 /* skip the "AMD " prefix */);
1031 else
1032 chip_name = r600_get_family_name(rscreen);
1033
1034 if (uname(&uname_data) == 0)
1035 snprintf(kernel_version, sizeof(kernel_version),
1036 " / %s", uname_data.release);
1037
1038 if (HAVE_LLVM > 0) {
1039 snprintf(llvm_string, sizeof(llvm_string),
1040 ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
1041 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
1042 }
1043
1044 snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
1045 "%s (%sDRM %i.%i.%i%s%s)",
1046 chip_name, family_name, rscreen->info.drm_major,
1047 rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
1048 kernel_version, llvm_string);
1049
1050 rscreen->b.get_name = r600_get_name;
1051 rscreen->b.get_vendor = r600_get_vendor;
1052 rscreen->b.get_device_vendor = r600_get_device_vendor;
1053 rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache;
1054 rscreen->b.get_compute_param = r600_get_compute_param;
1055 rscreen->b.get_paramf = r600_get_paramf;
1056 rscreen->b.get_timestamp = r600_get_timestamp;
1057 rscreen->b.resource_destroy = u_resource_destroy_vtbl;
1058 rscreen->b.resource_from_user_memory = si_buffer_from_user_memory;
1059 rscreen->b.query_memory_info = r600_query_memory_info;
1060
1061 if (rscreen->info.has_hw_decode) {
1062 rscreen->b.get_video_param = si_vid_get_video_param;
1063 rscreen->b.is_video_format_supported = si_vid_is_format_supported;
1064 } else {
1065 rscreen->b.get_video_param = r600_get_video_param;
1066 rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
1067 }
1068
1069 si_init_screen_texture_functions(rscreen);
1070 si_init_screen_query_functions(rscreen);
1071
1072 rscreen->family = rscreen->info.family;
1073 rscreen->chip_class = rscreen->info.chip_class;
1074 rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
1075 rscreen->has_rbplus = false;
1076 rscreen->rbplus_allowed = false;
1077
1078 r600_disk_cache_create(rscreen);
1079
1080 slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64);
1081
1082 rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
1083 if (rscreen->force_aniso >= 0) {
1084 printf("radeon: Forcing anisotropy filter to %ix\n",
1085 /* round down to a power of two */
1086 1 << util_logbase2(rscreen->force_aniso));
1087 }
1088
1089 (void) mtx_init(&rscreen->aux_context_lock, mtx_plain);
1090 (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain);
1091
1092 if (rscreen->debug_flags & DBG(INFO)) {
1093 printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
1094 rscreen->info.pci_domain, rscreen->info.pci_bus,
1095 rscreen->info.pci_dev, rscreen->info.pci_func);
1096 printf("pci_id = 0x%x\n", rscreen->info.pci_id);
1097 printf("family = %i (%s)\n", rscreen->info.family,
1098 r600_get_family_name(rscreen));
1099 printf("chip_class = %i\n", rscreen->info.chip_class);
1100 printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size);
1101 printf("gart_page_size = %u\n", rscreen->info.gart_page_size);
1102 printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
1103 printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
1104 printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
1105 printf("max_alloc_size = %i MB\n",
1106 (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
1107 printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size);
1108 printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram);
1109 printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
1110 printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
1111 printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode);
1112 printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings);
1113 printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings);
1114 printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version);
1115 printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version);
1116 printf("me_fw_version = %i\n", rscreen->info.me_fw_version);
1117 printf("me_fw_feature = %i\n", rscreen->info.me_fw_feature);
1118 printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version);
1119 printf("pfp_fw_feature = %i\n", rscreen->info.pfp_fw_feature);
1120 printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version);
1121 printf("ce_fw_feature = %i\n", rscreen->info.ce_fw_feature);
1122 printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
1123 printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
1124 printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size);
1125 printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
1126 rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
1127 printf("has_userptr = %i\n", rscreen->info.has_userptr);
1128 printf("has_syncobj = %u\n", rscreen->info.has_syncobj);
1129 printf("has_sync_file = %u\n", rscreen->info.has_sync_file);
1130
1131 printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
1132 printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
1133 printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
1134 printf("max_se = %i\n", rscreen->info.max_se);
1135 printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
1136
1137 printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
1138 printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
1139 printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
1140 printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
1141 printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
1142 printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
1143 printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask);
1144 printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
1145 }
1146 return true;
1147 }
1148
1149 void si_destroy_common_screen(struct r600_common_screen *rscreen)
1150 {
1151 si_perfcounters_destroy(rscreen);
1152 si_gpu_load_kill_thread(rscreen);
1153
1154 mtx_destroy(&rscreen->gpu_load_mutex);
1155 mtx_destroy(&rscreen->aux_context_lock);
1156 rscreen->aux_context->destroy(rscreen->aux_context);
1157
1158 slab_destroy_parent(&rscreen->pool_transfers);
1159
1160 disk_cache_destroy(rscreen->disk_shader_cache);
1161 rscreen->ws->destroy(rscreen->ws);
1162 FREE(rscreen);
1163 }
1164
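/* Shader dumping is keyed off the per-stage debug flags (vs, gs, ps, ...),
 * which are expected to occupy the low bits of debug_flags in the same order
 * as the PIPE_SHADER_* processor values. */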
1165 bool si_can_dump_shader(struct r600_common_screen *rscreen,
1166 unsigned processor)
1167 {
1168 return rscreen->debug_flags & (1 << processor);
1169 }
1170
1171 bool si_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
1172 {
1173 return (rscreen->debug_flags & DBG(CHECK_IR)) ||
1174 si_can_dump_shader(rscreen, processor);
1175 }
1176
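/* Clear a buffer range through the aux context's DMA clear path. The aux
 * context is shared, so the call is serialized with aux_context_lock and the
 * aux context is flushed before the lock is released. */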
1177 void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
1178 uint64_t offset, uint64_t size, unsigned value)
1179 {
1180 struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
1181
1182 mtx_lock(&rscreen->aux_context_lock);
1183 rctx->dma_clear_buffer(&rctx->b, dst, offset, size, value);
1184 rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
1185 mtx_unlock(&rscreen->aux_context_lock);
1186 }