radeonsi: remove r600_common_context::clear_buffer
[mesa.git] src/gallium/drivers/radeon/r600_pipe_common.c
1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "r600_pipe_common.h"
25 #include "r600_cs.h"
26 #include "tgsi/tgsi_parse.h"
27 #include "util/list.h"
28 #include "util/u_draw_quad.h"
29 #include "util/u_memory.h"
30 #include "util/u_format_s3tc.h"
31 #include "util/u_upload_mgr.h"
32 #include "util/os_time.h"
33 #include "vl/vl_decoder.h"
34 #include "vl/vl_video_buffer.h"
35 #include "radeon/radeon_video.h"
36 #include "amd/common/ac_llvm_util.h"
37 #include "amd/common/sid.h"
38 #include <inttypes.h>
39 #include <sys/utsname.h>
40
41 #include <llvm-c/TargetMachine.h>
42
43
44 /*
45 * shader binary helpers.
46 */
47 void si_radeon_shader_binary_init(struct ac_shader_binary *b)
48 {
49 memset(b, 0, sizeof(*b));
50 }
51
52 void si_radeon_shader_binary_clean(struct ac_shader_binary *b)
53 {
54 if (!b)
55 return;
56 FREE(b->code);
57 FREE(b->config);
58 FREE(b->rodata);
59 FREE(b->global_symbol_offsets);
60 FREE(b->relocs);
61 FREE(b->disasm_string);
62 FREE(b->llvm_ir_string);
63 }
64
65 /*
66 * pipe_context
67 */
68
69 /**
70 * Write an EOP event.
71 *
72 * \param event EVENT_TYPE_*
73 * \param event_flags Optional cache flush flags (TC)
74 * \param data_sel 1 = fence, 3 = timestamp
75 * \param buf Buffer
76 * \param va GPU address
77 * \param new_fence Fence value to write for this event.
78 * \param query_type PIPE_QUERY_* type; used for the GFX9 EOP bug workaround
79 */
80 void si_gfx_write_event_eop(struct r600_common_context *ctx,
81 unsigned event, unsigned event_flags,
82 unsigned data_sel,
83 struct r600_resource *buf, uint64_t va,
84 uint32_t new_fence, unsigned query_type)
85 {
86 struct radeon_winsys_cs *cs = ctx->gfx.cs;
87 unsigned op = EVENT_TYPE(event) |
88 EVENT_INDEX(5) |
89 event_flags;
90 unsigned sel = EOP_DATA_SEL(data_sel);
91
92 /* Wait for write confirmation before writing data, but don't send
93 * an interrupt. */
94 if (data_sel != EOP_DATA_SEL_DISCARD)
95 sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
96
97 if (ctx->chip_class >= GFX9) {
98 /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
99 * counters) must immediately precede every timestamp event to
100 * prevent a GPU hang on GFX9.
101 *
102 * Occlusion queries don't need to do it here, because they
103 * always do ZPASS_DONE before the timestamp.
104 */
105 if (ctx->chip_class == GFX9 &&
106 query_type != PIPE_QUERY_OCCLUSION_COUNTER &&
107 query_type != PIPE_QUERY_OCCLUSION_PREDICATE &&
108 query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
109 struct r600_resource *scratch = ctx->eop_bug_scratch;
110
111 assert(16 * ctx->screen->info.num_render_backends <=
112 scratch->b.b.width0);
113 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
114 radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
115 radeon_emit(cs, scratch->gpu_address);
116 radeon_emit(cs, scratch->gpu_address >> 32);
117
118 radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
119 RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
120 }
121
122 radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0));
123 radeon_emit(cs, op);
124 radeon_emit(cs, sel);
125 radeon_emit(cs, va); /* address lo */
126 radeon_emit(cs, va >> 32); /* address hi */
127 radeon_emit(cs, new_fence); /* immediate data lo */
128 radeon_emit(cs, 0); /* immediate data hi */
129 radeon_emit(cs, 0); /* unused */
130 } else {
131 if (ctx->chip_class == CIK ||
132 ctx->chip_class == VI) {
133 struct r600_resource *scratch = ctx->eop_bug_scratch;
134 uint64_t va = scratch->gpu_address;
135
136 /* Two EOP events are required to make all engines go idle
137 * (and to let any optional cache flushes execute) before the
138 * timestamp is written.
139 */
140 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
141 radeon_emit(cs, op);
142 radeon_emit(cs, va);
143 radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
144 radeon_emit(cs, 0); /* immediate data */
145 radeon_emit(cs, 0); /* unused */
146
147 radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
148 RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
149 }
150
151 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
152 radeon_emit(cs, op);
153 radeon_emit(cs, va);
154 radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
155 radeon_emit(cs, new_fence); /* immediate data */
156 radeon_emit(cs, 0); /* unused */
157 }
158
159 if (buf) {
160 radeon_add_to_buffer_list(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE,
161 RADEON_PRIO_QUERY);
162 }
163 }
164
165 unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen)
166 {
167 unsigned dwords = 6;
168
169 if (screen->chip_class == CIK ||
170 screen->chip_class == VI)
171 dwords *= 2;
172
173 if (!screen->info.has_virtual_memory)
174 dwords += 2;
175
176 return dwords;
177 }
178
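/**
 * Emit a WAIT_REG_MEM packet that makes the CP poll the 32-bit value at
 * \p va until (value & mask) == ref.
 */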
179 void si_gfx_wait_fence(struct r600_common_context *ctx,
180 uint64_t va, uint32_t ref, uint32_t mask)
181 {
182 struct radeon_winsys_cs *cs = ctx->gfx.cs;
183
184 radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
185 radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
186 radeon_emit(cs, va);
187 radeon_emit(cs, va >> 32);
188 radeon_emit(cs, ref); /* reference value */
189 radeon_emit(cs, mask); /* mask */
190 radeon_emit(cs, 4); /* poll interval */
191 }
192
193 static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
194 {
195 struct radeon_winsys_cs *cs = rctx->dma.cs;
196
197 /* NOP waits for idle on Evergreen and later. */
198 if (rctx->chip_class >= CIK)
199 radeon_emit(cs, 0x00000000); /* NOP */
200 else
201 radeon_emit(cs, 0xf0000000); /* NOP */
202 }
203
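/* Make sure the SDMA IB has room for \p num_dw dwords, accounting for the
 * memory usage of \p dst and \p src; flush the GFX and/or SDMA IBs and emit
 * a wait-for-idle when dependencies require it. Called before every SDMA
 * transfer. */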
204 void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
205 struct r600_resource *dst, struct r600_resource *src)
206 {
207 uint64_t vram = ctx->dma.cs->used_vram;
208 uint64_t gtt = ctx->dma.cs->used_gart;
209
210 if (dst) {
211 vram += dst->vram_usage;
212 gtt += dst->gart_usage;
213 }
214 if (src) {
215 vram += src->vram_usage;
216 gtt += src->gart_usage;
217 }
218
219 /* Flush the GFX IB if DMA depends on it. */
220 if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
221 ((dst &&
222 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf,
223 RADEON_USAGE_READWRITE)) ||
224 (src &&
225 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf,
226 RADEON_USAGE_WRITE))))
227 ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
228
229 /* Flush if there's not enough space, or if the memory usage per IB
230 * is too large.
231 *
232 * IBs using too little memory are limited by the IB submission overhead.
233 * IBs using too much memory are limited by the kernel/TTM overhead.
234 * Too long IBs create CPU-GPU pipeline bubbles and add latency.
235 *
236 * This heuristic makes sure that DMA requests are executed
237 * very soon after the call is made and lowers memory usage.
238 * It improves texture upload performance by keeping the DMA
239 * engine busy while uploads are being submitted.
240 */
241 num_dw++; /* for emit_wait_idle below */
242 if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
243 ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 ||
244 !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) {
245 ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
246 assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
247 }
248
249 /* Wait for idle if either buffer has been used in the IB before to
250 * prevent read-after-write hazards.
251 */
252 if ((dst &&
253 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf,
254 RADEON_USAGE_READWRITE)) ||
255 (src &&
256 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf,
257 RADEON_USAGE_WRITE)))
258 r600_dma_emit_wait_idle(ctx);
259
260 /* If GPUVM is not supported, the CS checker needs 2 entries
261 * in the buffer list per packet, which has to be done manually.
262 */
263 if (ctx->screen->info.has_virtual_memory) {
264 if (dst)
265 radeon_add_to_buffer_list(ctx, &ctx->dma, dst,
266 RADEON_USAGE_WRITE,
267 RADEON_PRIO_SDMA_BUFFER);
268 if (src)
269 radeon_add_to_buffer_list(ctx, &ctx->dma, src,
270 RADEON_USAGE_READ,
271 RADEON_PRIO_SDMA_BUFFER);
272 }
273
274 /* this function is called before all DMA calls, so increment this. */
275 ctx->num_dma_calls++;
276 }
277
278 static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
279 {
280 }
281
282 void si_preflush_suspend_features(struct r600_common_context *ctx)
283 {
284 /* suspend queries */
285 if (!LIST_IS_EMPTY(&ctx->active_queries))
286 si_suspend_queries(ctx);
287 }
288
289 void si_postflush_resume_features(struct r600_common_context *ctx)
290 {
291 /* resume queries */
292 if (!LIST_IS_EMPTY(&ctx->active_queries))
293 si_resume_queries(ctx);
294 }
295
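/* Flush the SDMA IB and return its fence. With R600_DEBUG=check_vm, also
 * save a copy of the IB and check for VM faults after waiting on the fence. */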
296 static void r600_flush_dma_ring(void *ctx, unsigned flags,
297 struct pipe_fence_handle **fence)
298 {
299 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
300 struct radeon_winsys_cs *cs = rctx->dma.cs;
301 struct radeon_saved_cs saved;
302 bool check_vm =
303 (rctx->screen->debug_flags & DBG(CHECK_VM)) &&
304 rctx->check_vm_faults;
305
306 if (!radeon_emitted(cs, 0)) {
307 if (fence)
308 rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
309 return;
310 }
311
312 if (check_vm)
313 si_save_cs(rctx->ws, cs, &saved, true);
314
315 rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
316 if (fence)
317 rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
318
319 if (check_vm) {
320 /* Use conservative timeout 800ms, after which we won't wait any
321 * longer and assume the GPU is hung.
322 */
323 rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000);
324
325 rctx->check_vm_faults(rctx, &saved, RING_DMA);
326 si_clear_saved_cs(&saved);
327 }
328 }
329
330 /**
331 * Store a linearized copy of all chunks of \p cs together with the buffer
332 * list in \p saved.
333 */
334 void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
335 struct radeon_saved_cs *saved, bool get_buffer_list)
336 {
337 uint32_t *buf;
338 unsigned i;
339
340 /* Save the IB chunks. */
341 saved->num_dw = cs->prev_dw + cs->current.cdw;
342 saved->ib = MALLOC(4 * saved->num_dw);
343 if (!saved->ib)
344 goto oom;
345
346 buf = saved->ib;
347 for (i = 0; i < cs->num_prev; ++i) {
348 memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
349 buf += cs->prev[i].cdw;
350 }
351 memcpy(buf, cs->current.buf, cs->current.cdw * 4);
352
353 if (!get_buffer_list)
354 return;
355
356 /* Save the buffer list. */
357 saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
358 saved->bo_list = CALLOC(saved->bo_count,
359 sizeof(saved->bo_list[0]));
360 if (!saved->bo_list) {
361 FREE(saved->ib);
362 goto oom;
363 }
364 ws->cs_get_buffer_list(cs, saved->bo_list);
365
366 return;
367
368 oom:
369 fprintf(stderr, "%s: out of memory\n", __func__);
370 memset(saved, 0, sizeof(*saved));
371 }
372
373 void si_clear_saved_cs(struct radeon_saved_cs *saved)
374 {
375 FREE(saved->ib);
376 FREE(saved->bo_list);
377
378 memset(saved, 0, sizeof(*saved));
379 }
380
381 static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
382 {
383 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
384 unsigned latest = rctx->ws->query_value(rctx->ws,
385 RADEON_GPU_RESET_COUNTER);
386
387 if (rctx->gpu_reset_counter == latest)
388 return PIPE_NO_RESET;
389
390 rctx->gpu_reset_counter = latest;
391 return PIPE_UNKNOWN_CONTEXT_RESET;
392 }
393
394 static void r600_set_device_reset_callback(struct pipe_context *ctx,
395 const struct pipe_device_reset_callback *cb)
396 {
397 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
398
399 if (cb)
400 rctx->device_reset_callback = *cb;
401 else
402 memset(&rctx->device_reset_callback, 0,
403 sizeof(rctx->device_reset_callback));
404 }
405
406 bool si_check_device_reset(struct r600_common_context *rctx)
407 {
408 enum pipe_reset_status status;
409
410 if (!rctx->device_reset_callback.reset)
411 return false;
412
413 if (!rctx->b.get_device_reset_status)
414 return false;
415
416 status = rctx->b.get_device_reset_status(&rctx->b);
417 if (status == PIPE_NO_RESET)
418 return false;
419
420 rctx->device_reset_callback.reset(rctx->device_reset_callback.data, status);
421 return true;
422 }
423
424 static bool r600_resource_commit(struct pipe_context *pctx,
425 struct pipe_resource *resource,
426 unsigned level, struct pipe_box *box,
427 bool commit)
428 {
429 struct r600_common_context *ctx = (struct r600_common_context *)pctx;
430 struct r600_resource *res = r600_resource(resource);
431
432 /*
433 * Since buffer commitment changes cannot be pipelined, we need to
434 * (a) flush any pending commands that refer to the buffer we're about
435 * to change, and
436 * (b) wait for threaded submit to finish, including those that were
437 * triggered by some other, earlier operation.
438 */
439 if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
440 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
441 res->buf, RADEON_USAGE_READWRITE)) {
442 ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
443 }
444 if (radeon_emitted(ctx->dma.cs, 0) &&
445 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
446 res->buf, RADEON_USAGE_READWRITE)) {
447 ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
448 }
449
450 ctx->ws->cs_sync_flush(ctx->dma.cs);
451 ctx->ws->cs_sync_flush(ctx->gfx.cs);
452
453 assert(resource->target == PIPE_BUFFER);
454
455 return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
456 }
457
458 bool si_common_context_init(struct r600_common_context *rctx,
459 struct r600_common_screen *rscreen,
460 unsigned context_flags)
461 {
462 slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers);
463 slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers);
464
465 rctx->screen = rscreen;
466 rctx->ws = rscreen->ws;
467 rctx->family = rscreen->family;
468 rctx->chip_class = rscreen->chip_class;
469
470 rctx->b.invalidate_resource = si_invalidate_resource;
471 rctx->b.resource_commit = r600_resource_commit;
472 rctx->b.transfer_map = u_transfer_map_vtbl;
473 rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
474 rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
475 rctx->b.texture_subdata = u_default_texture_subdata;
476 rctx->b.memory_barrier = r600_memory_barrier;
477 rctx->b.buffer_subdata = si_buffer_subdata;
478
479 if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
480 rctx->b.get_device_reset_status = r600_get_reset_status;
481 rctx->gpu_reset_counter =
482 rctx->ws->query_value(rctx->ws,
483 RADEON_GPU_RESET_COUNTER);
484 }
485
486 rctx->b.set_device_reset_callback = r600_set_device_reset_callback;
487
488 si_init_context_texture_functions(rctx);
489 si_init_query_functions(rctx);
490
491 if (rctx->chip_class == CIK ||
492 rctx->chip_class == VI ||
493 rctx->chip_class == GFX9) {
494 rctx->eop_bug_scratch = (struct r600_resource*)
495 pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
496 16 * rscreen->info.num_render_backends);
497 if (!rctx->eop_bug_scratch)
498 return false;
499 }
500
501 rctx->allocator_zeroed_memory =
502 u_suballocator_create(&rctx->b, rscreen->info.gart_page_size,
503 0, PIPE_USAGE_DEFAULT, 0, true);
504 if (!rctx->allocator_zeroed_memory)
505 return false;
506
507 rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
508 0, PIPE_USAGE_STREAM);
509 if (!rctx->b.stream_uploader)
510 return false;
511
512 rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
513 0, PIPE_USAGE_DEFAULT);
514 if (!rctx->b.const_uploader)
515 return false;
516
517 rctx->ctx = rctx->ws->ctx_create(rctx->ws);
518 if (!rctx->ctx)
519 return false;
520
521 if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
522 rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
523 r600_flush_dma_ring,
524 rctx);
525 rctx->dma.flush = r600_flush_dma_ring;
526 }
527
528 return true;
529 }
530
531 void si_common_context_cleanup(struct r600_common_context *rctx)
532 {
533 unsigned i,j;
534
535 /* Release DCC stats. */
536 for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) {
537 assert(!rctx->dcc_stats[i].query_active);
538
539 for (j = 0; j < ARRAY_SIZE(rctx->dcc_stats[i].ps_stats); j++)
540 if (rctx->dcc_stats[i].ps_stats[j])
541 rctx->b.destroy_query(&rctx->b,
542 rctx->dcc_stats[i].ps_stats[j]);
543
544 r600_texture_reference(&rctx->dcc_stats[i].tex, NULL);
545 }
546
547 if (rctx->query_result_shader)
548 rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader);
549
550 if (rctx->gfx.cs)
551 rctx->ws->cs_destroy(rctx->gfx.cs);
552 if (rctx->dma.cs)
553 rctx->ws->cs_destroy(rctx->dma.cs);
554 if (rctx->ctx)
555 rctx->ws->ctx_destroy(rctx->ctx);
556
557 if (rctx->b.stream_uploader)
558 u_upload_destroy(rctx->b.stream_uploader);
559 if (rctx->b.const_uploader)
560 u_upload_destroy(rctx->b.const_uploader);
561
562 slab_destroy_child(&rctx->pool_transfers);
563 slab_destroy_child(&rctx->pool_transfers_unsync);
564
565 if (rctx->allocator_zeroed_memory) {
566 u_suballocator_destroy(rctx->allocator_zeroed_memory);
567 }
568 rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL);
569 rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL);
570 r600_resource_reference(&rctx->eop_bug_scratch, NULL);
571 }
572
573 /*
574 * pipe_screen
575 */
576
577 static const struct debug_named_value common_debug_options[] = {
578 /* logging */
579 { "tex", DBG(TEX), "Print texture info" },
580 { "nir", DBG(NIR), "Enable experimental NIR shaders" },
581 { "compute", DBG(COMPUTE), "Print compute info" },
582 { "vm", DBG(VM), "Print virtual addresses when creating resources" },
583 { "info", DBG(INFO), "Print driver information" },
584
585 /* shaders */
586 { "vs", DBG(VS), "Print vertex shaders" },
587 { "gs", DBG(GS), "Print geometry shaders" },
588 { "ps", DBG(PS), "Print pixel shaders" },
589 { "cs", DBG(CS), "Print compute shaders" },
590 { "tcs", DBG(TCS), "Print tessellation control shaders" },
591 { "tes", DBG(TES), "Print tessellation evaluation shaders" },
592 { "noir", DBG(NO_IR), "Don't print the LLVM IR"},
593 { "notgsi", DBG(NO_TGSI), "Don't print the TGSI"},
594 { "noasm", DBG(NO_ASM), "Don't print disassembled shaders"},
595 { "preoptir", DBG(PREOPT_IR), "Print the LLVM IR before initial optimizations" },
596 { "checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR" },
597 { "nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized shader variants." },
598
599 { "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
600 { "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
601 { "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
602 { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },
603
604 /* features */
605 { "nodma", DBG(NO_ASYNC_DMA), "Disable asynchronous DMA" },
606 { "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" },
607 { "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" },
608 { "notiling", DBG(NO_TILING), "Disable tiling" },
609 { "switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on end-of-packet." },
610 { "forcedma", DBG(FORCE_DMA), "Use asynchronous DMA for all operations when possible." },
611 { "precompile", DBG(PRECOMPILE), "Compile one shader variant at shader creation." },
612 { "nowc", DBG(NO_WC), "Disable GTT write combining" },
613 { "check_vm", DBG(CHECK_VM), "Check VM faults and dump debug info." },
614 { "nodcc", DBG(NO_DCC), "Disable DCC." },
615 { "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." },
616 { "norbplus", DBG(NO_RB_PLUS), "Disable RB+." },
617 { "sisched", DBG(SI_SCHED), "Enable LLVM SI Machine Instruction Scheduler." },
618 { "mono", DBG(MONOLITHIC_SHADERS), "Use old-style monolithic shaders compiled on demand" },
619 { "unsafemath", DBG(UNSAFE_MATH), "Enable unsafe math shader optimizations" },
620 { "nodccfb", DBG(NO_DCC_FB), "Disable separate DCC on the main framebuffer" },
621 { "nodccmsaa", DBG(NO_DCC_MSAA), "Disable DCC for MSAA" },
622 { "dccmsaa", DBG(DCC_MSAA), "Enable DCC for MSAA" },
623 { "nodpbb", DBG(NO_DPBB), "Disable DPBB." },
624 { "nodfsm", DBG(NO_DFSM), "Disable DFSM." },
625 { "dpbb", DBG(DPBB), "Enable DPBB." },
626 { "dfsm", DBG(DFSM), "Enable DFSM." },
627 { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" },
628 { "reserve_vmid", DBG(RESERVE_VMID), "Force VMID reservation per context." },
629
630 DEBUG_NAMED_VALUE_END /* must be last */
631 };
632
633 static const char* r600_get_vendor(struct pipe_screen* pscreen)
634 {
635 return "X.Org";
636 }
637
638 static const char* r600_get_device_vendor(struct pipe_screen* pscreen)
639 {
640 return "AMD";
641 }
642
643 static const char *r600_get_marketing_name(struct radeon_winsys *ws)
644 {
645 if (!ws->get_chip_name)
646 return NULL;
647 return ws->get_chip_name(ws);
648 }
649
650 static const char *r600_get_family_name(const struct r600_common_screen *rscreen)
651 {
652 switch (rscreen->info.family) {
653 case CHIP_TAHITI: return "AMD TAHITI";
654 case CHIP_PITCAIRN: return "AMD PITCAIRN";
655 case CHIP_VERDE: return "AMD CAPE VERDE";
656 case CHIP_OLAND: return "AMD OLAND";
657 case CHIP_HAINAN: return "AMD HAINAN";
658 case CHIP_BONAIRE: return "AMD BONAIRE";
659 case CHIP_KAVERI: return "AMD KAVERI";
660 case CHIP_KABINI: return "AMD KABINI";
661 case CHIP_HAWAII: return "AMD HAWAII";
662 case CHIP_MULLINS: return "AMD MULLINS";
663 case CHIP_TONGA: return "AMD TONGA";
664 case CHIP_ICELAND: return "AMD ICELAND";
665 case CHIP_CARRIZO: return "AMD CARRIZO";
666 case CHIP_FIJI: return "AMD FIJI";
667 case CHIP_POLARIS10: return "AMD POLARIS10";
668 case CHIP_POLARIS11: return "AMD POLARIS11";
669 case CHIP_POLARIS12: return "AMD POLARIS12";
670 case CHIP_STONEY: return "AMD STONEY";
671 case CHIP_VEGA10: return "AMD VEGA10";
672 case CHIP_RAVEN: return "AMD RAVEN";
673 default: return "AMD unknown";
674 }
675 }
676
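/* Create the on-disk shader cache, keyed by the Mesa and LLVM build
 * timestamps and by the debug flags that affect shader compilation. */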
677 static void r600_disk_cache_create(struct r600_common_screen *rscreen)
678 {
679 /* Don't use the cache if shader dumping is enabled. */
680 if (rscreen->debug_flags & DBG_ALL_SHADERS)
681 return;
682
683 /* TODO: remove this once gallium supports a nir cache */
684 if (rscreen->debug_flags & DBG(NIR))
685 return;
686
687 uint32_t mesa_timestamp;
688 if (disk_cache_get_function_timestamp(r600_disk_cache_create,
689 &mesa_timestamp)) {
690 char *timestamp_str;
691 int res = -1;
692 uint32_t llvm_timestamp;
693
694 if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
695 &llvm_timestamp)) {
696 res = asprintf(&timestamp_str, "%u_%u",
697 mesa_timestamp, llvm_timestamp);
698 }
699
700 if (res != -1) {
701 /* These flags affect shader compilation. */
702 uint64_t shader_debug_flags =
703 rscreen->debug_flags &
704 (DBG(FS_CORRECT_DERIVS_AFTER_KILL) |
705 DBG(SI_SCHED) |
706 DBG(UNSAFE_MATH));
707
708 rscreen->disk_shader_cache =
709 disk_cache_create(r600_get_family_name(rscreen),
710 timestamp_str,
711 shader_debug_flags);
712 free(timestamp_str);
713 }
714 }
715 }
716
717 static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen)
718 {
719 struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
720 return rscreen->disk_shader_cache;
721 }
722
723 static const char* r600_get_name(struct pipe_screen* pscreen)
724 {
725 struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
726
727 return rscreen->renderer_string;
728 }
729
730 static float r600_get_paramf(struct pipe_screen* pscreen,
731 enum pipe_capf param)
732 {
733 switch (param) {
734 case PIPE_CAPF_MAX_LINE_WIDTH:
735 case PIPE_CAPF_MAX_LINE_WIDTH_AA:
736 case PIPE_CAPF_MAX_POINT_WIDTH:
737 case PIPE_CAPF_MAX_POINT_WIDTH_AA:
738 return 8192.0f;
739 case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
740 return 16.0f;
741 case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
742 return 16.0f;
743 case PIPE_CAPF_GUARD_BAND_LEFT:
744 case PIPE_CAPF_GUARD_BAND_TOP:
745 case PIPE_CAPF_GUARD_BAND_RIGHT:
746 case PIPE_CAPF_GUARD_BAND_BOTTOM:
747 return 0.0f;
748 }
749 return 0.0f;
750 }
751
752 static int r600_get_video_param(struct pipe_screen *screen,
753 enum pipe_video_profile profile,
754 enum pipe_video_entrypoint entrypoint,
755 enum pipe_video_cap param)
756 {
757 switch (param) {
758 case PIPE_VIDEO_CAP_SUPPORTED:
759 return vl_profile_supported(screen, profile, entrypoint);
760 case PIPE_VIDEO_CAP_NPOT_TEXTURES:
761 return 1;
762 case PIPE_VIDEO_CAP_MAX_WIDTH:
763 case PIPE_VIDEO_CAP_MAX_HEIGHT:
764 return vl_video_buffer_max_size(screen);
765 case PIPE_VIDEO_CAP_PREFERED_FORMAT:
766 return PIPE_FORMAT_NV12;
767 case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
768 return false;
769 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
770 return false;
771 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
772 return true;
773 case PIPE_VIDEO_CAP_MAX_LEVEL:
774 return vl_level_supported(screen, profile);
775 default:
776 return 0;
777 }
778 }
779
780 static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
781 enum pipe_shader_ir ir_type)
782 {
783 if (ir_type != PIPE_SHADER_IR_TGSI)
784 return 256;
785
786 /* Only 16 waves per thread-group on gfx9: 16 * 64 = 1024 threads. */
787 if (screen->chip_class >= GFX9)
788 return 1024;
789
790 /* Up to 40 waves (2560 threads) per thread-group on GCN < gfx9.
791 * Expose a nice round number instead.
792 */
793 return 2048;
794 }
795
796 static int r600_get_compute_param(struct pipe_screen *screen,
797 enum pipe_shader_ir ir_type,
798 enum pipe_compute_cap param,
799 void *ret)
800 {
801 struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
802
803 //TODO: select these params by asic
804 switch (param) {
805 case PIPE_COMPUTE_CAP_IR_TARGET: {
806 const char *gpu;
807 const char *triple;
808
809 if (HAVE_LLVM < 0x0400)
810 triple = "amdgcn--";
811 else
812 triple = "amdgcn-mesa-mesa3d";
813
814 gpu = ac_get_llvm_processor_name(rscreen->family);
815 if (ret) {
816 sprintf(ret, "%s-%s", gpu, triple);
817 }
818 /* +2 for the dash and the terminating NUL byte */
819 return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
820 }
821 case PIPE_COMPUTE_CAP_GRID_DIMENSION:
822 if (ret) {
823 uint64_t *grid_dimension = ret;
824 grid_dimension[0] = 3;
825 }
826 return 1 * sizeof(uint64_t);
827
828 case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
829 if (ret) {
830 uint64_t *grid_size = ret;
831 grid_size[0] = 65535;
832 grid_size[1] = 65535;
833 grid_size[2] = 65535;
834 }
835 return 3 * sizeof(uint64_t) ;
836
837 case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
838 if (ret) {
839 uint64_t *block_size = ret;
840 unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
841 block_size[0] = threads_per_block;
842 block_size[1] = threads_per_block;
843 block_size[2] = threads_per_block;
844 }
845 return 3 * sizeof(uint64_t);
846
847 case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
848 if (ret) {
849 uint64_t *max_threads_per_block = ret;
850 *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
851 }
852 return sizeof(uint64_t);
853 case PIPE_COMPUTE_CAP_ADDRESS_BITS:
854 if (ret) {
855 uint32_t *address_bits = ret;
856 address_bits[0] = 64;
857 }
858 return 1 * sizeof(uint32_t);
859
860 case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
861 if (ret) {
862 uint64_t *max_global_size = ret;
863 uint64_t max_mem_alloc_size;
864
865 r600_get_compute_param(screen, ir_type,
866 PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
867 &max_mem_alloc_size);
868
869 /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
870 * 1/4 of the MAX_GLOBAL_SIZE. Since the
871 * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
872 * make sure we never report more than
873 * 4 * MAX_MEM_ALLOC_SIZE.
874 */
875 *max_global_size = MIN2(4 * max_mem_alloc_size,
876 MAX2(rscreen->info.gart_size,
877 rscreen->info.vram_size));
878 }
879 return sizeof(uint64_t);
880
881 case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
882 if (ret) {
883 uint64_t *max_local_size = ret;
884 /* Value reported by the closed source driver. */
885 *max_local_size = 32768;
886 }
887 return sizeof(uint64_t);
888
889 case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
890 if (ret) {
891 uint64_t *max_input_size = ret;
892 /* Value reported by the closed source driver. */
893 *max_input_size = 1024;
894 }
895 return sizeof(uint64_t);
896
897 case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
898 if (ret) {
899 uint64_t *max_mem_alloc_size = ret;
900
901 *max_mem_alloc_size = rscreen->info.max_alloc_size;
902 }
903 return sizeof(uint64_t);
904
905 case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
906 if (ret) {
907 uint32_t *max_clock_frequency = ret;
908 *max_clock_frequency = rscreen->info.max_shader_clock;
909 }
910 return sizeof(uint32_t);
911
912 case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
913 if (ret) {
914 uint32_t *max_compute_units = ret;
915 *max_compute_units = rscreen->info.num_good_compute_units;
916 }
917 return sizeof(uint32_t);
918
919 case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
920 if (ret) {
921 uint32_t *images_supported = ret;
922 *images_supported = 0;
923 }
924 return sizeof(uint32_t);
925 case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
926 break; /* unused */
927 case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
928 if (ret) {
929 uint32_t *subgroup_size = ret;
930 *subgroup_size = 64;
931 }
932 return sizeof(uint32_t);
933 case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
934 if (ret) {
935 uint64_t *max_variable_threads_per_block = ret;
936 if (ir_type == PIPE_SHADER_IR_TGSI)
937 *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
938 else
939 *max_variable_threads_per_block = 0;
940 }
941 return sizeof(uint64_t);
942 }
943
944 fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
945 return 0;
946 }
947
948 static uint64_t r600_get_timestamp(struct pipe_screen *screen)
949 {
950 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
951
952 return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) /
953 rscreen->info.clock_crystal_freq;
954 }
955
956 static void r600_query_memory_info(struct pipe_screen *screen,
957 struct pipe_memory_info *info)
958 {
959 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
960 struct radeon_winsys *ws = rscreen->ws;
961 unsigned vram_usage, gtt_usage;
962
963 info->total_device_memory = rscreen->info.vram_size / 1024;
964 info->total_staging_memory = rscreen->info.gart_size / 1024;
965
966 /* The real TTM memory usage is somewhat random, because:
967 *
968 * 1) TTM delays freeing memory, because it can only free it after
969 * fences expire.
970 *
971 * 2) The memory usage can be really low if big VRAM evictions are
972 * taking place, but the real usage is well above the size of VRAM.
973 *
974 * Instead, return statistics of this process.
975 */
976 vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
977 gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
978
979 info->avail_device_memory =
980 vram_usage <= info->total_device_memory ?
981 info->total_device_memory - vram_usage : 0;
982 info->avail_staging_memory =
983 gtt_usage <= info->total_staging_memory ?
984 info->total_staging_memory - gtt_usage : 0;
985
986 info->device_memory_evicted =
987 ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
988
989 if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4)
990 info->nr_device_memory_evictions =
991 ws->query_value(ws, RADEON_NUM_EVICTIONS);
992 else
993 /* Just return the number of evicted 64KB pages. */
994 info->nr_device_memory_evictions = info->device_memory_evicted / 64;
995 }
996
997 struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
998 const struct pipe_resource *templ)
999 {
1000 if (templ->target == PIPE_BUFFER) {
1001 return si_buffer_create(screen, templ, 256);
1002 } else {
1003 return si_texture_create(screen, templ);
1004 }
1005 }
1006
1007 bool si_common_screen_init(struct r600_common_screen *rscreen,
1008 struct radeon_winsys *ws)
1009 {
1010 char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
1011 struct utsname uname_data;
1012 const char *chip_name;
1013
1014 ws->query_info(ws, &rscreen->info);
1015 rscreen->ws = ws;
1016
1017 if ((chip_name = r600_get_marketing_name(ws)))
1018 snprintf(family_name, sizeof(family_name), "%s / ",
1019 r600_get_family_name(rscreen) + 4);
1020 else
1021 chip_name = r600_get_family_name(rscreen);
1022
1023 if (uname(&uname_data) == 0)
1024 snprintf(kernel_version, sizeof(kernel_version),
1025 " / %s", uname_data.release);
1026
1027 if (HAVE_LLVM > 0) {
1028 snprintf(llvm_string, sizeof(llvm_string),
1029 ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
1030 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
1031 }
1032
1033 snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
1034 "%s (%sDRM %i.%i.%i%s%s)",
1035 chip_name, family_name, rscreen->info.drm_major,
1036 rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
1037 kernel_version, llvm_string);
1038
1039 rscreen->b.get_name = r600_get_name;
1040 rscreen->b.get_vendor = r600_get_vendor;
1041 rscreen->b.get_device_vendor = r600_get_device_vendor;
1042 rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache;
1043 rscreen->b.get_compute_param = r600_get_compute_param;
1044 rscreen->b.get_paramf = r600_get_paramf;
1045 rscreen->b.get_timestamp = r600_get_timestamp;
1046 rscreen->b.resource_destroy = u_resource_destroy_vtbl;
1047 rscreen->b.resource_from_user_memory = si_buffer_from_user_memory;
1048 rscreen->b.query_memory_info = r600_query_memory_info;
1049
1050 if (rscreen->info.has_hw_decode) {
1051 rscreen->b.get_video_param = si_vid_get_video_param;
1052 rscreen->b.is_video_format_supported = si_vid_is_format_supported;
1053 } else {
1054 rscreen->b.get_video_param = r600_get_video_param;
1055 rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
1056 }
1057
1058 si_init_screen_texture_functions(rscreen);
1059 si_init_screen_query_functions(rscreen);
1060
1061 rscreen->family = rscreen->info.family;
1062 rscreen->chip_class = rscreen->info.chip_class;
1063 rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
1064 rscreen->has_rbplus = false;
1065 rscreen->rbplus_allowed = false;
1066
1067 r600_disk_cache_create(rscreen);
1068
1069 slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64);
1070
1071 rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
1072 if (rscreen->force_aniso >= 0) {
1073 printf("radeon: Forcing anisotropy filter to %ix\n",
1074 /* round down to a power of two */
1075 1 << util_logbase2(rscreen->force_aniso));
1076 }
1077
1078 (void) mtx_init(&rscreen->aux_context_lock, mtx_plain);
1079 (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain);
1080
1081 if (rscreen->debug_flags & DBG(INFO)) {
1082 printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
1083 rscreen->info.pci_domain, rscreen->info.pci_bus,
1084 rscreen->info.pci_dev, rscreen->info.pci_func);
1085 printf("pci_id = 0x%x\n", rscreen->info.pci_id);
1086 printf("family = %i (%s)\n", rscreen->info.family,
1087 r600_get_family_name(rscreen));
1088 printf("chip_class = %i\n", rscreen->info.chip_class);
1089 printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size);
1090 printf("gart_page_size = %u\n", rscreen->info.gart_page_size);
1091 printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
1092 printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
1093 printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
1094 printf("max_alloc_size = %i MB\n",
1095 (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
1096 printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size);
1097 printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram);
1098 printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
1099 printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
1100 printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode);
1101 printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings);
1102 printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings);
1103 printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version);
1104 printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version);
1105 printf("me_fw_version = %i\n", rscreen->info.me_fw_version);
1106 printf("me_fw_feature = %i\n", rscreen->info.me_fw_feature);
1107 printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version);
1108 printf("pfp_fw_feature = %i\n", rscreen->info.pfp_fw_feature);
1109 printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version);
1110 printf("ce_fw_feature = %i\n", rscreen->info.ce_fw_feature);
1111 printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
1112 printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
1113 printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size);
1114 printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
1115 rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
1116 printf("has_userptr = %i\n", rscreen->info.has_userptr);
1117 printf("has_syncobj = %u\n", rscreen->info.has_syncobj);
1118 printf("has_sync_file = %u\n", rscreen->info.has_sync_file);
1119
1120 printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
1121 printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
1122 printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
1123 printf("max_se = %i\n", rscreen->info.max_se);
1124 printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
1125
1126 printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
1127 printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
1128 printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
1129 printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
1130 printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
1131 printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
1132 printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask);
1133 printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
1134 }
1135 return true;
1136 }
1137
1138 void si_destroy_common_screen(struct r600_common_screen *rscreen)
1139 {
1140 si_perfcounters_destroy(rscreen);
1141 si_gpu_load_kill_thread(rscreen);
1142
1143 mtx_destroy(&rscreen->gpu_load_mutex);
1144 mtx_destroy(&rscreen->aux_context_lock);
1145 rscreen->aux_context->destroy(rscreen->aux_context);
1146
1147 slab_destroy_parent(&rscreen->pool_transfers);
1148
1149 disk_cache_destroy(rscreen->disk_shader_cache);
1150 rscreen->ws->destroy(rscreen->ws);
1151 FREE(rscreen);
1152 }
1153
1154 bool si_can_dump_shader(struct r600_common_screen *rscreen,
1155 unsigned processor)
1156 {
1157 return rscreen->debug_flags & (1 << processor);
1158 }
1159
1160 bool si_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
1161 {
1162 return (rscreen->debug_flags & DBG(CHECK_IR)) ||
1163 si_can_dump_shader(rscreen, processor);
1164 }
1165
1166 void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
1167 uint64_t offset, uint64_t size, unsigned value)
1168 {
1169 struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
1170
1171 mtx_lock(&rscreen->aux_context_lock);
1172 rctx->dma_clear_buffer(&rctx->b, dst, offset, size, value);
1173 rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
1174 mtx_unlock(&rscreen->aux_context_lock);
1175 }