/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "r600_pipe_common.h"
#include "r600_cs.h"
#include "tgsi/tgsi_parse.h"
#include "util/list.h"
#include "util/u_draw_quad.h"
#include "util/u_memory.h"
#include "util/u_format_s3tc.h"
#include "util/u_upload_mgr.h"
#include "util/os_time.h"
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
#include "radeon/radeon_video.h"
#include "amd/common/ac_llvm_util.h"
#include "amd/common/sid.h"
#include <inttypes.h>

#include <llvm-c/TargetMachine.h>

/*
 * pipe_context
 */

/**
 * Write an EOP event.
 *
 * \param event		EVENT_TYPE_*
 * \param event_flags	Optional cache flush flags (TC)
 * \param data_sel	EOP_DATA_SEL_* (1 = fence, 3 = timestamp)
 * \param buf		Buffer
 * \param va		GPU address
 * \param new_fence	Fence value to write for this event.
 * \param query_type	PIPE_QUERY_*; used to decide whether the GFX9
 *			timestamp-event workaround below is needed.
 */
void si_gfx_write_event_eop(struct r600_common_context *ctx,
			    unsigned event, unsigned event_flags,
			    unsigned data_sel,
			    struct r600_resource *buf, uint64_t va,
			    uint32_t new_fence, unsigned query_type)
{
	struct radeon_winsys_cs *cs = ctx->gfx.cs;
	unsigned op = EVENT_TYPE(event) |
		      EVENT_INDEX(5) |
		      event_flags;
	unsigned sel = EOP_DATA_SEL(data_sel);

	/* Wait for write confirmation before writing data, but don't send
	 * an interrupt. */
	if (data_sel != EOP_DATA_SEL_DISCARD)
		sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);

	if (ctx->chip_class >= GFX9) {
		/* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
		 * counters) must immediately precede every timestamp event to
		 * prevent a GPU hang on GFX9.
		 *
		 * Occlusion queries don't need to do it here, because they
		 * always do ZPASS_DONE before the timestamp.
		 */
		if (ctx->chip_class == GFX9 &&
		    query_type != PIPE_QUERY_OCCLUSION_COUNTER &&
		    query_type != PIPE_QUERY_OCCLUSION_PREDICATE &&
		    query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
			struct r600_resource *scratch = ctx->eop_bug_scratch;

			assert(16 * ctx->screen->info.num_render_backends <=
			       scratch->b.b.width0);
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
			radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
			radeon_emit(cs, scratch->gpu_address);
			radeon_emit(cs, scratch->gpu_address >> 32);

			radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
						  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
		}

		radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0));
		radeon_emit(cs, op);
		radeon_emit(cs, sel);
		radeon_emit(cs, va);		/* address lo */
		radeon_emit(cs, va >> 32);	/* address hi */
		radeon_emit(cs, new_fence);	/* immediate data lo */
		radeon_emit(cs, 0);		/* immediate data hi */
		radeon_emit(cs, 0);		/* unused */
	} else {
		if (ctx->chip_class == CIK ||
		    ctx->chip_class == VI) {
			struct r600_resource *scratch = ctx->eop_bug_scratch;
			uint64_t va = scratch->gpu_address;

			/* Two EOP events are required to make all engines go idle
			 * (and optional cache flushes executed) before the timestamp
			 * is written.
			 */
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
			radeon_emit(cs, op);
			radeon_emit(cs, va);
			radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
			radeon_emit(cs, 0); /* immediate data */
			radeon_emit(cs, 0); /* unused */

			radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
						  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
		}

		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
		radeon_emit(cs, op);
		radeon_emit(cs, va);
		radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
		radeon_emit(cs, new_fence); /* immediate data */
		radeon_emit(cs, 0); /* unused */
	}

	if (buf) {
		radeon_add_to_buffer_list(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE,
					  RADEON_PRIO_QUERY);
	}
}
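
/* Illustrative use (a sketch, not code from this file): emit a 32-bit fence
 * value once all prior work reaches bottom-of-pipe. The fence_buf/fence_va
 * setup is assumed to exist elsewhere, and the EVENT_TYPE_BOTTOM_OF_PIPE_TS /
 * EOP_DATA_SEL_VALUE_32BIT names are assumed to come from sid.h:
 *
 *	si_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
 *			       EOP_DATA_SEL_VALUE_32BIT,
 *			       fence_buf, fence_va, 0x80000000,
 *			       PIPE_QUERY_GPU_FINISHED);
 */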

unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen)
{
	unsigned dwords = 6;

	if (screen->chip_class == CIK ||
	    screen->chip_class == VI)
		dwords *= 2;

	if (!screen->info.has_virtual_memory)
		dwords += 2;

	return dwords;
}
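
/* Illustrative sizing (sketch): callers can use the value above to make sure
 * the gfx IB has room before emitting the fence, e.g.:
 *
 *	unsigned fence_dw = si_gfx_write_fence_dwords(ctx->screen);
 *	... reserve at least fence_dw dwords in ctx->gfx.cs, then call
 *	    si_gfx_write_event_eop() ...
 */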

void si_gfx_wait_fence(struct r600_common_context *ctx,
		       uint64_t va, uint32_t ref, uint32_t mask)
{
	struct radeon_winsys_cs *cs = ctx->gfx.cs;

	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
	radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
	radeon_emit(cs, va);
	radeon_emit(cs, va >> 32);
	radeon_emit(cs, ref); /* reference value */
	radeon_emit(cs, mask); /* mask */
	radeon_emit(cs, 4); /* poll interval */
}
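
/* Illustrative pairing (sketch): once si_gfx_write_event_eop() above has been
 * asked to write a fence value to some VA, the CP can be stalled until the
 * write lands:
 *
 *	si_gfx_wait_fence(ctx, fence_va, 0x80000000, 0xffffffff);
 */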

static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
{
	struct radeon_winsys_cs *cs = rctx->dma.cs;

	/* NOP waits for idle on Evergreen and later. */
	if (rctx->chip_class >= CIK)
		radeon_emit(cs, 0x00000000); /* NOP */
	else
		radeon_emit(cs, 0xf0000000); /* NOP */
}

void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
		       struct r600_resource *dst, struct r600_resource *src)
{
	uint64_t vram = ctx->dma.cs->used_vram;
	uint64_t gtt = ctx->dma.cs->used_gart;

	if (dst) {
		vram += dst->vram_usage;
		gtt += dst->gart_usage;
	}
	if (src) {
		vram += src->vram_usage;
		gtt += src->gart_usage;
	}

	/* Flush the GFX IB if DMA depends on it. */
	if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
	    ((dst &&
	      ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf,
					       RADEON_USAGE_READWRITE)) ||
	     (src &&
	      ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf,
					       RADEON_USAGE_WRITE))))
		ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);

	/* Flush if there's not enough space, or if the memory usage per IB
	 * is too large.
	 *
	 * IBs using too little memory are limited by the IB submission overhead.
	 * IBs using too much memory are limited by the kernel/TTM overhead.
	 * Too long IBs create CPU-GPU pipeline bubbles and add latency.
	 *
	 * This heuristic makes sure that DMA requests are executed
	 * very soon after the call is made and lowers memory usage.
	 * It improves texture upload performance by keeping the DMA
	 * engine busy while uploads are being submitted.
	 */
	num_dw++; /* for emit_wait_idle below */
	if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
	    ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 ||
	    !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) {
		ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
		assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
	}

	/* Wait for idle if either buffer has been used in the IB before to
	 * prevent read-after-write hazards.
	 */
	if ((dst &&
	     ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf,
					      RADEON_USAGE_READWRITE)) ||
	    (src &&
	     ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf,
					      RADEON_USAGE_WRITE)))
		r600_dma_emit_wait_idle(ctx);

	/* If GPUVM is not supported, the CS checker needs 2 entries
	 * in the buffer list per packet, which has to be done manually.
	 */
	if (ctx->screen->info.has_virtual_memory) {
		if (dst)
			radeon_add_to_buffer_list(ctx, &ctx->dma, dst,
						  RADEON_USAGE_WRITE,
						  RADEON_PRIO_SDMA_BUFFER);
		if (src)
			radeon_add_to_buffer_list(ctx, &ctx->dma, src,
						  RADEON_USAGE_READ,
						  RADEON_PRIO_SDMA_BUFFER);
	}

	/* this function is called before all DMA calls, so increment this. */
	ctx->num_dma_calls++;
}
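
/* Illustrative call pattern (sketch; the SDMA packet emission itself lives in
 * the chip-specific dma_copy/dma_clear paths and is only hinted at here):
 *
 *	si_need_dma_space(ctx, 7, rdst, rsrc);
 *	... emit the copy packet (up to 7 dwords) into ctx->dma.cs ...
 */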

static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
{
}

void si_preflush_suspend_features(struct r600_common_context *ctx)
{
	/* suspend queries */
	if (!LIST_IS_EMPTY(&ctx->active_queries))
		si_suspend_queries(ctx);
}

void si_postflush_resume_features(struct r600_common_context *ctx)
{
	/* resume queries */
	if (!LIST_IS_EMPTY(&ctx->active_queries))
		si_resume_queries(ctx);
}

static void r600_flush_dma_ring(void *ctx, unsigned flags,
				struct pipe_fence_handle **fence)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct radeon_winsys_cs *cs = rctx->dma.cs;
	struct radeon_saved_cs saved;
	bool check_vm =
		(rctx->screen->debug_flags & DBG(CHECK_VM)) &&
		rctx->check_vm_faults;

	if (!radeon_emitted(cs, 0)) {
		if (fence)
			rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
		return;
	}

	if (check_vm)
		si_save_cs(rctx->ws, cs, &saved, true);

	rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
	if (fence)
		rctx->ws->fence_reference(fence, rctx->last_sdma_fence);

	if (check_vm) {
		/* Use conservative timeout 800ms, after which we won't wait any
		 * longer and assume the GPU is hung.
		 */
		rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000);

		rctx->check_vm_faults(rctx, &saved, RING_DMA);
		si_clear_saved_cs(&saved);
	}
}

/**
 * Store a linearized copy of all chunks of \p cs together with the buffer
 * list in \p saved.
 */
void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
		struct radeon_saved_cs *saved, bool get_buffer_list)
{
	uint32_t *buf;
	unsigned i;

	/* Save the IB chunks. */
	saved->num_dw = cs->prev_dw + cs->current.cdw;
	saved->ib = MALLOC(4 * saved->num_dw);
	if (!saved->ib)
		goto oom;

	buf = saved->ib;
	for (i = 0; i < cs->num_prev; ++i) {
		memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
		buf += cs->prev[i].cdw;
	}
	memcpy(buf, cs->current.buf, cs->current.cdw * 4);

	if (!get_buffer_list)
		return;

	/* Save the buffer list. */
	saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
	saved->bo_list = CALLOC(saved->bo_count,
				sizeof(saved->bo_list[0]));
	if (!saved->bo_list) {
		FREE(saved->ib);
		goto oom;
	}
	ws->cs_get_buffer_list(cs, saved->bo_list);

	return;

oom:
	fprintf(stderr, "%s: out of memory\n", __func__);
	memset(saved, 0, sizeof(*saved));
}

void si_clear_saved_cs(struct radeon_saved_cs *saved)
{
	FREE(saved->ib);
	FREE(saved->bo_list);

	memset(saved, 0, sizeof(*saved));
}
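
/* Illustrative pattern (sketch), mirroring the DBG(CHECK_VM) path in
 * r600_flush_dma_ring() above:
 *
 *	struct radeon_saved_cs saved;
 *
 *	si_save_cs(rctx->ws, cs, &saved, true);
 *	... flush, wait, then feed &saved to the VM-fault dump ...
 *	si_clear_saved_cs(&saved);
 */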

static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	unsigned latest = rctx->ws->query_value(rctx->ws,
						RADEON_GPU_RESET_COUNTER);

	if (rctx->gpu_reset_counter == latest)
		return PIPE_NO_RESET;

	rctx->gpu_reset_counter = latest;
	return PIPE_UNKNOWN_CONTEXT_RESET;
}

static void r600_set_device_reset_callback(struct pipe_context *ctx,
					   const struct pipe_device_reset_callback *cb)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;

	if (cb)
		rctx->device_reset_callback = *cb;
	else
		memset(&rctx->device_reset_callback, 0,
		       sizeof(rctx->device_reset_callback));
}

bool si_check_device_reset(struct r600_common_context *rctx)
{
	enum pipe_reset_status status;

	if (!rctx->device_reset_callback.reset)
		return false;

	if (!rctx->b.get_device_reset_status)
		return false;

	status = rctx->b.get_device_reset_status(&rctx->b);
	if (status == PIPE_NO_RESET)
		return false;

	rctx->device_reset_callback.reset(rctx->device_reset_callback.data, status);
	return true;
}
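
/* Illustrative use (sketch): callers would typically bail out of further
 * command emission once a reset has been reported to the state tracker:
 *
 *	if (si_check_device_reset(rctx))
 *		return;
 */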

static bool r600_resource_commit(struct pipe_context *pctx,
				 struct pipe_resource *resource,
				 unsigned level, struct pipe_box *box,
				 bool commit)
{
	struct r600_common_context *ctx = (struct r600_common_context *)pctx;
	struct r600_resource *res = r600_resource(resource);

	/*
	 * Since buffer commitment changes cannot be pipelined, we need to
	 * (a) flush any pending commands that refer to the buffer we're about
	 *     to change, and
	 * (b) wait for threaded submit to finish, including those that were
	 *     triggered by some other, earlier operation.
	 */
	if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
	    ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
					     res->buf, RADEON_USAGE_READWRITE)) {
		ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
	}
	if (radeon_emitted(ctx->dma.cs, 0) &&
	    ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
					     res->buf, RADEON_USAGE_READWRITE)) {
		ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
	}

	ctx->ws->cs_sync_flush(ctx->dma.cs);
	ctx->ws->cs_sync_flush(ctx->gfx.cs);

	assert(resource->target == PIPE_BUFFER);

	return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
}

bool si_common_context_init(struct r600_common_context *rctx,
			    struct r600_common_screen *rscreen,
			    unsigned context_flags)
{
	slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers);
	slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers);

	rctx->screen = rscreen;
	rctx->ws = rscreen->ws;
	rctx->family = rscreen->family;
	rctx->chip_class = rscreen->chip_class;

	rctx->b.invalidate_resource = si_invalidate_resource;
	rctx->b.resource_commit = r600_resource_commit;
	rctx->b.transfer_map = u_transfer_map_vtbl;
	rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
	rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
	rctx->b.texture_subdata = u_default_texture_subdata;
	rctx->b.memory_barrier = r600_memory_barrier;
	rctx->b.buffer_subdata = si_buffer_subdata;

	if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
		rctx->b.get_device_reset_status = r600_get_reset_status;
		rctx->gpu_reset_counter =
			rctx->ws->query_value(rctx->ws,
					      RADEON_GPU_RESET_COUNTER);
	}

	rctx->b.set_device_reset_callback = r600_set_device_reset_callback;

	si_init_context_texture_functions(rctx);
	si_init_query_functions(rctx);

	if (rctx->chip_class == CIK ||
	    rctx->chip_class == VI ||
	    rctx->chip_class == GFX9) {
		rctx->eop_bug_scratch = (struct r600_resource*)
			pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
					   16 * rscreen->info.num_render_backends);
		if (!rctx->eop_bug_scratch)
			return false;
	}

	rctx->allocator_zeroed_memory =
		u_suballocator_create(&rctx->b, rscreen->info.gart_page_size,
				      0, PIPE_USAGE_DEFAULT, 0, true);
	if (!rctx->allocator_zeroed_memory)
		return false;

	rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
						  0, PIPE_USAGE_STREAM);
	if (!rctx->b.stream_uploader)
		return false;

	rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
						 0, PIPE_USAGE_DEFAULT);
	if (!rctx->b.const_uploader)
		return false;

	rctx->ctx = rctx->ws->ctx_create(rctx->ws);
	if (!rctx->ctx)
		return false;

	if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
		rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
						   r600_flush_dma_ring,
						   rctx);
		rctx->dma.flush = r600_flush_dma_ring;
	}

	return true;
}

void si_common_context_cleanup(struct r600_common_context *rctx)
{
	unsigned i, j;

	/* Release DCC stats. */
	for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) {
		assert(!rctx->dcc_stats[i].query_active);

		for (j = 0; j < ARRAY_SIZE(rctx->dcc_stats[i].ps_stats); j++)
			if (rctx->dcc_stats[i].ps_stats[j])
				rctx->b.destroy_query(&rctx->b,
						      rctx->dcc_stats[i].ps_stats[j]);

		r600_texture_reference(&rctx->dcc_stats[i].tex, NULL);
	}

	if (rctx->query_result_shader)
		rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader);

	if (rctx->gfx.cs)
		rctx->ws->cs_destroy(rctx->gfx.cs);
	if (rctx->dma.cs)
		rctx->ws->cs_destroy(rctx->dma.cs);
	if (rctx->ctx)
		rctx->ws->ctx_destroy(rctx->ctx);

	if (rctx->b.stream_uploader)
		u_upload_destroy(rctx->b.stream_uploader);
	if (rctx->b.const_uploader)
		u_upload_destroy(rctx->b.const_uploader);

	slab_destroy_child(&rctx->pool_transfers);
	slab_destroy_child(&rctx->pool_transfers_unsync);

	if (rctx->allocator_zeroed_memory) {
		u_suballocator_destroy(rctx->allocator_zeroed_memory);
	}
	rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL);
	rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL);
	r600_resource_reference(&rctx->eop_bug_scratch, NULL);
}

/*
 * pipe_screen
 */

static const struct debug_named_value common_debug_options[] = {
	/* logging */
	{ "tex", DBG(TEX), "Print texture info" },
	{ "nir", DBG(NIR), "Enable experimental NIR shaders" },
	{ "compute", DBG(COMPUTE), "Print compute info" },
	{ "vm", DBG(VM), "Print virtual addresses when creating resources" },
	{ "info", DBG(INFO), "Print driver information" },

	/* shaders */
	{ "vs", DBG(VS), "Print vertex shaders" },
	{ "gs", DBG(GS), "Print geometry shaders" },
	{ "ps", DBG(PS), "Print pixel shaders" },
	{ "cs", DBG(CS), "Print compute shaders" },
	{ "tcs", DBG(TCS), "Print tessellation control shaders" },
	{ "tes", DBG(TES), "Print tessellation evaluation shaders" },
	{ "noir", DBG(NO_IR), "Don't print the LLVM IR" },
	{ "notgsi", DBG(NO_TGSI), "Don't print the TGSI" },
	{ "noasm", DBG(NO_ASM), "Don't print disassembled shaders" },
	{ "preoptir", DBG(PREOPT_IR), "Print the LLVM IR before initial optimizations" },
	{ "checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR" },
	{ "nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized shader variants." },

	{ "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
	{ "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
	{ "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
	{ "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },

	/* features */
	{ "nodma", DBG(NO_ASYNC_DMA), "Disable asynchronous DMA" },
	{ "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" },
	{ "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" },
	{ "notiling", DBG(NO_TILING), "Disable tiling" },
	{ "switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on end-of-packet." },
	{ "forcedma", DBG(FORCE_DMA), "Use asynchronous DMA for all operations when possible." },
	{ "precompile", DBG(PRECOMPILE), "Compile one shader variant at shader creation." },
	{ "nowc", DBG(NO_WC), "Disable GTT write combining" },
	{ "check_vm", DBG(CHECK_VM), "Check VM faults and dump debug info." },
	{ "nodcc", DBG(NO_DCC), "Disable DCC." },
	{ "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." },
	{ "norbplus", DBG(NO_RB_PLUS), "Disable RB+." },
	{ "sisched", DBG(SI_SCHED), "Enable LLVM SI Machine Instruction Scheduler." },
	{ "mono", DBG(MONOLITHIC_SHADERS), "Use old-style monolithic shaders compiled on demand" },
	{ "unsafemath", DBG(UNSAFE_MATH), "Enable unsafe math shader optimizations" },
	{ "nodccfb", DBG(NO_DCC_FB), "Disable separate DCC on the main framebuffer" },
	{ "nodccmsaa", DBG(NO_DCC_MSAA), "Disable DCC for MSAA" },
	{ "dccmsaa", DBG(DCC_MSAA), "Enable DCC for MSAA" },
	{ "nodpbb", DBG(NO_DPBB), "Disable DPBB." },
	{ "nodfsm", DBG(NO_DFSM), "Disable DFSM." },
	{ "dpbb", DBG(DPBB), "Enable DPBB." },
	{ "dfsm", DBG(DFSM), "Enable DFSM." },
	{ "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" },
	{ "reserve_vmid", DBG(RESERVE_VMID), "Force VMID reservation per context." },

	DEBUG_NAMED_VALUE_END /* must be last */
};
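
/* The names above are matched against the R600_DEBUG environment variable by
 * debug_get_flags_option() in si_common_screen_init() below; e.g.
 * (illustrative):
 *
 *	R600_DEBUG=info,check_vm,nodcc <application>
 */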

struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
						const struct pipe_resource *templ)
{
	if (templ->target == PIPE_BUFFER) {
		return si_buffer_create(screen, templ, 256);
	} else {
		return si_texture_create(screen, templ);
	}
}

bool si_common_screen_init(struct r600_common_screen *rscreen,
			   struct radeon_winsys *ws)
{
	rscreen->b.resource_destroy = u_resource_destroy_vtbl;
	rscreen->b.resource_from_user_memory = si_buffer_from_user_memory;

	if (rscreen->info.has_hw_decode) {
		rscreen->b.is_video_format_supported = si_vid_is_format_supported;
	} else {
		rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
	}

	si_init_screen_texture_functions(rscreen);
	si_init_screen_query_functions(rscreen);

	rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
	rscreen->has_rbplus = false;
	rscreen->rbplus_allowed = false;

	slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64);

	rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
	if (rscreen->force_aniso >= 0) {
		printf("radeon: Forcing anisotropy filter to %ix\n",
		       /* round down to a power of two */
		       1 << util_logbase2(rscreen->force_aniso));
	}

	(void) mtx_init(&rscreen->aux_context_lock, mtx_plain);
	(void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain);

	if (rscreen->debug_flags & DBG(INFO)) {
		printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
		       rscreen->info.pci_domain, rscreen->info.pci_bus,
		       rscreen->info.pci_dev, rscreen->info.pci_func);
		printf("pci_id = 0x%x\n", rscreen->info.pci_id);
		printf("family = %i\n", rscreen->info.family);
		printf("chip_class = %i\n", rscreen->info.chip_class);
		printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size);
		printf("gart_page_size = %u\n", rscreen->info.gart_page_size);
		printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
		printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
		printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
		printf("max_alloc_size = %i MB\n",
		       (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
		printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size);
		printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram);
		printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
		printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
		printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode);
		printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings);
		printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings);
		printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version);
		printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version);
		printf("me_fw_version = %i\n", rscreen->info.me_fw_version);
		printf("me_fw_feature = %i\n", rscreen->info.me_fw_feature);
		printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version);
		printf("pfp_fw_feature = %i\n", rscreen->info.pfp_fw_feature);
		printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version);
		printf("ce_fw_feature = %i\n", rscreen->info.ce_fw_feature);
		printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
		printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
		printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size);
		printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
		       rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
		printf("has_userptr = %i\n", rscreen->info.has_userptr);
		printf("has_syncobj = %u\n", rscreen->info.has_syncobj);
		printf("has_sync_file = %u\n", rscreen->info.has_sync_file);

		printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
		printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
		printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
		printf("max_se = %i\n", rscreen->info.max_se);
		printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);

		printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
		printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
		printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
		printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
		printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
		printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
		printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask);
		printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
	}
	return true;
}

void si_destroy_common_screen(struct r600_common_screen *rscreen)
{
	si_perfcounters_destroy(rscreen);
	si_gpu_load_kill_thread(rscreen);

	mtx_destroy(&rscreen->gpu_load_mutex);
	mtx_destroy(&rscreen->aux_context_lock);
	rscreen->aux_context->destroy(rscreen->aux_context);

	slab_destroy_parent(&rscreen->pool_transfers);

	disk_cache_destroy(rscreen->disk_shader_cache);
	rscreen->ws->destroy(rscreen->ws);
	FREE(rscreen);
}

bool si_can_dump_shader(struct r600_common_screen *rscreen,
			unsigned processor)
{
	return rscreen->debug_flags & (1 << processor);
}
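
/* Illustrative use (sketch): "processor" is a PIPE_SHADER_* index, which is
 * assumed to line up with the per-stage DBG() bits (vs/gs/ps/cs/tcs/tes
 * above):
 *
 *	if (si_can_dump_shader(rscreen, PIPE_SHADER_FRAGMENT))
 *		fprintf(stderr, "%s\n", disasm);
 */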

bool si_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
{
	return (rscreen->debug_flags & DBG(CHECK_IR)) ||
	       si_can_dump_shader(rscreen, processor);
}

void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
			    uint64_t offset, uint64_t size, unsigned value)
{
	struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;

	mtx_lock(&rscreen->aux_context_lock);
	rctx->dma_clear_buffer(&rctx->b, dst, offset, size, value);
	rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
	mtx_unlock(&rscreen->aux_context_lock);
}
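
/* Illustrative use (sketch; "buf" is any PIPE_BUFFER resource): zero the whole
 * buffer through the aux context:
 *
 *	si_screen_clear_buffer(rscreen, buf, 0, buf->width0, 0);
 */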