radeonsi: use r600_common_context less pt4
[mesa.git] / src / gallium / drivers / radeon / r600_pipe_common.h
1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 /**
25 * This file contains common screen and context structures and functions
26 * for r600g and radeonsi.
27 */
28
29 #ifndef R600_PIPE_COMMON_H
30 #define R600_PIPE_COMMON_H
31
32 #include <stdio.h>
33
34 #include "amd/common/ac_binary.h"
35
36 #include "radeon/radeon_winsys.h"
37
38 #include "util/disk_cache.h"
39 #include "util/u_blitter.h"
40 #include "util/list.h"
41 #include "util/u_range.h"
42 #include "util/slab.h"
43 #include "util/u_suballoc.h"
44 #include "util/u_transfer.h"
45 #include "util/u_threaded_context.h"
46
47 struct u_log_context;
48 struct si_screen;
49 struct si_context;
50
/* Driver-private resource flags: consecutive left shifts (0..6) of
 * PIPE_RESOURCE_FLAG_DRV_PRIV.
 */
#define R600_RESOURCE_FLAG_TRANSFER		(PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define R600_RESOURCE_FLAG_FLUSHED_DEPTH	(PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
#define R600_RESOURCE_FLAG_FORCE_TILING		(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
#define R600_RESOURCE_FLAG_DISABLE_DCC		(PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
#define R600_RESOURCE_FLAG_UNMAPPABLE		(PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
#define R600_RESOURCE_FLAG_READ_ONLY		(PIPE_RESOURCE_FLAG_DRV_PRIV << 5)
#define R600_RESOURCE_FLAG_32BIT		(PIPE_RESOURCE_FLAG_DRV_PRIV << 6)
58
59 /* Debug flags. */
60 enum {
61 /* Shader logging options: */
62 DBG_VS = PIPE_SHADER_VERTEX,
63 DBG_PS = PIPE_SHADER_FRAGMENT,
64 DBG_GS = PIPE_SHADER_GEOMETRY,
65 DBG_TCS = PIPE_SHADER_TESS_CTRL,
66 DBG_TES = PIPE_SHADER_TESS_EVAL,
67 DBG_CS = PIPE_SHADER_COMPUTE,
68 DBG_NO_IR,
69 DBG_NO_TGSI,
70 DBG_NO_ASM,
71 DBG_PREOPT_IR,
72
73 /* Shader compiler options the shader cache should be aware of: */
74 DBG_FS_CORRECT_DERIVS_AFTER_KILL,
75 DBG_UNSAFE_MATH,
76 DBG_SI_SCHED,
77
78 /* Shader compiler options (with no effect on the shader cache): */
79 DBG_CHECK_IR,
80 DBG_NIR,
81 DBG_MONOLITHIC_SHADERS,
82 DBG_NO_OPT_VARIANT,
83
84 /* Information logging options: */
85 DBG_INFO,
86 DBG_TEX,
87 DBG_COMPUTE,
88 DBG_VM,
89
90 /* Driver options: */
91 DBG_FORCE_DMA,
92 DBG_NO_ASYNC_DMA,
93 DBG_NO_WC,
94 DBG_CHECK_VM,
95 DBG_RESERVE_VMID,
96
97 /* 3D engine options: */
98 DBG_SWITCH_ON_EOP,
99 DBG_NO_OUT_OF_ORDER,
100 DBG_NO_DPBB,
101 DBG_NO_DFSM,
102 DBG_DPBB,
103 DBG_DFSM,
104 DBG_NO_HYPERZ,
105 DBG_NO_RB_PLUS,
106 DBG_NO_2D_TILING,
107 DBG_NO_TILING,
108 DBG_NO_DCC,
109 DBG_NO_DCC_CLEAR,
110 DBG_NO_DCC_FB,
111 DBG_NO_DCC_MSAA,
112 DBG_DCC_MSAA,
113 DBG_NO_FMASK,
114
115 /* Tests: */
116 DBG_TEST_DMA,
117 DBG_TEST_VMFAULT_CP,
118 DBG_TEST_VMFAULT_SDMA,
119 DBG_TEST_VMFAULT_SHADER,
120 };
121
/* Mask with bits 0 through DBG_CS set, i.e. one bit per DBG_* enumerator up
 * to and including DBG_CS. The expression has type int.
 */
#define DBG_ALL_SHADERS		((1 << (DBG_CS + 1)) - 1)
/* unsigned long long mask with only bit DBG_<name> set. */
#define DBG(name)		(1ull << DBG_##name)

#define R600_MAP_BUFFER_ALIGNMENT		64

#define SI_MAX_VARIABLE_THREADS_PER_BLOCK	1024
128
129 struct r600_common_context;
130 struct r600_perfcounters;
131 struct tgsi_shader_info;
132 struct r600_qbo_state;
133
134 /* Only 32-bit buffer allocations are supported, gallium doesn't support more
135 * at the moment.
136 */
137 struct r600_resource {
138 struct threaded_resource b;
139
140 /* Winsys objects. */
141 struct pb_buffer *buf;
142 uint64_t gpu_address;
143 /* Memory usage if the buffer placement is optimal. */
144 uint64_t vram_usage;
145 uint64_t gart_usage;
146
147 /* Resource properties. */
148 uint64_t bo_size;
149 unsigned bo_alignment;
150 enum radeon_bo_domain domains;
151 enum radeon_bo_flag flags;
152 unsigned bind_history;
153 int max_forced_staging_uploads;
154
155 /* The buffer range which is initialized (with a write transfer,
156 * streamout, DMA, or as a random access target). The rest of
157 * the buffer is considered invalid and can be mapped unsynchronized.
158 *
159 * This allows unsychronized mapping of a buffer range which hasn't
160 * been used yet. It's for applications which forget to use
161 * the unsynchronized map flag and expect the driver to figure it out.
162 */
163 struct util_range valid_buffer_range;
164
165 /* For buffers only. This indicates that a write operation has been
166 * performed by TC L2, but the cache hasn't been flushed.
167 * Any hw block which doesn't use or bypasses TC L2 should check this
168 * flag and flush the cache before using the buffer.
169 *
170 * For example, TC L2 must be flushed if a buffer which has been
171 * modified by a shader store instruction is about to be used as
172 * an index buffer. The reason is that VGT DMA index fetching doesn't
173 * use TC L2.
174 */
175 bool TC_L2_dirty;
176
177 /* Whether the resource has been exported via resource_get_handle. */
178 unsigned external_usage; /* PIPE_HANDLE_USAGE_* */
179
180 /* Whether this resource is referenced by bindless handles. */
181 bool texture_handle_allocated;
182 bool image_handle_allocated;
183 };
184
185 struct r600_transfer {
186 struct threaded_transfer b;
187 struct r600_resource *staging;
188 unsigned offset;
189 };
190
/* FMASK parameters; filled in by si_texture_get_fmask_info() and stored in
 * r600_texture::fmask.
 */
struct r600_fmask_info {
	uint64_t	offset;
	uint64_t	size;
	unsigned	alignment;
	unsigned	pitch_in_pixels;
	unsigned	bank_height;
	unsigned	slice_tile_max;
	unsigned	tile_mode_index;
	unsigned	tile_swizzle;
};
201
/* CMASK parameters; filled in by si_texture_get_cmask_info() and stored in
 * r600_texture::cmask.
 */
struct r600_cmask_info {
	uint64_t	offset;
	uint64_t	size;
	unsigned	alignment;
	unsigned	slice_tile_max;
	uint64_t	base_address_reg;
};
209
210 struct r600_texture {
211 struct r600_resource resource;
212
213 struct radeon_surf surface;
214 uint64_t size;
215 struct r600_texture *flushed_depth_texture;
216
217 /* Colorbuffer compression and fast clear. */
218 struct r600_fmask_info fmask;
219 struct r600_cmask_info cmask;
220 struct r600_resource *cmask_buffer;
221 uint64_t dcc_offset; /* 0 = disabled */
222 unsigned cb_color_info; /* fast clear enable bit */
223 unsigned color_clear_value[2];
224 unsigned last_msaa_resolve_target_micro_mode;
225 unsigned num_level0_transfers;
226
227 /* Depth buffer compression and fast clear. */
228 uint64_t htile_offset;
229 float depth_clear_value;
230 uint16_t dirty_level_mask; /* each bit says if that mipmap is compressed */
231 uint16_t stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
232 enum pipe_format db_render_format:16;
233 uint8_t stencil_clear_value;
234 bool tc_compatible_htile:1;
235 bool depth_cleared:1; /* if it was cleared at least once */
236 bool stencil_cleared:1; /* if it was cleared at least once */
237 bool upgraded_depth:1; /* upgraded from unorm to Z32_FLOAT */
238 bool is_depth:1;
239 bool db_compatible:1;
240 bool can_sample_z:1;
241 bool can_sample_s:1;
242
243 /* We need to track DCC dirtiness, because st/dri usually calls
244 * flush_resource twice per frame (not a bug) and we don't wanna
245 * decompress DCC twice. Also, the dirty tracking must be done even
246 * if DCC isn't used, because it's required by the DCC usage analysis
247 * for a possible future enablement.
248 */
249 bool separate_dcc_dirty:1;
250 /* Statistics gathering for the DCC enablement heuristic. */
251 bool dcc_gather_statistics:1;
252 /* Counter that should be non-zero if the texture is bound to a
253 * framebuffer.
254 */
255 unsigned framebuffers_bound;
256 /* Whether the texture is a displayable back buffer and needs DCC
257 * decompression, which is expensive. Therefore, it's enabled only
258 * if statistics suggest that it will pay off and it's allocated
259 * separately. It can't be bound as a sampler by apps. Limited to
260 * target == 2D and last_level == 0. If enabled, dcc_offset contains
261 * the absolute GPUVM address, not the relative one.
262 */
263 struct r600_resource *dcc_separate_buffer;
264 /* When DCC is temporarily disabled, the separate buffer is here. */
265 struct r600_resource *last_dcc_separate_buffer;
266 /* Estimate of how much this color buffer is written to in units of
267 * full-screen draws: ps_invocations / (width * height)
268 * Shader kills, late Z, and blending with trivial discards make it
269 * inaccurate (we need to count CB updates, not PS invocations).
270 */
271 unsigned ps_draw_ratio;
272 /* The number of clears since the last DCC usage analysis. */
273 unsigned num_slow_clears;
274 };
275
276 struct r600_surface {
277 struct pipe_surface base;
278
279 /* These can vary with block-compressed textures. */
280 uint16_t width0;
281 uint16_t height0;
282
283 bool color_initialized:1;
284 bool depth_initialized:1;
285
286 /* Misc. color flags. */
287 bool color_is_int8:1;
288 bool color_is_int10:1;
289 bool dcc_incompatible:1;
290
291 /* Color registers. */
292 unsigned cb_color_info;
293 unsigned cb_color_view;
294 unsigned cb_color_attrib;
295 unsigned cb_color_attrib2; /* GFX9 and later */
296 unsigned cb_dcc_control; /* VI and later */
297 unsigned spi_shader_col_format:8; /* no blending, no alpha-to-coverage. */
298 unsigned spi_shader_col_format_alpha:8; /* alpha-to-coverage */
299 unsigned spi_shader_col_format_blend:8; /* blending without alpha. */
300 unsigned spi_shader_col_format_blend_alpha:8; /* blending with alpha. */
301
302 /* DB registers. */
303 uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE */
304 uint64_t db_stencil_base;
305 uint64_t db_htile_data_base;
306 unsigned db_depth_info;
307 unsigned db_z_info;
308 unsigned db_z_info2; /* GFX9+ */
309 unsigned db_depth_view;
310 unsigned db_depth_size;
311 unsigned db_depth_slice;
312 unsigned db_stencil_info;
313 unsigned db_stencil_info2; /* GFX9+ */
314 unsigned db_htile_surface;
315 };
316
/* One busy/idle counter pair. */
struct r600_mmio_counter {
	unsigned busy;
	unsigned idle;
};

/* The individual counters, grouped by the status register named in each
 * section comment. Every member is a struct r600_mmio_counter, so the whole
 * struct is a run of "unsigned" values that union r600_mmio_counters can
 * overlay with a flat array.
 */
struct r600_mmio_counters_named {
	/* For global GPU load including SDMA. */
	struct r600_mmio_counter gpu;

	/* GRBM_STATUS */
	struct r600_mmio_counter spi;
	struct r600_mmio_counter gui;
	struct r600_mmio_counter ta;
	struct r600_mmio_counter gds;
	struct r600_mmio_counter vgt;
	struct r600_mmio_counter ia;
	struct r600_mmio_counter sx;
	struct r600_mmio_counter wd;
	struct r600_mmio_counter bci;
	struct r600_mmio_counter sc;
	struct r600_mmio_counter pa;
	struct r600_mmio_counter db;
	struct r600_mmio_counter cp;
	struct r600_mmio_counter cb;

	/* SRBM_STATUS2 */
	struct r600_mmio_counter sdma;

	/* CP_STAT */
	struct r600_mmio_counter pfp;
	struct r600_mmio_counter meq;
	struct r600_mmio_counter me;
	struct r600_mmio_counter surf_sync;
	struct r600_mmio_counter cp_dma;
	struct r600_mmio_counter scratch_ram;
};

/* All counters, accessible either by name or as a flat array of unsigned.
 *
 * The array is sized from the named struct rather than declared with the
 * GNU zero-length form "array[0]": the union's size and the meaning of
 * array[i] are unchanged, but indexing is now within the declared bounds.
 */
union r600_mmio_counters {
	struct r600_mmio_counters_named named;
	unsigned array[sizeof(struct r600_mmio_counters_named) / sizeof(unsigned)];
};
356
357 struct r600_memory_object {
358 struct pipe_memory_object b;
359 struct pb_buffer *buf;
360 uint32_t stride;
361 uint32_t offset;
362 };
363
/* Encapsulates a state or an operation which can be emitted into the GPU
 * command stream.
 */
struct r600_atom {
	/* Callback that emits this atom; it receives the context and the
	 * atom itself. */
	void		(*emit)(struct si_context *ctx, struct r600_atom *state);
	unsigned short	id;
};
370
/* Saved CS data, used by debugging features: an IB pointer with its dword
 * count, and a buffer list with its entry count.
 */
struct radeon_saved_cs {
	uint32_t			*ib;
	unsigned			num_dw;

	struct radeon_bo_list_item	*bo_list;
	unsigned			bo_count;
};
379
380 struct r600_common_context {
381 struct pipe_context b; /* base class */
382
383 struct si_screen *screen;
384 struct radeon_winsys *ws;
385 struct radeon_winsys_ctx *ctx;
386 enum radeon_family family;
387 enum chip_class chip_class;
388 struct radeon_winsys_cs *gfx_cs;
389 struct radeon_winsys_cs *dma_cs;
390 struct pipe_fence_handle *last_gfx_fence;
391 struct pipe_fence_handle *last_sdma_fence;
392 struct r600_resource *eop_bug_scratch;
393 struct u_upload_mgr *cached_gtt_allocator;
394 unsigned num_gfx_cs_flushes;
395 unsigned initial_gfx_cs_size;
396 unsigned gpu_reset_counter;
397 unsigned last_dirty_tex_counter;
398 unsigned last_compressed_colortex_counter;
399 unsigned last_num_draw_calls;
400
401 struct threaded_context *tc;
402 struct u_suballocator *allocator_zeroed_memory;
403 struct slab_child_pool pool_transfers;
404 struct slab_child_pool pool_transfers_unsync; /* for threaded_context */
405
406 /* Current unaccounted memory usage. */
407 uint64_t vram;
408 uint64_t gtt;
409
410 /* Additional context states. */
411 unsigned flags; /* flush flags */
412
413 /* Queries. */
414 /* Maintain the list of active queries for pausing between IBs. */
415 int num_occlusion_queries;
416 int num_perfect_occlusion_queries;
417 struct list_head active_queries;
418 unsigned num_cs_dw_queries_suspend;
419 /* Misc stats. */
420 unsigned num_draw_calls;
421 unsigned num_decompress_calls;
422 unsigned num_mrt_draw_calls;
423 unsigned num_prim_restart_calls;
424 unsigned num_spill_draw_calls;
425 unsigned num_compute_calls;
426 unsigned num_spill_compute_calls;
427 unsigned num_dma_calls;
428 unsigned num_cp_dma_calls;
429 unsigned num_vs_flushes;
430 unsigned num_ps_flushes;
431 unsigned num_cs_flushes;
432 unsigned num_cb_cache_flushes;
433 unsigned num_db_cache_flushes;
434 unsigned num_L2_invalidates;
435 unsigned num_L2_writebacks;
436 unsigned num_resident_handles;
437 uint64_t num_alloc_tex_transfer_bytes;
438 unsigned last_tex_ps_draw_ratio; /* for query */
439
440 /* Render condition. */
441 struct r600_atom render_cond_atom;
442 struct pipe_query *render_cond;
443 unsigned render_cond_mode;
444 bool render_cond_invert;
445 bool render_cond_force_off; /* for u_blitter */
446
447 /* Statistics gathering for the DCC enablement heuristic. It can't be
448 * in r600_texture because r600_texture can be shared by multiple
449 * contexts. This is for back buffers only. We shouldn't get too many
450 * of those.
451 *
452 * X11 DRI3 rotates among a finite set of back buffers. They should
453 * all fit in this array. If they don't, separate DCC might never be
454 * enabled by DCC stat gathering.
455 */
456 struct {
457 struct r600_texture *tex;
458 /* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */
459 struct pipe_query *ps_stats[3];
460 /* If all slots are used and another slot is needed,
461 * the least recently used slot is evicted based on this. */
462 int64_t last_use_timestamp;
463 bool query_active;
464 } dcc_stats[5];
465
466 struct pipe_device_reset_callback device_reset_callback;
467 struct u_log_context *log;
468
469 void *query_result_shader;
470
471 /* Copy one resource to another using async DMA. */
472 void (*dma_copy)(struct pipe_context *ctx,
473 struct pipe_resource *dst,
474 unsigned dst_level,
475 unsigned dst_x, unsigned dst_y, unsigned dst_z,
476 struct pipe_resource *src,
477 unsigned src_level,
478 const struct pipe_box *src_box);
479
480 void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
481 uint64_t offset, uint64_t size, unsigned value);
482 };
483
484 /* r600_buffer_common.c */
485 bool si_rings_is_buffer_referenced(struct si_context *sctx,
486 struct pb_buffer *buf,
487 enum radeon_bo_usage usage);
488 void *si_buffer_map_sync_with_rings(struct si_context *sctx,
489 struct r600_resource *resource,
490 unsigned usage);
491 void si_init_resource_fields(struct si_screen *sscreen,
492 struct r600_resource *res,
493 uint64_t size, unsigned alignment);
494 bool si_alloc_resource(struct si_screen *sscreen,
495 struct r600_resource *res);
496 struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
497 unsigned flags,
498 unsigned usage,
499 unsigned size,
500 unsigned alignment);
501 void si_replace_buffer_storage(struct pipe_context *ctx,
502 struct pipe_resource *dst,
503 struct pipe_resource *src);
504 void si_init_screen_buffer_functions(struct si_screen *sscreen);
505 void si_init_buffer_functions(struct si_context *sctx);
506
507 /* r600_common_pipe.c */
508 bool si_common_context_init(struct r600_common_context *rctx,
509 struct si_screen *sscreen,
510 unsigned context_flags);
511 void si_common_context_cleanup(struct r600_common_context *rctx);
512 bool si_check_device_reset(struct r600_common_context *rctx);
513
514 /* r600_gpu_load.c */
515 void si_gpu_load_kill_thread(struct si_screen *sscreen);
516 uint64_t si_begin_counter(struct si_screen *sscreen, unsigned type);
517 unsigned si_end_counter(struct si_screen *sscreen, unsigned type,
518 uint64_t begin);
519
520 /* r600_perfcounters.c */
521 void si_perfcounters_destroy(struct si_screen *sscreen);
522
523 /* r600_query.c */
524 void si_init_screen_query_functions(struct si_screen *sscreen);
525 void si_init_query_functions(struct r600_common_context *rctx);
526 void si_suspend_queries(struct r600_common_context *ctx);
527 void si_resume_queries(struct r600_common_context *ctx);
528
529 /* r600_texture.c */
530 bool si_prepare_for_dma_blit(struct si_context *sctx,
531 struct r600_texture *rdst,
532 unsigned dst_level, unsigned dstx,
533 unsigned dsty, unsigned dstz,
534 struct r600_texture *rsrc,
535 unsigned src_level,
536 const struct pipe_box *src_box);
537 void si_texture_get_fmask_info(struct si_screen *sscreen,
538 struct r600_texture *rtex,
539 unsigned nr_samples,
540 struct r600_fmask_info *out);
541 void si_texture_get_cmask_info(struct si_screen *sscreen,
542 struct r600_texture *rtex,
543 struct r600_cmask_info *out);
544 void si_eliminate_fast_color_clear(struct si_context *sctx,
545 struct r600_texture *rtex);
546 void si_texture_discard_cmask(struct si_screen *sscreen,
547 struct r600_texture *rtex);
548 bool si_init_flushed_depth_texture(struct pipe_context *ctx,
549 struct pipe_resource *texture,
550 struct r600_texture **staging);
551 void si_print_texture_info(struct si_screen *sscreen,
552 struct r600_texture *rtex, struct u_log_context *log);
553 struct pipe_resource *si_texture_create(struct pipe_screen *screen,
554 const struct pipe_resource *templ);
555 bool vi_dcc_formats_compatible(enum pipe_format format1,
556 enum pipe_format format2);
557 bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex,
558 unsigned level,
559 enum pipe_format view_format);
560 void vi_disable_dcc_if_incompatible_format(struct si_context *sctx,
561 struct pipe_resource *tex,
562 unsigned level,
563 enum pipe_format view_format);
564 struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
565 struct pipe_resource *texture,
566 const struct pipe_surface *templ,
567 unsigned width0, unsigned height0,
568 unsigned width, unsigned height);
569 unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap);
570 void vi_separate_dcc_try_enable(struct si_context *sctx,
571 struct r600_texture *tex);
572 void vi_separate_dcc_start_query(struct pipe_context *ctx,
573 struct r600_texture *tex);
574 void vi_separate_dcc_stop_query(struct pipe_context *ctx,
575 struct r600_texture *tex);
576 void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
577 struct r600_texture *tex);
578 bool si_texture_disable_dcc(struct si_context *sctx,
579 struct r600_texture *rtex);
580 void si_init_screen_texture_functions(struct si_screen *sscreen);
581 void si_init_context_texture_functions(struct si_context *sctx);
582
583
584 /* Inline helpers. */
585
/* Reinterpret a pipe_resource pointer as a pointer to the driver's
 * r600_resource. This is a plain cast; no checking is done.
 */
static inline struct r600_resource *r600_resource(struct pipe_resource *r)
{
	struct r600_resource *res = (struct r600_resource *)r;

	return res;
}
590
/* Forward to pipe_resource_reference() after casting both arguments to the
 * pipe_resource base type.
 */
static inline void
r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
{
	struct pipe_resource **dst = (struct pipe_resource **)ptr;
	struct pipe_resource *src = (struct pipe_resource *)res;

	pipe_resource_reference(dst, src);
}
597
/* Forward to pipe_resource_reference() after casting both arguments to the
 * pipe_resource base type.
 *
 * "res" is cast rather than accessed as &res->resource.b.b, so that no
 * member access is formed through "res" when it is NULL. The cast assumes
 * the pipe_resource base sits at offset 0 of r600_texture, the same
 * assumption that r600_resource() and r600_resource_reference() already
 * make for r600_resource.
 */
static inline void
r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res)
{
	pipe_resource_reference((struct pipe_resource **)ptr,
				(struct pipe_resource *)res);
}
603
604 static inline bool
605 vi_dcc_enabled(struct r600_texture *tex, unsigned level)
606 {
607 return tex->dcc_offset && level < tex->surface.num_dcc_levels;
608 }
609
/* Print an error to stderr, prefixed with "EE", the source file, the line
 * number and the calling function. "fmt" must be a string literal, because
 * it is concatenated with the prefix. Evaluates to fprintf()'s return value.
 */
#define R600_ERR(fmt, ...) \
	fprintf(stderr, "EE %s:%d %s - " fmt, \
		__FILE__, __LINE__, __func__, ##__VA_ARGS__)
612
613 #endif