radeonsi: clamp depth comparison value only for fixed point formats
[mesa.git] src/gallium/drivers/radeon/r600_pipe_common.h
/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Marek Olšák <maraeo@gmail.com>
 *
 */

/**
 * This file contains common screen and context structures and functions
 * for r600g and radeonsi.
 */

#ifndef R600_PIPE_COMMON_H
#define R600_PIPE_COMMON_H

#include <stdio.h>

#include "amd/common/ac_binary.h"

#include "radeon/radeon_winsys.h"

#include "util/disk_cache.h"
#include "util/u_blitter.h"
#include "util/list.h"
#include "util/u_range.h"
#include "util/slab.h"
#include "util/u_suballoc.h"
#include "util/u_transfer.h"
#include "util/u_threaded_context.h"

struct u_log_context;

#define ATI_VENDOR_ID 0x1002

#define R600_RESOURCE_FLAG_TRANSFER		(PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define R600_RESOURCE_FLAG_FLUSHED_DEPTH	(PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
#define R600_RESOURCE_FLAG_FORCE_TILING		(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
#define R600_RESOURCE_FLAG_DISABLE_DCC		(PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
#define R600_RESOURCE_FLAG_UNMAPPABLE		(PIPE_RESOURCE_FLAG_DRV_PRIV << 4)

#define R600_CONTEXT_STREAMOUT_FLUSH		(1u << 0)
/* Pipeline & streamout query controls. */
#define R600_CONTEXT_START_PIPELINE_STATS	(1u << 1)
#define R600_CONTEXT_STOP_PIPELINE_STATS	(1u << 2)
#define R600_CONTEXT_FLUSH_FOR_RENDER_COND	(1u << 3)
#define R600_CONTEXT_PRIVATE_FLAG		(1u << 4)

/* special primitive types */
#define R600_PRIM_RECTANGLE_LIST	PIPE_PRIM_MAX

#define R600_NOT_QUERY			0xffffffff

/* Debug flags. */
#define DBG_VS			(1 << PIPE_SHADER_VERTEX)
#define DBG_PS			(1 << PIPE_SHADER_FRAGMENT)
#define DBG_GS			(1 << PIPE_SHADER_GEOMETRY)
#define DBG_TCS			(1 << PIPE_SHADER_TESS_CTRL)
#define DBG_TES			(1 << PIPE_SHADER_TESS_EVAL)
#define DBG_CS			(1 << PIPE_SHADER_COMPUTE)
#define DBG_ALL_SHADERS		(DBG_FS - 1)
#define DBG_FS			(1 << 6) /* fetch shader */
#define DBG_TEX			(1 << 7)
#define DBG_NIR			(1 << 8)
#define DBG_COMPUTE		(1 << 9)
/* gap */
#define DBG_VM			(1 << 11)
#define DBG_NO_IR		(1 << 12)
#define DBG_NO_TGSI		(1 << 13)
#define DBG_NO_ASM		(1 << 14)
#define DBG_PREOPT_IR		(1 << 15)
#define DBG_CHECK_IR		(1 << 16)
#define DBG_NO_OPT_VARIANT	(1 << 17)
#define DBG_FS_CORRECT_DERIVS_AFTER_KILL (1 << 18)
/* gaps */
#define DBG_TEST_DMA		(1 << 20)
/* Bits 21-31 are reserved for the r600g driver. */
/* features */
#define DBG_NO_ASYNC_DMA	(1ull << 32)
#define DBG_NO_HYPERZ		(1ull << 33)
#define DBG_NO_DISCARD_RANGE	(1ull << 34)
#define DBG_NO_2D_TILING	(1ull << 35)
#define DBG_NO_TILING		(1ull << 36)
#define DBG_SWITCH_ON_EOP	(1ull << 37)
#define DBG_FORCE_DMA		(1ull << 38)
#define DBG_PRECOMPILE		(1ull << 39)
#define DBG_INFO		(1ull << 40)
#define DBG_NO_WC		(1ull << 41)
#define DBG_CHECK_VM		(1ull << 42)
#define DBG_NO_DCC		(1ull << 43)
#define DBG_NO_DCC_CLEAR	(1ull << 44)
#define DBG_NO_RB_PLUS		(1ull << 45)
#define DBG_SI_SCHED		(1ull << 46)
#define DBG_MONOLITHIC_SHADERS	(1ull << 47)
#define DBG_NO_OUT_OF_ORDER	(1ull << 48)
#define DBG_UNSAFE_MATH		(1ull << 49)
#define DBG_NO_DCC_FB		(1ull << 50)
#define DBG_TEST_VMFAULT_CP	(1ull << 51)
#define DBG_TEST_VMFAULT_SDMA	(1ull << 52)
#define DBG_TEST_VMFAULT_SHADER	(1ull << 53)
#define DBG_NO_DPBB		(1ull << 54)
#define DBG_NO_DFSM		(1ull << 55)

#define R600_MAP_BUFFER_ALIGNMENT 64
#define R600_MAX_VIEWPORTS        16

#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024

enum r600_coherency {
	R600_COHERENCY_NONE, /* no cache flushes needed */
	R600_COHERENCY_SHADER,
	R600_COHERENCY_CB_META,
};
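
/* A minimal usage sketch (hypothetical caller code, not part of this file):
 * a buffer clear whose result will be read by shaders would pass
 * R600_COHERENCY_SHADER, while a clear of CB metadata (e.g. CMASK/DCC)
 * would pass R600_COHERENCY_CB_META:
 *
 *    rctx->clear_buffer(&rctx->b, dst, offset, size, value,
 *                       R600_COHERENCY_SHADER);
 */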

#ifdef PIPE_ARCH_BIG_ENDIAN
#define R600_BIG_ENDIAN 1
#else
#define R600_BIG_ENDIAN 0
#endif

struct r600_common_context;
struct r600_perfcounters;
struct tgsi_shader_info;
struct r600_qbo_state;

void si_radeon_shader_binary_init(struct ac_shader_binary *b);
void si_radeon_shader_binary_clean(struct ac_shader_binary *b);

/* Only 32-bit buffer allocations are supported; gallium doesn't support
 * larger allocations at the moment.
 */
struct r600_resource {
	struct threaded_resource	b;

	/* Winsys objects. */
	struct pb_buffer		*buf;
	uint64_t			gpu_address;
	/* Memory usage if the buffer placement is optimal. */
	uint64_t			vram_usage;
	uint64_t			gart_usage;

	/* Resource properties. */
	uint64_t			bo_size;
	unsigned			bo_alignment;
	enum radeon_bo_domain		domains;
	enum radeon_bo_flag		flags;
	unsigned			bind_history;
	/* The buffer range which is initialized (with a write transfer,
	 * streamout, DMA, or as a random access target). The rest of
	 * the buffer is considered invalid and can be mapped unsynchronized.
	 *
	 * This allows unsynchronized mapping of a buffer range which hasn't
	 * been used yet. It's for applications which forget to use
	 * the unsynchronized map flag and expect the driver to figure it out.
	 */
	struct util_range		valid_buffer_range;
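
	/* A minimal sketch of how a transfer_map path can use the range
	 * above (illustrative only, assuming gallium's util_ranges_intersect
	 * and PIPE_TRANSFER_UNSYNCHRONIZED):
	 *
	 *    if (!util_ranges_intersect(&rbuffer->valid_buffer_range,
	 *                               box->x, box->x + box->width))
	 *            usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	 */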

	/* For buffers only. This indicates that a write operation has been
	 * performed by TC L2, but the cache hasn't been flushed.
	 * Any hw block which doesn't use or bypasses TC L2 should check this
	 * flag and flush the cache before using the buffer.
	 *
	 * For example, TC L2 must be flushed if a buffer which has been
	 * modified by a shader store instruction is about to be used as
	 * an index buffer. The reason is that VGT DMA index fetching doesn't
	 * use TC L2.
	 */
	bool				TC_L2_dirty;
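
	/* A minimal sketch (hypothetical caller code; the actual flush flag
	 * is driver-specific, e.g. a "write back L2" context flag):
	 *
	 *    if (rbuffer->TC_L2_dirty) {
	 *            rctx->flags |= <writeback-L2 context flag>;
	 *            rbuffer->TC_L2_dirty = false;
	 *    }
	 */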

	/* Whether the resource has been exported via resource_get_handle. */
	unsigned			external_usage; /* PIPE_HANDLE_USAGE_* */

	/* Whether this resource is referenced by bindless handles. */
	bool				texture_handle_allocated;
	bool				image_handle_allocated;
};

struct r600_transfer {
	struct threaded_transfer	b;
	struct r600_resource		*staging;
	unsigned			offset;
};

struct r600_fmask_info {
	uint64_t offset;
	uint64_t size;
	unsigned alignment;
	unsigned pitch_in_pixels;
	unsigned bank_height;
	unsigned slice_tile_max;
	unsigned tile_mode_index;
	unsigned tile_swizzle;
};

struct r600_cmask_info {
	uint64_t offset;
	uint64_t size;
	unsigned alignment;
	unsigned slice_tile_max;
	uint64_t base_address_reg;
};

struct r600_texture {
	struct r600_resource		resource;

	uint64_t			size;
	unsigned			num_level0_transfers;
	enum pipe_format		db_render_format;
	bool				is_depth;
	bool				db_compatible;
	bool				can_sample_z;
	bool				can_sample_s;
	unsigned			dirty_level_mask; /* each bit says if that mipmap is compressed */
	unsigned			stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
	struct r600_texture		*flushed_depth_texture;
	struct radeon_surf		surface;

	/* Colorbuffer compression and fast clear. */
	struct r600_fmask_info		fmask;
	struct r600_cmask_info		cmask;
	struct r600_resource		*cmask_buffer;
	uint64_t			dcc_offset; /* 0 = disabled */
	unsigned			cb_color_info; /* fast clear enable bit */
	unsigned			color_clear_value[2];
	unsigned			last_msaa_resolve_target_micro_mode;

	/* Depth buffer compression and fast clear. */
	uint64_t			htile_offset;
	bool				tc_compatible_htile;
	bool				depth_cleared; /* if it was cleared at least once */
	float				depth_clear_value;
	bool				stencil_cleared; /* if it was cleared at least once */
	uint8_t				stencil_clear_value;
	bool				upgraded_depth; /* upgraded from unorm to Z32_FLOAT */

	bool				non_disp_tiling; /* R600-Cayman only */

	/* Whether the texture is a displayable back buffer and needs DCC
	 * decompression, which is expensive. Therefore, it's enabled only
	 * if statistics suggest that it will pay off and it's allocated
	 * separately. It can't be bound as a sampler by apps. Limited to
	 * target == 2D and last_level == 0. If enabled, dcc_offset contains
	 * the absolute GPUVM address, not the relative one.
	 */
	struct r600_resource		*dcc_separate_buffer;
	/* When DCC is temporarily disabled, the separate buffer is here. */
	struct r600_resource		*last_dcc_separate_buffer;
	/* We need to track DCC dirtiness, because st/dri usually calls
	 * flush_resource twice per frame (not a bug) and we don't want to
	 * decompress DCC twice. Also, the dirty tracking must be done even
	 * if DCC isn't used, because it's required by the DCC usage analysis
	 * for a possible future enablement.
	 */
	bool				separate_dcc_dirty;
	/* Statistics gathering for the DCC enablement heuristic. */
	bool				dcc_gather_statistics;
	/* Estimate of how much this color buffer is written to in units of
	 * full-screen draws: ps_invocations / (width * height)
	 * Shader kills, late Z, and blending with trivial discards make it
	 * inaccurate (we need to count CB updates, not PS invocations).
	 */
	unsigned			ps_draw_ratio;
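	/* Worked example for ps_draw_ratio above: ps_invocations ==
	 * 2 * width * height yields a ratio of 2, i.e. two full-screen
	 * draws' worth of pixel shading. */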
	/* The number of clears since the last DCC usage analysis. */
	unsigned			num_slow_clears;

	/* Counter that should be non-zero if the texture is bound to a
	 * framebuffer. Implemented in radeonsi only.
	 */
	uint32_t			framebuffers_bound;
};

struct r600_surface {
	struct pipe_surface		base;

	/* These can vary with block-compressed textures. */
	unsigned width0;
	unsigned height0;

	bool color_initialized;
	bool depth_initialized;

	/* Misc. color flags. */
	bool alphatest_bypass;
	bool export_16bpc;
	bool color_is_int8;
	bool color_is_int10;
	bool dcc_incompatible;

	/* Color registers. */
	unsigned cb_color_info;
	unsigned cb_color_base;
	unsigned cb_color_view;
	unsigned cb_color_size;		/* R600 only */
	unsigned cb_color_dim;		/* EG only */
	unsigned cb_color_pitch;	/* EG and later */
	unsigned cb_color_slice;	/* EG and later */
	unsigned cb_color_attrib;	/* EG and later */
	unsigned cb_color_attrib2;	/* GFX9 and later */
	unsigned cb_dcc_control;	/* VI and later */
	unsigned cb_color_fmask;	/* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
	unsigned cb_color_fmask_slice;	/* EG and later */
	unsigned cb_color_cmask;	/* CB_COLORn_TILE (r600 only) */
	unsigned cb_color_mask;		/* R600 only */
	unsigned spi_shader_col_format;		/* SI+, no blending, no alpha-to-coverage. */
	unsigned spi_shader_col_format_alpha;	/* SI+, alpha-to-coverage */
	unsigned spi_shader_col_format_blend;	/* SI+, blending without alpha. */
	unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
	struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
	struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */

	/* DB registers. */
	uint64_t db_depth_base;		/* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
	uint64_t db_stencil_base;	/* EG and later */
	uint64_t db_htile_data_base;
	unsigned db_depth_info;		/* R600 only, then SI and later */
	unsigned db_z_info;		/* EG and later */
	unsigned db_z_info2;		/* GFX9+ */
	unsigned db_depth_view;
	unsigned db_depth_size;
	unsigned db_depth_slice;	/* EG and later */
	unsigned db_stencil_info;	/* EG and later */
	unsigned db_stencil_info2;	/* GFX9+ */
	unsigned db_prefetch_limit;	/* R600 only */
	unsigned db_htile_surface;
	unsigned db_preload_control;	/* EG and later */
};

struct r600_mmio_counter {
	unsigned busy;
	unsigned idle;
};

union r600_mmio_counters {
	struct {
		/* For global GPU load including SDMA. */
		struct r600_mmio_counter gpu;

		/* GRBM_STATUS */
		struct r600_mmio_counter spi;
		struct r600_mmio_counter gui;
		struct r600_mmio_counter ta;
		struct r600_mmio_counter gds;
		struct r600_mmio_counter vgt;
		struct r600_mmio_counter ia;
		struct r600_mmio_counter sx;
		struct r600_mmio_counter wd;
		struct r600_mmio_counter bci;
		struct r600_mmio_counter sc;
		struct r600_mmio_counter pa;
		struct r600_mmio_counter db;
		struct r600_mmio_counter cp;
		struct r600_mmio_counter cb;

		/* SRBM_STATUS2 */
		struct r600_mmio_counter sdma;

		/* CP_STAT */
		struct r600_mmio_counter pfp;
		struct r600_mmio_counter meq;
		struct r600_mmio_counter me;
		struct r600_mmio_counter surf_sync;
		struct r600_mmio_counter cp_dma;
		struct r600_mmio_counter scratch_ram;
	} named;
	unsigned array[0];
};
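
/* A minimal sketch of how the union's two views can be combined
 * (hypothetical code; p_atomic_inc is from util/u_atomic.h):
 *
 *    union r600_mmio_counters *c = &rscreen->mmio_counters;
 *    unsigned idx = &c->named.spi.busy - c->array;
 *    p_atomic_inc(&c->array[idx]);  (same location as named.spi.busy)
 */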

struct r600_memory_object {
	struct pipe_memory_object	b;
	struct pb_buffer		*buf;
	uint32_t			stride;
	uint32_t			offset;
};

struct r600_common_screen {
	struct pipe_screen		b;
	struct radeon_winsys		*ws;
	enum radeon_family		family;
	enum chip_class			chip_class;
	struct radeon_info		info;
	uint64_t			debug_flags;
	bool				has_cp_dma;
	bool				has_streamout;
	bool				has_rbplus;     /* if RB+ registers exist */
	bool				rbplus_allowed; /* if RB+ is allowed */

	struct disk_cache		*disk_shader_cache;

	struct slab_parent_pool		pool_transfers;

	/* Texture filter settings. */
	int				force_aniso; /* -1 = disabled */

	/* Auxiliary context. Mainly used to initialize resources.
	 * It must be locked prior to use and flushed before unlocking. */
	struct pipe_context		*aux_context;
	mtx_t				aux_context_lock;
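
	/* A minimal sketch of the locking protocol described above
	 * (hypothetical caller code):
	 *
	 *    mtx_lock(&rscreen->aux_context_lock);
	 *    ... record work with rscreen->aux_context ...
	 *    rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
	 *    mtx_unlock(&rscreen->aux_context_lock);
	 */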

	/* This must be in the screen, because UE4 uses one context for
	 * compilation and another one for rendering.
	 */
	unsigned			num_compilations;
	/* Along with ST_DEBUG=precompile, this should show if applications
	 * are loading shaders on demand. This is a monotonic counter.
	 */
	unsigned			num_shaders_created;
	unsigned			num_shader_cache_hits;

	/* GPU load thread. */
	mtx_t				gpu_load_mutex;
	thrd_t				gpu_load_thread;
	union r600_mmio_counters	mmio_counters;
	volatile unsigned		gpu_load_stop_thread; /* bool */

	char				renderer_string[100];

	/* Performance counters. */
	struct r600_perfcounters	*perfcounters;

	/* If pipe_screen wants to recompute and re-emit the framebuffer,
	 * sampler, and image states of all contexts, it should atomically
	 * increment this.
	 *
	 * Each context will compare this with its own last known value of
	 * the counter before drawing and re-emit the states accordingly.
	 */
	unsigned			dirty_tex_counter;
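
	/* A minimal sketch of the counter protocol described above
	 * (hypothetical code; p_atomic_* are from util/u_atomic.h):
	 *
	 *    p_atomic_inc(&rscreen->dirty_tex_counter);      (screen side)
	 *
	 *    unsigned n = p_atomic_read(&rscreen->dirty_tex_counter);
	 *    if (n != rctx->last_dirty_tex_counter) {        (before a draw)
	 *            rctx->last_dirty_tex_counter = n;
	 *            ... re-emit framebuffer/sampler/image states ...
	 *    }
	 */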

	/* Atomically increment this counter when an existing texture's
	 * metadata is enabled or disabled in a way that requires changing
	 * contexts' compressed texture binding masks.
	 */
	unsigned			compressed_colortex_counter;

	struct {
		/* Context flags to set so that all writes from earlier jobs
		 * in the CP are seen by L2 clients.
		 */
		unsigned cp_to_L2;

		/* Context flags to set so that all writes from earlier jobs
		 * that end in L2 are seen by CP.
		 */
		unsigned L2_to_cp;

		/* Context flags to set so that all writes from earlier
		 * compute jobs are seen by L2 clients.
		 */
		unsigned compute_to_L2;
	} barrier_flags;
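
	/* A minimal sketch (hypothetical caller code): a CP DMA read of
	 * a buffer that shaders may have written through L2 would first do
	 *
	 *    rctx->flags |= rctx->screen->barrier_flags.L2_to_cp;
	 */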

	void (*query_opaque_metadata)(struct r600_common_screen *rscreen,
				      struct r600_texture *rtex,
				      struct radeon_bo_metadata *md);

	void (*apply_opaque_metadata)(struct r600_common_screen *rscreen,
				      struct r600_texture *rtex,
				      struct radeon_bo_metadata *md);
};

/* This encapsulates a state or an operation which can be emitted into the GPU
 * command stream. */
struct r600_atom {
	void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
	unsigned			num_dw;
	unsigned short			id;
};
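
/* A minimal sketch of the atom pattern (hypothetical driver code):
 * state changes mark an atom dirty, and the draw path later calls
 * emit() once for each dirty atom:
 *
 *    rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
 *    ...
 *    atom->emit(rctx, atom);
 */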

struct r600_so_target {
	struct pipe_stream_output_target b;

	/* The buffer where BUFFER_FILLED_SIZE is stored. */
	struct r600_resource	*buf_filled_size;
	unsigned		buf_filled_size_offset;
	bool			buf_filled_size_valid;

	unsigned		stride_in_dw;
};

struct r600_streamout {
	struct r600_atom		begin_atom;
	bool				begin_emitted;
	unsigned			num_dw_for_end;

	unsigned			enabled_mask;
	unsigned			num_targets;
	struct r600_so_target		*targets[PIPE_MAX_SO_BUFFERS];

	unsigned			append_bitmask;
	bool				suspended;

	/* External state which comes from the vertex shader;
	 * it must be set explicitly when binding a shader. */
	uint16_t			*stride_in_dw;
	unsigned			enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */

	/* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
	unsigned			hw_enabled_mask;

	/* The state of VGT_STRMOUT_(CONFIG|EN). */
	struct r600_atom		enable_atom;
	bool				streamout_enabled;
	bool				prims_gen_query_enabled;
	int				num_prims_gen_queries;
};

struct r600_signed_scissor {
	int minx;
	int miny;
	int maxx;
	int maxy;
};

struct r600_scissors {
	struct r600_atom		atom;
	unsigned			dirty_mask;
	struct pipe_scissor_state	states[R600_MAX_VIEWPORTS];
};

struct r600_viewports {
	struct r600_atom		atom;
	unsigned			dirty_mask;
	unsigned			depth_range_dirty_mask;
	struct pipe_viewport_state	states[R600_MAX_VIEWPORTS];
	struct r600_signed_scissor	as_scissor[R600_MAX_VIEWPORTS];
};

struct r600_ring {
	struct radeon_winsys_cs		*cs;
	void (*flush)(void *ctx, unsigned flags,
		      struct pipe_fence_handle **fence);
};

/* Saved CS data for debugging features. */
struct radeon_saved_cs {
	uint32_t			*ib;
	unsigned			num_dw;

	struct radeon_bo_list_item	*bo_list;
	unsigned			bo_count;
};

struct r600_common_context {
	struct pipe_context b; /* base class */

	struct r600_common_screen	*screen;
	struct radeon_winsys		*ws;
	struct radeon_winsys_ctx	*ctx;
	enum radeon_family		family;
	enum chip_class			chip_class;
	struct r600_ring		gfx;
	struct r600_ring		dma;
	struct pipe_fence_handle	*last_gfx_fence;
	struct pipe_fence_handle	*last_sdma_fence;
	struct r600_resource		*eop_bug_scratch;
	unsigned			num_gfx_cs_flushes;
	unsigned			initial_gfx_cs_size;
	unsigned			gpu_reset_counter;
	unsigned			last_dirty_tex_counter;
	unsigned			last_compressed_colortex_counter;
	unsigned			last_num_draw_calls;

	struct threaded_context		*tc;
	struct u_suballocator		*allocator_zeroed_memory;
	struct slab_child_pool		pool_transfers;
	struct slab_child_pool		pool_transfers_unsync; /* for threaded_context */

	/* Current unaccounted memory usage. */
	uint64_t			vram;
	uint64_t			gtt;

	/* States. */
	struct r600_streamout		streamout;
	struct r600_scissors		scissors;
	struct r600_viewports		viewports;
	bool				scissor_enabled;
	bool				clip_halfz;
	bool				vs_writes_viewport_index;
	bool				vs_disables_clipping_viewport;

	/* Additional context states. */
	unsigned			flags; /* flush flags */

	/* Queries. */
	/* Maintain the list of active queries for pausing between IBs. */
	int				num_occlusion_queries;
	int				num_perfect_occlusion_queries;
	struct list_head		active_queries;
	unsigned			num_cs_dw_queries_suspend;
	/* Misc stats. */
	unsigned			num_draw_calls;
	unsigned			num_decompress_calls;
	unsigned			num_mrt_draw_calls;
	unsigned			num_prim_restart_calls;
	unsigned			num_spill_draw_calls;
	unsigned			num_compute_calls;
	unsigned			num_spill_compute_calls;
	unsigned			num_dma_calls;
	unsigned			num_cp_dma_calls;
	unsigned			num_vs_flushes;
	unsigned			num_ps_flushes;
	unsigned			num_cs_flushes;
	unsigned			num_cb_cache_flushes;
	unsigned			num_db_cache_flushes;
	unsigned			num_L2_invalidates;
	unsigned			num_L2_writebacks;
	unsigned			num_resident_handles;
	uint64_t			num_alloc_tex_transfer_bytes;
	unsigned			last_tex_ps_draw_ratio; /* for query */

	/* Render condition. */
	struct r600_atom		render_cond_atom;
	struct pipe_query		*render_cond;
	unsigned			render_cond_mode;
	bool				render_cond_invert;
	bool				render_cond_force_off; /* for u_blitter */

	/* MSAA sample locations.
	 * The first index is the sample index.
	 * The second index is the coordinate: X, Y. */
	float				sample_locations_1x[1][2];
	float				sample_locations_2x[2][2];
	float				sample_locations_4x[4][2];
	float				sample_locations_8x[8][2];
	float				sample_locations_16x[16][2];

	/* Statistics gathering for the DCC enablement heuristic. It can't be
	 * in r600_texture because r600_texture can be shared by multiple
	 * contexts. This is for back buffers only. We shouldn't get too many
	 * of those.
	 *
	 * X11 DRI3 rotates among a finite set of back buffers. They should
	 * all fit in this array. If they don't, separate DCC might never be
	 * enabled by DCC stat gathering.
	 */
	struct {
		struct r600_texture		*tex;
		/* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */
		struct pipe_query		*ps_stats[3];
		/* If all slots are used and another slot is needed,
		 * the least recently used slot is evicted based on this. */
		int64_t				last_use_timestamp;
		bool				query_active;
	} dcc_stats[5];

	struct pipe_debug_callback	debug;
	struct pipe_device_reset_callback device_reset_callback;
	struct u_log_context		*log;

	void				*query_result_shader;

	/* Copy one resource to another using async DMA. */
	void (*dma_copy)(struct pipe_context *ctx,
			 struct pipe_resource *dst,
			 unsigned dst_level,
			 unsigned dst_x, unsigned dst_y, unsigned dst_z,
			 struct pipe_resource *src,
			 unsigned src_level,
			 const struct pipe_box *src_box);

	void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
				 uint64_t offset, uint64_t size, unsigned value);

	void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
			     uint64_t offset, uint64_t size, unsigned value,
			     enum r600_coherency coher);

	void (*blit_decompress_depth)(struct pipe_context *ctx,
				      struct r600_texture *texture,
				      struct r600_texture *staging,
				      unsigned first_level, unsigned last_level,
				      unsigned first_layer, unsigned last_layer,
				      unsigned first_sample, unsigned last_sample);

	void (*decompress_dcc)(struct pipe_context *ctx,
			       struct r600_texture *rtex);

	/* Reallocate the buffer and update all resource bindings where
	 * the buffer is bound, including all resource descriptors. */
	void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);

	/* Update all resource bindings where the buffer is bound, including
	 * all resource descriptors. This is invalidate_buffer without
	 * the invalidation. */
	void (*rebind_buffer)(struct pipe_context *ctx, struct pipe_resource *buf,
			      uint64_t old_gpu_address);

	/* Enable or disable occlusion queries. */
	void (*set_occlusion_query_state)(struct pipe_context *ctx,
					  bool old_enable,
					  bool old_perfect_enable);

	void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st);

	/* This ensures there is enough space in the command stream. */
	void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
				  bool include_draw_vbo);

	void (*set_atom_dirty)(struct r600_common_context *ctx,
			       struct r600_atom *atom, bool dirty);

	void (*check_vm_faults)(struct r600_common_context *ctx,
				struct radeon_saved_cs *saved,
				enum ring_type ring);
};

/* r600_buffer_common.c */
bool si_rings_is_buffer_referenced(struct r600_common_context *ctx,
				   struct pb_buffer *buf,
				   enum radeon_bo_usage usage);
void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx,
				    struct r600_resource *resource,
				    unsigned usage);
void si_buffer_subdata(struct pipe_context *ctx,
		       struct pipe_resource *buffer,
		       unsigned usage, unsigned offset,
		       unsigned size, const void *data);
void si_init_resource_fields(struct r600_common_screen *rscreen,
			     struct r600_resource *res,
			     uint64_t size, unsigned alignment);
bool si_alloc_resource(struct r600_common_screen *rscreen,
		       struct r600_resource *res);
struct pipe_resource *si_buffer_create(struct pipe_screen *screen,
				       const struct pipe_resource *templ,
				       unsigned alignment);
struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
					       unsigned flags,
					       unsigned usage,
					       unsigned size,
					       unsigned alignment);
struct pipe_resource *
si_buffer_from_user_memory(struct pipe_screen *screen,
			   const struct pipe_resource *templ,
			   void *user_memory);
void si_invalidate_resource(struct pipe_context *ctx,
			    struct pipe_resource *resource);
void si_replace_buffer_storage(struct pipe_context *ctx,
			       struct pipe_resource *dst,
			       struct pipe_resource *src);

/* r600_common_pipe.c */
void si_gfx_write_event_eop(struct r600_common_context *ctx,
			    unsigned event, unsigned event_flags,
			    unsigned data_sel,
			    struct r600_resource *buf, uint64_t va,
			    uint32_t new_fence, unsigned query_type);
unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen);
void si_gfx_wait_fence(struct r600_common_context *ctx,
		       uint64_t va, uint32_t ref, uint32_t mask);
void si_draw_rectangle(struct blitter_context *blitter,
		       int x1, int y1, int x2, int y2,
		       float depth, unsigned num_instances,
		       enum blitter_attrib_type type,
		       const union blitter_attrib *attrib);
bool si_common_screen_init(struct r600_common_screen *rscreen,
			   struct radeon_winsys *ws);
void si_destroy_common_screen(struct r600_common_screen *rscreen);
void si_preflush_suspend_features(struct r600_common_context *ctx);
void si_postflush_resume_features(struct r600_common_context *ctx);
bool si_common_context_init(struct r600_common_context *rctx,
			    struct r600_common_screen *rscreen,
			    unsigned context_flags);
void si_common_context_cleanup(struct r600_common_context *rctx);
bool si_can_dump_shader(struct r600_common_screen *rscreen,
			unsigned processor);
bool si_extra_shader_checks(struct r600_common_screen *rscreen,
			    unsigned processor);
void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
			    uint64_t offset, uint64_t size, unsigned value);
struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
						const struct pipe_resource *templ);
const char *si_get_llvm_processor_name(enum radeon_family family);
void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
		       struct r600_resource *dst, struct r600_resource *src);
void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
		struct radeon_saved_cs *saved, bool get_buffer_list);
void si_clear_saved_cs(struct radeon_saved_cs *saved);
bool si_check_device_reset(struct r600_common_context *rctx);

/* r600_gpu_load.c */
void si_gpu_load_kill_thread(struct r600_common_screen *rscreen);
uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type);
unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type,
			uint64_t begin);

/* r600_perfcounters.c */
void si_perfcounters_destroy(struct r600_common_screen *rscreen);

/* r600_query.c */
void si_init_screen_query_functions(struct r600_common_screen *rscreen);
void si_init_query_functions(struct r600_common_context *rctx);
void si_suspend_queries(struct r600_common_context *ctx);
void si_resume_queries(struct r600_common_context *ctx);

/* r600_streamout.c */
void si_streamout_buffers_dirty(struct r600_common_context *rctx);
void si_common_set_streamout_targets(struct pipe_context *ctx,
				     unsigned num_targets,
				     struct pipe_stream_output_target **targets,
				     const unsigned *offset);
void si_emit_streamout_end(struct r600_common_context *rctx);
void si_update_prims_generated_query_state(struct r600_common_context *rctx,
					   unsigned type, int diff);
void si_streamout_init(struct r600_common_context *rctx);

/* r600_test_dma.c */
void si_test_dma(struct r600_common_screen *rscreen);

/* r600_texture.c */
bool si_prepare_for_dma_blit(struct r600_common_context *rctx,
			     struct r600_texture *rdst,
			     unsigned dst_level, unsigned dstx,
			     unsigned dsty, unsigned dstz,
			     struct r600_texture *rsrc,
			     unsigned src_level,
			     const struct pipe_box *src_box);
void si_texture_get_fmask_info(struct r600_common_screen *rscreen,
			       struct r600_texture *rtex,
			       unsigned nr_samples,
			       struct r600_fmask_info *out);
bool si_init_flushed_depth_texture(struct pipe_context *ctx,
				   struct pipe_resource *texture,
				   struct r600_texture **staging);
void si_print_texture_info(struct r600_common_screen *rscreen,
			   struct r600_texture *rtex, struct u_log_context *log);
struct pipe_resource *si_texture_create(struct pipe_screen *screen,
					const struct pipe_resource *templ);
bool vi_dcc_formats_compatible(enum pipe_format format1,
			       enum pipe_format format2);
bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex,
				     unsigned level,
				     enum pipe_format view_format);
void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx,
					   struct pipe_resource *tex,
					   unsigned level,
					   enum pipe_format view_format);
struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
					      struct pipe_resource *texture,
					      const struct pipe_surface *templ,
					      unsigned width0, unsigned height0,
					      unsigned width, unsigned height);
unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap);
void vi_separate_dcc_start_query(struct pipe_context *ctx,
				 struct r600_texture *tex);
void vi_separate_dcc_stop_query(struct pipe_context *ctx,
				struct r600_texture *tex);
void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
					     struct r600_texture *tex);
void vi_dcc_clear_level(struct r600_common_context *rctx,
			struct r600_texture *rtex,
			unsigned level, unsigned clear_value);
void si_do_fast_color_clear(struct r600_common_context *rctx,
			    struct pipe_framebuffer_state *fb,
			    struct r600_atom *fb_state,
			    unsigned *buffers, ubyte *dirty_cbufs,
			    const union pipe_color_union *color);
bool si_texture_disable_dcc(struct r600_common_context *rctx,
			    struct r600_texture *rtex);
void si_init_screen_texture_functions(struct r600_common_screen *rscreen);
void si_init_context_texture_functions(struct r600_common_context *rctx);

/* r600_viewport.c */
void si_apply_scissor_bug_workaround(struct r600_common_context *rctx,
				     struct pipe_scissor_state *scissor);
void si_viewport_set_rast_deps(struct r600_common_context *rctx,
			       bool scissor_enable, bool clip_halfz);
void si_update_vs_writes_viewport_index(struct r600_common_context *rctx,
					struct tgsi_shader_info *info);
void si_init_viewport_functions(struct r600_common_context *rctx);

/* cayman_msaa.c */
void si_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
			    unsigned sample_index, float *out_value);
void si_init_msaa(struct pipe_context *ctx);
void si_common_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
void si_common_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
				int ps_iter_samples, int overrast_samples,
				unsigned sc_mode_cntl_1);


/* Inline helpers. */

static inline struct r600_resource *r600_resource(struct pipe_resource *r)
{
	return (struct r600_resource*)r;
}

static inline void
r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
{
	pipe_resource_reference((struct pipe_resource **)ptr,
				(struct pipe_resource *)res);
}

static inline void
r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res)
{
	pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b);
}

static inline void
r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_resource *res = (struct r600_resource *)r;

	if (res) {
		/* Add memory usage for need_gfx_cs_space */
		rctx->vram += res->vram_usage;
		rctx->gtt += res->gart_usage;
	}
}

static inline bool r600_get_strmout_en(struct r600_common_context *rctx)
{
	return rctx->streamout.streamout_enabled ||
	       rctx->streamout.prims_gen_query_enabled;
}

#define SQ_TEX_XY_FILTER_POINT			0x00
#define SQ_TEX_XY_FILTER_BILINEAR		0x01
#define SQ_TEX_XY_FILTER_ANISO_POINT		0x02
#define SQ_TEX_XY_FILTER_ANISO_BILINEAR		0x03

static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso)
{
	if (filter == PIPE_TEX_FILTER_LINEAR)
		return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_BILINEAR
				     : SQ_TEX_XY_FILTER_BILINEAR;
	else
		return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_POINT
				     : SQ_TEX_XY_FILTER_POINT;
}

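/* Convert the maximum anisotropy degree to the hw encoding, which is
 * roughly log2: 0-1 -> 0, 2-3 -> 1, 4-7 -> 2, 8-15 -> 3, 16+ -> 4. */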
static inline unsigned r600_tex_aniso_filter(unsigned filter)
{
	if (filter < 2)
		return 0;
	if (filter < 4)
		return 1;
	if (filter < 8)
		return 2;
	if (filter < 16)
		return 3;
	return 4;
}

static inline unsigned r600_wavefront_size(enum radeon_family family)
{
	switch (family) {
	case CHIP_RV610:
	case CHIP_RS780:
	case CHIP_RV620:
	case CHIP_RS880:
		return 16;
	case CHIP_RV630:
	case CHIP_RV635:
	case CHIP_RV730:
	case CHIP_RV710:
	case CHIP_PALM:
	case CHIP_CEDAR:
		return 32;
	default:
		return 64;
	}
}

static inline enum radeon_bo_priority
r600_get_sampler_view_priority(struct r600_resource *res)
{
	if (res->b.b.target == PIPE_BUFFER)
		return RADEON_PRIO_SAMPLER_BUFFER;

	if (res->b.b.nr_samples > 1)
		return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;

	return RADEON_PRIO_SAMPLER_TEXTURE;
}

static inline bool
r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler)
{
	return (stencil_sampler && tex->can_sample_s) ||
	       (!stencil_sampler && tex->can_sample_z);
}

static inline bool
vi_dcc_enabled(struct r600_texture *tex, unsigned level)
{
	return tex->dcc_offset && level < tex->surface.num_dcc_levels;
}

static inline bool
r600_htile_enabled(struct r600_texture *tex, unsigned level)
{
	return tex->htile_offset && level == 0;
}

static inline bool
vi_tc_compat_htile_enabled(struct r600_texture *tex, unsigned level)
{
	assert(!tex->tc_compatible_htile || tex->htile_offset);
	return tex->tc_compatible_htile && level == 0;
}

#define COMPUTE_DBG(rscreen, fmt, args...) \
	do { \
		if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
	} while (0)

#define R600_ERR(fmt, args...) \
	fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)

/* For MSAA sample positions. */
#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
	(((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \
	 (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) | \
	 (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
	 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))

static inline int S_FIXED(float value, unsigned frac_bits)
{
	return value * (1 << frac_bits);
}
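
/* A minimal sketch (hypothetical values): packing four sample offsets,
 * each converted to a signed 4-bit fixed-point value in units of
 * 1/16th pixel by S_FIXED:
 *
 *    unsigned locs = FILL_SREG(S_FIXED(0.25, 4), S_FIXED(0.25, 4),
 *                              S_FIXED(-0.25, 4), S_FIXED(-0.25, 4),
 *                              S_FIXED(0.25, 4), S_FIXED(-0.25, 4),
 *                              S_FIXED(-0.25, 4), S_FIXED(0.25, 4));
 */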

#endif