Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git] / src / gallium / drivers / radeonsi / si_hw_context.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jerome Glisse
25 */
26
27 #include "si_pipe.h"
28
/* Ensure the gfx CS has room for at least num_dw more dwords, flushing it
 * (asynchronously) first if it does not.  When count_draw_in is true, the
 * reservation additionally covers every dirty atom/state and an upper-bound
 * draw packet, so that the caller's subsequent draw cannot overflow the CS.
 * Also flushes early if winsys buffer memory usage is above its limit.
 */
void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
		      boolean count_draw_in)
{
	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
	int i;

	/* If the CS is sufficiently large, don't count the space needed
	 * and just flush if there is less than 8 * 1024 (8192) dwords left. */
	if (cs->max_dw >= 24 * 1024) {
		if (cs->cdw > cs->max_dw - 8 * 1024)
			ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
		return;
	}

	/* There are two memory usage counters in the winsys for all buffers
	 * that have been added (cs_add_reloc) and two counters in the pipe
	 * driver for those that haven't been added yet.
	 * If the not-yet-added usage would push us over the limit, flush and
	 * start over; the counters are zeroed either way because the pending
	 * buffers are accounted for from this point on (by the winsys after
	 * the flush, or by having been added to the current CS).
	 * */
	if (!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs, ctx->b.vram, ctx->b.gtt)) {
		ctx->b.gtt = 0;
		ctx->b.vram = 0;
		ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
		return;
	}
	ctx->b.gtt = 0;
	ctx->b.vram = 0;

	/* The number of dwords we already used in the CS so far. */
	num_dw += cs->cdw;

	if (count_draw_in) {
		/* Space that every currently-dirty atom would emit. */
		for (i = 0; i < SI_NUM_ATOMS(ctx); i++) {
			if (ctx->atoms.array[i]->dirty) {
				num_dw += ctx->atoms.array[i]->num_dw;
			}
		}

		/* The number of dwords all the dirty states would take. */
		num_dw += si_pm4_dirty_dw(ctx);

		/* The upper-bound of how much a draw command would take. */
		num_dw += SI_MAX_DRAW_CS_DWORDS;
	}

	/* Count in queries_suspend. */
	num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend +
		  ctx->b.num_cs_dw_timer_queries_suspend;

	/* Count in streamout_end at the end of CS. */
	if (ctx->b.streamout.begin_emitted) {
		num_dw += ctx->b.streamout.num_dw_for_end;
	}

	/* Count in render_condition(NULL) at the end of CS. */
	if (ctx->b.predicate_drawing) {
		num_dw += 3;
	}

	/* Count in framebuffer cache flushes at the end of CS. */
	num_dw += ctx->atoms.s.cache_flush->num_dw;

	/* Extra room for trace packets when IB tracing is enabled. */
	if (ctx->screen->b.trace_bo)
		num_dw += SI_TRACE_CS_DWORDS * 2;

	/* Flush if there's not enough space. */
	if (num_dw > cs->max_dw) {
		ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
	}
}
99
100 void si_context_gfx_flush(void *context, unsigned flags,
101 struct pipe_fence_handle **fence)
102 {
103 struct si_context *ctx = context;
104 struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
105 struct radeon_winsys *ws = ctx->b.ws;
106
107 if (cs->cdw == ctx->b.initial_gfx_cs_size &&
108 (!fence || ctx->last_gfx_fence)) {
109 if (fence)
110 ws->fence_reference(fence, ctx->last_gfx_fence);
111 if (!(flags & RADEON_FLUSH_ASYNC))
112 ws->cs_sync_flush(cs);
113 return;
114 }
115
116 ctx->b.rings.gfx.flushing = true;
117
118 r600_preflush_suspend_features(&ctx->b);
119
120 ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
121 SI_CONTEXT_INV_TC_L1 |
122 SI_CONTEXT_INV_TC_L2 |
123 /* this is probably not needed anymore */
124 SI_CONTEXT_PS_PARTIAL_FLUSH;
125 si_emit_cache_flush(&ctx->b, NULL);
126
127 /* force to keep tiling flags */
128 flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
129
130 if (ctx->trace_buf)
131 si_trace_emit(ctx);
132
133 /* Save the IB for debug contexts. */
134 if (ctx->is_debug) {
135 free(ctx->last_ib);
136 ctx->last_ib_dw_size = cs->cdw;
137 ctx->last_ib = malloc(cs->cdw * 4);
138 memcpy(ctx->last_ib, cs->buf, cs->cdw * 4);
139 r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
140 r600_resource_reference(&ctx->trace_buf, NULL);
141 }
142
143 /* Flush the CS. */
144 ws->cs_flush(cs, flags, &ctx->last_gfx_fence,
145 ctx->screen->b.cs_count++);
146 ctx->b.rings.gfx.flushing = false;
147
148 if (fence)
149 ws->fence_reference(fence, ctx->last_gfx_fence);
150
151 si_begin_new_cs(ctx);
152 }
153
/* Initialize a freshly started gfx CS: recreate the trace buffer for debug
 * contexts, request begin-of-CS cache invalidations, re-emit the static init
 * config, mark all persistent atoms/states dirty so they are re-emitted on
 * the next draw, and reset all "last emitted" shadow state to unknown. */
void si_begin_new_cs(struct si_context *ctx)
{
	if (ctx->is_debug) {
		uint32_t zero = 0;

		/* Create a buffer used for writing trace IDs and initialize it to 0. */
		assert(!ctx->trace_buf);
		ctx->trace_buf = (struct r600_resource*)
				 pipe_buffer_create(ctx->b.b.screen, PIPE_BIND_CUSTOM,
						    PIPE_USAGE_STAGING, 4);
		/* Buffer creation can fail; tracing is simply skipped then. */
		if (ctx->trace_buf)
			pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
						    0, sizeof(zero), &zero);
		ctx->trace_id = 0;
	}

	if (ctx->trace_buf)
		si_trace_emit(ctx);

	/* Flush read caches at the beginning of CS. */
	ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
			SI_CONTEXT_INV_TC_L1 |
			SI_CONTEXT_INV_TC_L2 |
			SI_CONTEXT_INV_KCACHE |
			SI_CONTEXT_INV_ICACHE;

	/* set all valid group as dirty so they get reemited on
	 * next draw command
	 */
	si_pm4_reset_emitted(ctx);

	/* The CS initialization should be emitted before everything else. */
	si_pm4_emit(ctx, ctx->init_config);

	/* Mark every persistent atom dirty so its state is re-emitted. */
	si_mark_atom_dirty(ctx, &ctx->clip_regs);
	si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
	si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
	si_mark_atom_dirty(ctx, &ctx->msaa_config);
	si_mark_atom_dirty(ctx, &ctx->db_render_state);
	si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
	si_all_descriptors_begin_new_cs(ctx);

	r600_postflush_resume_features(&ctx->b);

	/* Remember how many dwords the CS prologue takes, so an otherwise
	 * empty CS can be recognized and skipped at flush time. */
	ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;

	/* Invalidate various draw states so that they are emitted before
	 * the first draw call.  -1 / ~0 / NULL mean "unknown". */
	si_invalidate_draw_sh_constants(ctx);
	ctx->last_primitive_restart_en = -1;
	ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
	ctx->last_gs_out_prim = -1;
	ctx->last_prim = -1;
	ctx->last_multi_vgt_param = -1;
	ctx->last_ls_hs_config = -1;
	ctx->last_rast_prim = -1;
	ctx->last_sc_line_stipple = ~0;
	ctx->emit_scratch_reloc = true;
	ctx->last_ls = NULL;
	ctx->last_tcs = NULL;
	ctx->last_tes_sh_base = -1;
	ctx->last_num_tcs_input_cp = -1;
}