mesa.git: src/gallium/drivers/freedreno/freedreno_draw.c
/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/u_draw.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/format/u_format.h"
#include "util/u_helpers.h"

#include "freedreno_blitter.h"
#include "freedreno_draw.h"
#include "freedreno_context.h"
#include "freedreno_fence.h"
#include "freedreno_state.h"
#include "freedreno_resource.h"
#include "freedreno_query_acc.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"

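/* Helpers to record that a pipe_resource is read or written by the batch.
 * This feeds the batch/resource dependency tracking; NULL resources are
 * ignored.
 */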
static void
resource_read(struct fd_batch *batch, struct pipe_resource *prsc)
{
   if (!prsc)
      return;
   fd_batch_resource_read(batch, fd_resource(prsc));
}

static void
resource_written(struct fd_batch *batch, struct pipe_resource *prsc)
{
   if (!prsc)
      return;
   fd_batch_resource_write(batch, fd_resource(prsc));
}

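/* Walk the state that will be referenced by this draw and record each
 * resource as read or written by the batch.  This builds up the batch's
 * dependency tracking and works out which buffers need to be restored
 * (mem2gmem) and resolved (gmem2mem), plus the reasons GMEM rendering
 * might be required.
 */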
static void
batch_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info)
{
   struct fd_context *ctx = batch->ctx;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   unsigned buffers = 0, restore_buffers = 0;

   /* NOTE: needs to be before resource_written(batch->query_buf), otherwise
    * query_buf may not be created yet.
    */
   fd_batch_set_stage(batch, FD_STAGE_DRAW);

   /*
    * Figure out the buffers/features we need:
    */

   fd_screen_lock(ctx->screen);

   if (ctx->dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_ZSA)) {
      if (fd_depth_enabled(ctx)) {
         if (fd_resource(pfb->zsbuf->texture)->valid) {
            restore_buffers |= FD_BUFFER_DEPTH;
         } else {
            batch->invalidated |= FD_BUFFER_DEPTH;
         }
         batch->gmem_reason |= FD_GMEM_DEPTH_ENABLED;
         if (fd_depth_write_enabled(ctx)) {
            buffers |= FD_BUFFER_DEPTH;
            resource_written(batch, pfb->zsbuf->texture);
         } else {
            resource_read(batch, pfb->zsbuf->texture);
         }
      }

      if (fd_stencil_enabled(ctx)) {
         if (fd_resource(pfb->zsbuf->texture)->valid) {
            restore_buffers |= FD_BUFFER_STENCIL;
         } else {
            batch->invalidated |= FD_BUFFER_STENCIL;
         }
         batch->gmem_reason |= FD_GMEM_STENCIL_ENABLED;
         buffers |= FD_BUFFER_STENCIL;
         resource_written(batch, pfb->zsbuf->texture);
      }
   }

   if (fd_logicop_enabled(ctx))
      batch->gmem_reason |= FD_GMEM_LOGICOP_ENABLED;

   for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
      struct pipe_resource *surf;

      if (!pfb->cbufs[i])
         continue;

      surf = pfb->cbufs[i]->texture;

      if (fd_resource(surf)->valid) {
         restore_buffers |= PIPE_CLEAR_COLOR0 << i;
      } else {
         batch->invalidated |= PIPE_CLEAR_COLOR0 << i;
      }

      buffers |= PIPE_CLEAR_COLOR0 << i;

      if (fd_blend_enabled(ctx, i))
         batch->gmem_reason |= FD_GMEM_BLEND_ENABLED;

      if (ctx->dirty & FD_DIRTY_FRAMEBUFFER)
         resource_written(batch, pfb->cbufs[i]->texture);
   }

   /* Mark SSBOs */
   if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO) {
      const struct fd_shaderbuf_stateobj *so = &ctx->shaderbuf[PIPE_SHADER_FRAGMENT];

      foreach_bit (i, so->enabled_mask & so->writable_mask)
         resource_written(batch, so->sb[i].buffer);

      foreach_bit (i, so->enabled_mask & ~so->writable_mask)
         resource_read(batch, so->sb[i].buffer);
   }

   if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE) {
      foreach_bit (i, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask) {
         struct pipe_image_view *img =
            &ctx->shaderimg[PIPE_SHADER_FRAGMENT].si[i];
         if (img->access & PIPE_IMAGE_ACCESS_WRITE)
            resource_written(batch, img->resource);
         else
            resource_read(batch, img->resource);
      }
   }

   if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_CONST) {
      foreach_bit (i, ctx->constbuf[PIPE_SHADER_VERTEX].enabled_mask)
         resource_read(batch, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer);
   }

   if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_CONST) {
      foreach_bit (i, ctx->constbuf[PIPE_SHADER_FRAGMENT].enabled_mask)
         resource_read(batch, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer);
   }

   /* Mark VBOs as being read */
   if (ctx->dirty & FD_DIRTY_VTXBUF) {
      foreach_bit (i, ctx->vtx.vertexbuf.enabled_mask) {
         assert(!ctx->vtx.vertexbuf.vb[i].is_user_buffer);
         resource_read(batch, ctx->vtx.vertexbuf.vb[i].buffer.resource);
      }
   }

   /* Mark index buffer as being read */
   if (info->index_size)
      resource_read(batch, info->index.resource);

   /* Mark indirect draw buffer as being read */
   if (info->indirect)
      resource_read(batch, info->indirect->buffer);

   /* Mark textures as being read */
   if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) {
      foreach_bit (i, ctx->tex[PIPE_SHADER_VERTEX].valid_textures)
         resource_read(batch, ctx->tex[PIPE_SHADER_VERTEX].textures[i]->texture);
   }

   if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) {
      foreach_bit (i, ctx->tex[PIPE_SHADER_FRAGMENT].valid_textures)
         resource_read(batch, ctx->tex[PIPE_SHADER_FRAGMENT].textures[i]->texture);
   }

   /* Mark streamout buffers as being written.. */
   if (ctx->dirty & FD_DIRTY_STREAMOUT) {
      for (unsigned i = 0; i < ctx->streamout.num_targets; i++)
         if (ctx->streamout.targets[i])
            resource_written(batch, ctx->streamout.targets[i]->buffer);
   }

   resource_written(batch, batch->query_buf);

   list_for_each_entry(struct fd_acc_query, aq, &ctx->acc_active_queries, node)
      resource_written(batch, aq->prsc);

   fd_screen_unlock(ctx->screen);

   /* any buffers that haven't been cleared yet, we need to restore: */
   batch->restore |= restore_buffers & (FD_BUFFER_ALL & ~batch->invalidated);
   /* and any buffers used, need to be resolved: */
   batch->resolve |= buffers;
}

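/* pipe_context::draw_vbo() entry point.  Handles index-buffer upload,
 * emulation of unsupported primitives and batch bookkeeping before handing
 * the draw off to the per-generation backend via ctx->draw_vbo().
 */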
static void
fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
   struct fd_context *ctx = fd_context(pctx);

   /* for debugging problems with indirect draw, it is convenient
    * to be able to emulate it, to determine if game is feeding us
    * bogus data:
    */
   if (info->indirect && (fd_mesa_debug & FD_DBG_NOINDR)) {
      util_draw_indirect(pctx, info);
      return;
   }

   if (!info->count_from_stream_output && !info->indirect &&
       !info->primitive_restart &&
       !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
      return;

   /* TODO: push down the region versions into the tiles */
   if (!fd_render_condition_check(pctx))
      return;

   /* emulate unsupported primitives: */
   if (!fd_supported_prim(ctx, info->mode)) {
      if (ctx->streamout.num_targets > 0)
         debug_error("stream-out with emulated prims");
      util_primconvert_save_rasterizer_state(ctx->primconvert, ctx->rasterizer);
      util_primconvert_draw_vbo(ctx->primconvert, info);
      return;
   }

   /* Upload a user index buffer. */
   struct pipe_resource *indexbuf = NULL;
   unsigned index_offset = 0;
   struct pipe_draw_info new_info;
   if (info->index_size) {
      if (info->has_user_indices) {
         if (!util_upload_index_buffer(pctx, info, &indexbuf, &index_offset, 4))
            return;
         new_info = *info;
         new_info.index.resource = indexbuf;
         new_info.has_user_indices = false;
         info = &new_info;
      } else {
         indexbuf = info->index.resource;
      }
   }

   struct fd_batch *batch = NULL;
   fd_batch_reference(&batch, fd_context_batch(ctx));

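   /* When called from a blit that discards the previous contents of its
    * destination (in_discard_blit), start over with a clean batch:
    */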
   if (ctx->in_discard_blit) {
      fd_batch_reset(batch);
      fd_context_all_dirty(ctx);
   }

   batch_draw_tracking(batch, info);

   if (unlikely(ctx->batch != batch)) {
      /* The current batch was flushed in batch_draw_tracking()
       * so start anew. We know this won't happen a second time
       * since we are dealing with a fresh batch:
       */
      fd_batch_reference(&batch, fd_context_batch(ctx));
      batch_draw_tracking(batch, info);
      assert(ctx->batch == batch);
   }

   batch->blit = ctx->in_discard_blit;
   batch->back_blit = ctx->in_shadow;
   batch->num_draws++;

   /* Counting prims in sw doesn't work for GS and tessellation. For older
    * gens we don't have those stages and don't have the hw counters enabled,
    * so keep the count accurate for non-patch geometry.
    */
   unsigned prims;
   if (info->mode != PIPE_PRIM_PATCHES)
      prims = u_reduced_prims_for_vertices(info->mode, info->count);
   else
      prims = 0;

   ctx->stats.draw_calls++;

   /* TODO prims_emitted should be clipped when the stream-out buffer is
    * not large enough. See max_tf_vtx().. probably need to move that
    * into common code. Although a bit more annoying since a2xx doesn't
    * use ir3 so no common way to get at the pipe_stream_output_info
    * which is needed for this calculation.
    */
   if (ctx->streamout.num_targets > 0)
      ctx->stats.prims_emitted += prims;
   ctx->stats.prims_generated += prims;

   /* Clearing last_fence must come after the batch dependency tracking
    * (resource_read()/resource_written()), as that can trigger a flush,
    * re-populating last_fence
    */
   fd_fence_ref(&ctx->last_fence, NULL);

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   DBG("%p: %ux%u num_draws=%u (%s/%s)", batch,
       pfb->width, pfb->height, batch->num_draws,
       util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
       util_format_short_name(pipe_surface_format(pfb->zsbuf)));

   if (ctx->draw_vbo(ctx, info, index_offset))
      batch->needs_flush = true;

   batch->num_vertices += info->count * info->instance_count;

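   /* Each active streamout target advances by the vertex count of this
    * draw:
    */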
   for (unsigned i = 0; i < ctx->streamout.num_targets; i++)
      ctx->streamout.offsets[i] += info->count;

   if (fd_mesa_debug & FD_DBG_DDRAW)
      fd_context_all_dirty(ctx);

   fd_batch_check_size(batch);
   fd_batch_reference(&batch, NULL);

   if (info == &new_info)
      pipe_resource_reference(&indexbuf, NULL);
}

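/* Like batch_draw_tracking(), but for pctx->clear(): record which buffers
 * the clear writes, and note which ones no longer need their previous
 * contents restored from system memory since the clear discards them.
 */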
static void
batch_clear_tracking(struct fd_batch *batch, unsigned buffers)
{
   struct fd_context *ctx = batch->ctx;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   unsigned cleared_buffers;

   /* pctx->clear() is only for full-surface clears, so scissor is
    * equivalent to having GL_SCISSOR_TEST disabled:
    */
   batch->max_scissor.minx = 0;
   batch->max_scissor.miny = 0;
   batch->max_scissor.maxx = pfb->width;
   batch->max_scissor.maxy = pfb->height;

   /* for bookkeeping about which buffers have been cleared (and thus
    * can fully or partially skip mem2gmem) we need to ignore buffers
    * that have already had a draw, in case apps do silly things like
    * clear after draw (ie. if you only clear the color buffer, but
    * something like alpha-test causes side effects from the draw in
    * the depth buffer, etc)
    */
   cleared_buffers = buffers & (FD_BUFFER_ALL & ~batch->restore);
   batch->cleared |= buffers;
   batch->invalidated |= cleared_buffers;

   batch->resolve |= buffers;
   batch->needs_flush = true;

   fd_screen_lock(ctx->screen);

   if (buffers & PIPE_CLEAR_COLOR)
      for (unsigned i = 0; i < pfb->nr_cbufs; i++)
         if (buffers & (PIPE_CLEAR_COLOR0 << i))
            resource_written(batch, pfb->cbufs[i]->texture);

   if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
      resource_written(batch, pfb->zsbuf->texture);
      batch->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
   }

   resource_written(batch, batch->query_buf);

   list_for_each_entry(struct fd_acc_query, aq, &ctx->acc_active_queries, node)
      resource_written(batch, aq->prsc);

   fd_screen_unlock(ctx->screen);
}

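/* pipe_context::clear() entry point.  Tries the per-generation fast-clear
 * path (ctx->clear()) and falls back to a blitter-based clear otherwise.
 */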
static void
fd_clear(struct pipe_context *pctx, unsigned buffers,
         const struct pipe_scissor_state *scissor_state,
         const union pipe_color_union *color, double depth,
         unsigned stencil)
{
   struct fd_context *ctx = fd_context(pctx);

   /* TODO: push down the region versions into the tiles */
   if (!fd_render_condition_check(pctx))
      return;

   struct fd_batch *batch = NULL;
   fd_batch_reference(&batch, fd_context_batch(ctx));

   if (ctx->in_discard_blit) {
      fd_batch_reset(batch);
      fd_context_all_dirty(ctx);
   }

   batch_clear_tracking(batch, buffers);

   if (unlikely(ctx->batch != batch)) {
      /* The current batch was flushed in batch_clear_tracking()
       * so start anew. We know this won't happen a second time
       * since we are dealing with a fresh batch:
       */
      fd_batch_reference(&batch, fd_context_batch(ctx));
      batch_clear_tracking(batch, buffers);
      assert(ctx->batch == batch);
   }

   /* Clearing last_fence must come after the batch dependency tracking
    * (resource_read()/resource_written()), as that can trigger a flush,
    * re-populating last_fence
    */
   fd_fence_ref(&ctx->last_fence, NULL);

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   DBG("%p: %x %ux%u depth=%f, stencil=%u (%s/%s)", batch, buffers,
       pfb->width, pfb->height, depth, stencil,
       util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
       util_format_short_name(pipe_surface_format(pfb->zsbuf)));

   /* If the per-gen backend doesn't implement ctx->clear(), fall back to
    * the generic blitter clear:
    */
   bool fallback = true;

   if (ctx->clear) {
      fd_batch_set_stage(batch, FD_STAGE_CLEAR);

      if (ctx->clear(ctx, buffers, color, depth, stencil)) {
         if (fd_mesa_debug & FD_DBG_DCLEAR)
            fd_context_all_dirty(ctx);

         fallback = false;
      }
   }

   if (fallback) {
      fd_blitter_clear(pctx, buffers, color, depth, stencil);
   }

   fd_batch_check_size(batch);
   fd_batch_reference(&batch, NULL);
}

static void
fd_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
                       const union pipe_color_union *color,
                       unsigned x, unsigned y, unsigned w, unsigned h,
                       bool render_condition_enabled)
{
   DBG("TODO: x=%u, y=%u, w=%u, h=%u", x, y, w, h);
}

static void
fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
                       unsigned buffers, double depth, unsigned stencil,
                       unsigned x, unsigned y, unsigned w, unsigned h,
                       bool render_condition_enabled)
{
   DBG("TODO: buffers=%u, depth=%f, stencil=%u, x=%u, y=%u, w=%u, h=%u",
       buffers, depth, stencil, x, y, w, h);
}

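/* pipe_context::launch_grid() entry point for compute.  Compute work gets
 * its own batch rather than joining the current render pass, with the same
 * style of resource read/write tracking as draws.
 */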
static void
fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
   struct fd_context *ctx = fd_context(pctx);
   const struct fd_shaderbuf_stateobj *so = &ctx->shaderbuf[PIPE_SHADER_COMPUTE];
   struct fd_batch *batch, *save_batch = NULL;

   batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
   fd_batch_reference(&save_batch, ctx->batch);
   fd_batch_reference(&ctx->batch, batch);
   fd_context_all_dirty(ctx);

   fd_screen_lock(ctx->screen);

   /* Mark SSBOs */
   foreach_bit (i, so->enabled_mask & so->writable_mask)
      resource_written(batch, so->sb[i].buffer);

   foreach_bit (i, so->enabled_mask & ~so->writable_mask)
      resource_read(batch, so->sb[i].buffer);

   foreach_bit (i, ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask) {
      struct pipe_image_view *img =
         &ctx->shaderimg[PIPE_SHADER_COMPUTE].si[i];
      if (img->access & PIPE_IMAGE_ACCESS_WRITE)
         resource_written(batch, img->resource);
      else
         resource_read(batch, img->resource);
   }

   /* UBOs are read */
   foreach_bit (i, ctx->constbuf[PIPE_SHADER_COMPUTE].enabled_mask)
      resource_read(batch, ctx->constbuf[PIPE_SHADER_COMPUTE].cb[i].buffer);

   /* Mark textures as being read */
   foreach_bit (i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures)
      resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture);

   /* For global buffers, we don't really know if read or written, so assume
    * the worst:
    */
   foreach_bit (i, ctx->global_bindings.enabled_mask)
      resource_written(batch, ctx->global_bindings.buf[i]);

   if (info->indirect)
      resource_read(batch, info->indirect);

   fd_screen_unlock(ctx->screen);

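   /* The compute batch is flushed as soon as the grid is launched, rather
    * than being accumulated like draw batches:
    */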
   batch->needs_flush = true;
   ctx->launch_grid(ctx, info);

   fd_batch_flush(batch);

   fd_batch_reference(&ctx->batch, save_batch);
   fd_context_all_dirty(ctx);
   fd_batch_reference(&save_batch, NULL);
   fd_batch_reference(&batch, NULL);
}

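/* Plug the draw/clear/compute entry points into the pipe_context: */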
void
fd_draw_init(struct pipe_context *pctx)
{
   pctx->draw_vbo = fd_draw_vbo;
   pctx->clear = fd_clear;
   pctx->clear_render_target = fd_clear_render_target;
   pctx->clear_depth_stencil = fd_clear_depth_stencil;

   if (has_compute(fd_screen(pctx->screen))) {
      pctx->launch_grid = fd_launch_grid;
   }
}