swr: use swr_query_result type instead of void
[mesa.git] / src / gallium / drivers / swr / swr_context.cpp
1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24 #include "swr_context.h"
25 #include "swr_memory.h"
26 #include "swr_screen.h"
27 #include "swr_resource.h"
28 #include "swr_scratch.h"
29 #include "swr_query.h"
30 #include "swr_fence.h"
31
32 #include "util/u_memory.h"
33 #include "util/u_inlines.h"
34 #include "util/u_format.h"
35 #include "util/u_atomic.h"
36 #include "util/u_upload_mgr.h"
37 #include "util/u_transfer.h"
38 #include "util/u_surface.h"
39
40 #include "api.h"
41 #include "backend.h"
42
43 static struct pipe_surface *
44 swr_create_surface(struct pipe_context *pipe,
45 struct pipe_resource *pt,
46 const struct pipe_surface *surf_tmpl)
47 {
48 struct pipe_surface *ps;
49
50 ps = CALLOC_STRUCT(pipe_surface);
51 if (ps) {
52 pipe_reference_init(&ps->reference, 1);
53 pipe_resource_reference(&ps->texture, pt);
54 ps->context = pipe;
55 ps->format = surf_tmpl->format;
56 if (pt->target != PIPE_BUFFER) {
57 assert(surf_tmpl->u.tex.level <= pt->last_level);
58 ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
59 ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level);
60 ps->u.tex.level = surf_tmpl->u.tex.level;
61 ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
62 ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
63 } else {
64 /* setting width as number of elements should get us correct
65 * renderbuffer width */
66 ps->width = surf_tmpl->u.buf.last_element
67 - surf_tmpl->u.buf.first_element + 1;
68 ps->height = pt->height0;
69 ps->u.buf.first_element = surf_tmpl->u.buf.first_element;
70 ps->u.buf.last_element = surf_tmpl->u.buf.last_element;
71 assert(ps->u.buf.first_element <= ps->u.buf.last_element);
72 assert(ps->u.buf.last_element < ps->width);
73 }
74 }
75 return ps;
76 }
77
78 static void
79 swr_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surf)
80 {
81 assert(surf->texture);
82 struct pipe_resource *resource = surf->texture;
83
84 /* If the resource has been drawn to, store tiles. */
85 swr_store_dirty_resource(pipe, resource, SWR_TILE_RESOLVED);
86
87 pipe_resource_reference(&resource, NULL);
88 FREE(surf);
89 }
90
91
92 static void *
93 swr_transfer_map(struct pipe_context *pipe,
94 struct pipe_resource *resource,
95 unsigned level,
96 unsigned usage,
97 const struct pipe_box *box,
98 struct pipe_transfer **transfer)
99 {
100 struct swr_screen *screen = swr_screen(pipe->screen);
101 struct swr_resource *spr = swr_resource(resource);
102 struct pipe_transfer *pt;
103 enum pipe_format format = resource->format;
104
105 assert(resource);
106 assert(level <= resource->last_level);
107
108 /* If mapping an attached rendertarget, store tiles to surface and set
109 * postStoreTileState to SWR_TILE_INVALID so tiles get reloaded on next use
110 * and nothing needs to be done at unmap. */
111 swr_store_dirty_resource(pipe, resource, SWR_TILE_INVALID);
112
113 if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
114 /* If resource is in use, finish fence before mapping.
115 * Unless requested not to block, then if not done return NULL map */
116 if (usage & PIPE_TRANSFER_DONTBLOCK) {
117 if (swr_is_fence_pending(screen->flush_fence))
118 return NULL;
119 } else {
120 if (spr->status) {
121 /* But, if there's no fence pending, submit one.
122 * XXX: Remove once draw timestamps are finished. */
123 if (!swr_is_fence_pending(screen->flush_fence))
124 swr_fence_submit(swr_context(pipe), screen->flush_fence);
125
126 swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
127 swr_resource_unused(resource);
128 }
129 }
130 }
131
132 pt = CALLOC_STRUCT(pipe_transfer);
133 if (!pt)
134 return NULL;
135 pipe_resource_reference(&pt->resource, resource);
136 pt->usage = (pipe_transfer_usage)usage;
137 pt->level = level;
138 pt->box = *box;
139 pt->stride = spr->swr.pitch;
140 pt->layer_stride = spr->swr.qpitch * spr->swr.pitch;
141
142 /* if we're mapping the depth/stencil, copy in stencil for the section
143 * being read in
144 */
145 if (usage & PIPE_TRANSFER_READ && spr->has_depth && spr->has_stencil) {
146 size_t zbase, sbase;
147 for (int z = box->z; z < box->z + box->depth; z++) {
148 zbase = (z * spr->swr.qpitch + box->y) * spr->swr.pitch +
149 spr->mip_offsets[level];
150 sbase = (z * spr->secondary.qpitch + box->y) * spr->secondary.pitch +
151 spr->secondary_mip_offsets[level];
152 for (int y = box->y; y < box->y + box->height; y++) {
153 if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
154 for (int x = box->x; x < box->x + box->width; x++)
155 spr->swr.pBaseAddress[zbase + 4 * x + 3] =
156 spr->secondary.pBaseAddress[sbase + x];
157 } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
158 for (int x = box->x; x < box->x + box->width; x++)
159 spr->swr.pBaseAddress[zbase + 8 * x + 4] =
160 spr->secondary.pBaseAddress[sbase + x];
161 }
162 zbase += spr->swr.pitch;
163 sbase += spr->secondary.pitch;
164 }
165 }
166 }
167
168 unsigned offset = box->z * pt->layer_stride +
169 util_format_get_nblocksy(format, box->y) * pt->stride +
170 util_format_get_stride(format, box->x);
171
172 *transfer = pt;
173
174 return spr->swr.pBaseAddress + offset + spr->mip_offsets[level];
175 }
176
177 static void
178 swr_transfer_flush_region(struct pipe_context *pipe,
179 struct pipe_transfer *transfer,
180 const struct pipe_box *flush_box)
181 {
182 assert(transfer->resource);
183 assert(transfer->usage & PIPE_TRANSFER_WRITE);
184
185 struct swr_resource *spr = swr_resource(transfer->resource);
186 if (!spr->has_depth || !spr->has_stencil)
187 return;
188
189 size_t zbase, sbase;
190 struct pipe_box box = *flush_box;
191 box.x += transfer->box.x;
192 box.y += transfer->box.y;
193 box.z += transfer->box.z;
194 for (int z = box.z; z < box.z + box.depth; z++) {
195 zbase = (z * spr->swr.qpitch + box.y) * spr->swr.pitch +
196 spr->mip_offsets[transfer->level];
197 sbase = (z * spr->secondary.qpitch + box.y) * spr->secondary.pitch +
198 spr->secondary_mip_offsets[transfer->level];
199 for (int y = box.y; y < box.y + box.height; y++) {
200 if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
201 for (int x = box.x; x < box.x + box.width; x++)
202 spr->secondary.pBaseAddress[sbase + x] =
203 spr->swr.pBaseAddress[zbase + 4 * x + 3];
204 } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
205 for (int x = box.x; x < box.x + box.width; x++)
206 spr->secondary.pBaseAddress[sbase + x] =
207 spr->swr.pBaseAddress[zbase + 8 * x + 4];
208 }
209 zbase += spr->swr.pitch;
210 sbase += spr->secondary.pitch;
211 }
212 }
213 }
214
215 static void
216 swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer)
217 {
218 assert(transfer->resource);
219
220 struct swr_resource *spr = swr_resource(transfer->resource);
221 /* if we're mapping the depth/stencil, copy in stencil for the section
222 * being written out
223 */
224 if (transfer->usage & PIPE_TRANSFER_WRITE &&
225 !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT) &&
226 spr->has_depth && spr->has_stencil) {
227 struct pipe_box box;
228 u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height,
229 transfer->box.depth, &box);
230 swr_transfer_flush_region(pipe, transfer, &box);
231 }
232
233 pipe_resource_reference(&transfer->resource, NULL);
234 FREE(transfer);
235 }
236
237
238 static void
239 swr_resource_copy(struct pipe_context *pipe,
240 struct pipe_resource *dst,
241 unsigned dst_level,
242 unsigned dstx,
243 unsigned dsty,
244 unsigned dstz,
245 struct pipe_resource *src,
246 unsigned src_level,
247 const struct pipe_box *src_box)
248 {
249 struct swr_screen *screen = swr_screen(pipe->screen);
250
251 /* If either the src or dst is a renderTarget, store tiles before copy */
252 swr_store_dirty_resource(pipe, src, SWR_TILE_RESOLVED);
253 swr_store_dirty_resource(pipe, dst, SWR_TILE_RESOLVED);
254
255 swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
256 swr_resource_unused(src);
257 swr_resource_unused(dst);
258
259 if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER)
260 || (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER)) {
261 util_resource_copy_region(
262 pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box);
263 return;
264 }
265
266 debug_printf("unhandled swr_resource_copy\n");
267 }
268
269
270 static void
271 swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
272 {
273 struct swr_context *ctx = swr_context(pipe);
274 /* Make a copy of the const blit_info, so we can modify it */
275 struct pipe_blit_info info = *blit_info;
276
277 if (info.render_condition_enable && !swr_check_render_cond(pipe))
278 return;
279
280 if (info.src.resource->nr_samples > 1 && info.dst.resource->nr_samples <= 1
281 && !util_format_is_depth_or_stencil(info.src.resource->format)
282 && !util_format_is_pure_integer(info.src.resource->format)) {
283 debug_printf("swr_blit: color resolve : %d -> %d\n",
284 info.src.resource->nr_samples, info.dst.resource->nr_samples);
285
286 /* Resolve is done as part of the surface store. */
287 swr_store_dirty_resource(pipe, info.src.resource, SWR_TILE_RESOLVED);
288
289 struct pipe_resource *src_resource = info.src.resource;
290 struct pipe_resource *resolve_target =
291 swr_resource(src_resource)->resolve_target;
292
293 /* The resolve target becomes the new source for the blit. */
294 info.src.resource = resolve_target;
295 }
296
297 if (util_try_blit_via_copy_region(pipe, &info)) {
298 return; /* done */
299 }
300
301 if (info.mask & PIPE_MASK_S) {
302 debug_printf("swr: cannot blit stencil, skipping\n");
303 info.mask &= ~PIPE_MASK_S;
304 }
305
306 if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
307 debug_printf("swr: blit unsupported %s -> %s\n",
308 util_format_short_name(info.src.resource->format),
309 util_format_short_name(info.dst.resource->format));
310 return;
311 }
312
313 if (ctx->active_queries) {
314 SwrEnableStatsFE(ctx->swrContext, FALSE);
315 SwrEnableStatsBE(ctx->swrContext, FALSE);
316 }
317
318 util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer);
319 util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems);
320 util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs);
321 util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);
322 util_blitter_save_so_targets(
323 ctx->blitter,
324 ctx->num_so_targets,
325 (struct pipe_stream_output_target **)ctx->so_targets);
326 util_blitter_save_rasterizer(ctx->blitter, (void *)ctx->rasterizer);
327 util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
328 util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
329 util_blitter_save_fragment_shader(ctx->blitter, ctx->fs);
330 util_blitter_save_blend(ctx->blitter, (void *)ctx->blend);
331 util_blitter_save_depth_stencil_alpha(ctx->blitter,
332 (void *)ctx->depth_stencil);
333 util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
334 util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
335 util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer);
336 util_blitter_save_fragment_sampler_states(
337 ctx->blitter,
338 ctx->num_samplers[PIPE_SHADER_FRAGMENT],
339 (void **)ctx->samplers[PIPE_SHADER_FRAGMENT]);
340 util_blitter_save_fragment_sampler_views(
341 ctx->blitter,
342 ctx->num_sampler_views[PIPE_SHADER_FRAGMENT],
343 ctx->sampler_views[PIPE_SHADER_FRAGMENT]);
344 util_blitter_save_render_condition(ctx->blitter,
345 ctx->render_cond_query,
346 ctx->render_cond_cond,
347 ctx->render_cond_mode);
348
349 util_blitter_blit(ctx->blitter, &info);
350
351 if (ctx->active_queries) {
352 SwrEnableStatsFE(ctx->swrContext, TRUE);
353 SwrEnableStatsBE(ctx->swrContext, TRUE);
354 }
355 }
356
357
358 static void
359 swr_destroy(struct pipe_context *pipe)
360 {
361 struct swr_context *ctx = swr_context(pipe);
362 struct swr_screen *screen = swr_screen(pipe->screen);
363
364 if (ctx->blitter)
365 util_blitter_destroy(ctx->blitter);
366
367 for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
368 pipe_surface_reference(&ctx->framebuffer.cbufs[i], NULL);
369 }
370
371 pipe_surface_reference(&ctx->framebuffer.zsbuf, NULL);
372
373 for (unsigned i = 0; i < ARRAY_SIZE(ctx->sampler_views[0]); i++) {
374 pipe_sampler_view_reference(&ctx->sampler_views[PIPE_SHADER_FRAGMENT][i], NULL);
375 }
376
377 for (unsigned i = 0; i < ARRAY_SIZE(ctx->sampler_views[0]); i++) {
378 pipe_sampler_view_reference(&ctx->sampler_views[PIPE_SHADER_VERTEX][i], NULL);
379 }
380
381 if (ctx->pipe.stream_uploader)
382 u_upload_destroy(ctx->pipe.stream_uploader);
383
384 /* Idle core after destroying buffer resources, but before deleting
385 * context. Destroying resources has potentially called StoreTiles.*/
386 SwrWaitForIdle(ctx->swrContext);
387
388 if (ctx->swrContext)
389 SwrDestroyContext(ctx->swrContext);
390
391 delete ctx->blendJIT;
392
393 swr_destroy_scratch_buffers(ctx);
394
395 /* Only update screen->pipe if current context is being destroyed */
396 assert(screen);
397 if (screen->pipe == pipe)
398 screen->pipe = NULL;
399
400 AlignedFree(ctx);
401 }
402
403
404 static void
405 swr_render_condition(struct pipe_context *pipe,
406 struct pipe_query *query,
407 boolean condition,
408 enum pipe_render_cond_flag mode)
409 {
410 struct swr_context *ctx = swr_context(pipe);
411
412 ctx->render_cond_query = query;
413 ctx->render_cond_mode = mode;
414 ctx->render_cond_cond = condition;
415 }
416
417 static void
418 swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats)
419 {
420 swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
421
422 if (!pDC)
423 return;
424
425 struct swr_query_result *pqr = pDC->pStats;
426
427 SWR_STATS *pSwrStats = &pqr->core;
428
429 pSwrStats->DepthPassCount += pStats->DepthPassCount;
430 pSwrStats->PsInvocations += pStats->PsInvocations;
431 pSwrStats->CsInvocations += pStats->CsInvocations;
432 }
433
434 static void
435 swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats)
436 {
437 swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
438
439 if (!pDC)
440 return;
441
442 struct swr_query_result *pqr = pDC->pStats;
443
444 SWR_STATS_FE *pSwrStats = &pqr->coreFE;
445 p_atomic_add(&pSwrStats->IaVertices, pStats->IaVertices);
446 p_atomic_add(&pSwrStats->IaPrimitives, pStats->IaPrimitives);
447 p_atomic_add(&pSwrStats->VsInvocations, pStats->VsInvocations);
448 p_atomic_add(&pSwrStats->HsInvocations, pStats->HsInvocations);
449 p_atomic_add(&pSwrStats->DsInvocations, pStats->DsInvocations);
450 p_atomic_add(&pSwrStats->GsInvocations, pStats->GsInvocations);
451 p_atomic_add(&pSwrStats->CInvocations, pStats->CInvocations);
452 p_atomic_add(&pSwrStats->CPrimitives, pStats->CPrimitives);
453 p_atomic_add(&pSwrStats->GsPrimitives, pStats->GsPrimitives);
454
455 for (unsigned i = 0; i < 4; i++) {
456 p_atomic_add(&pSwrStats->SoPrimStorageNeeded[i],
457 pStats->SoPrimStorageNeeded[i]);
458 p_atomic_add(&pSwrStats->SoNumPrimsWritten[i],
459 pStats->SoNumPrimsWritten[i]);
460 }
461 }
462
463 struct pipe_context *
464 swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
465 {
466 struct swr_context *ctx = (struct swr_context *)
467 AlignedMalloc(sizeof(struct swr_context), KNOB_SIMD_BYTES);
468 memset(ctx, 0, sizeof(struct swr_context));
469
470 ctx->blendJIT =
471 new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>;
472
473 SWR_CREATECONTEXT_INFO createInfo;
474 memset(&createInfo, 0, sizeof(createInfo));
475 createInfo.privateStateSize = sizeof(swr_draw_context);
476 createInfo.pfnLoadTile = swr_LoadHotTile;
477 createInfo.pfnStoreTile = swr_StoreHotTile;
478 createInfo.pfnClearTile = swr_StoreHotTileClear;
479 createInfo.pfnUpdateStats = swr_UpdateStats;
480 createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE;
481 ctx->swrContext = SwrCreateContext(&createInfo);
482
483 SwrInit();
484
485 if (ctx->swrContext == NULL)
486 goto fail;
487
488 ctx->pipe.screen = p_screen;
489 ctx->pipe.destroy = swr_destroy;
490 ctx->pipe.priv = priv;
491 ctx->pipe.create_surface = swr_create_surface;
492 ctx->pipe.surface_destroy = swr_surface_destroy;
493 ctx->pipe.transfer_map = swr_transfer_map;
494 ctx->pipe.transfer_unmap = swr_transfer_unmap;
495 ctx->pipe.transfer_flush_region = swr_transfer_flush_region;
496
497 ctx->pipe.buffer_subdata = u_default_buffer_subdata;
498 ctx->pipe.texture_subdata = u_default_texture_subdata;
499
500 ctx->pipe.clear_texture = util_clear_texture;
501 ctx->pipe.resource_copy_region = swr_resource_copy;
502 ctx->pipe.render_condition = swr_render_condition;
503
504 swr_state_init(&ctx->pipe);
505 swr_clear_init(&ctx->pipe);
506 swr_draw_init(&ctx->pipe);
507 swr_query_init(&ctx->pipe);
508
509 ctx->pipe.stream_uploader = u_upload_create_default(&ctx->pipe);
510 if (!ctx->pipe.stream_uploader)
511 goto fail;
512 ctx->pipe.const_uploader = ctx->pipe.stream_uploader;
513
514 ctx->pipe.blit = swr_blit;
515 ctx->blitter = util_blitter_create(&ctx->pipe);
516 if (!ctx->blitter)
517 goto fail;
518
519 swr_init_scratch_buffers(ctx);
520
521 return &ctx->pipe;
522
523 fail:
524 /* Should really validate the init steps and fail gracefully */
525 swr_destroy(&ctx->pipe);
526 return NULL;
527 }