b7f354cd2a21826a6ad08b2e70241c03ada1f789
[mesa.git] / src / gallium / drivers / swr / swr_draw.cpp
1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24 #include "swr_screen.h"
25 #include "swr_context.h"
26 #include "swr_resource.h"
27 #include "swr_fence.h"
28 #include "swr_query.h"
29 #include "jit_api.h"
30
31 #include "util/u_draw.h"
32 #include "util/u_prim.h"
33
34 /*
35 * Draw vertex arrays, with optional indexing, optional instancing.
36 */
37 static void
38 swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
39 {
40 struct swr_context *ctx = swr_context(pipe);
41
42 if (!info->count_from_stream_output && !info->indirect &&
43 !info->primitive_restart &&
44 !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
45 return;
46
47 if (!swr_check_render_cond(pipe))
48 return;
49
50 if (info->indirect) {
51 util_draw_indirect(pipe, info);
52 return;
53 }
54
55 /* If indexed draw, force vertex validation since index buffer comes
56 * from draw info. */
57 if (info->index_size)
58 ctx->dirty |= SWR_NEW_VERTEX;
59
60 /* Update derived state, pass draw info to update function. */
61 swr_update_derived(pipe, info);
62
63 swr_update_draw_context(ctx);
64
65 struct pipe_draw_info resolved_info;
66 /* DrawTransformFeedback */
67 if (info->count_from_stream_output) {
68 // trick copied from softpipe to modify const struct *info
69 memcpy(&resolved_info, (void*)info, sizeof(struct pipe_draw_info));
70 resolved_info.count = ctx->so_primCounter * resolved_info.vertices_per_patch;
71 resolved_info.max_index = resolved_info.count - 1;
72 info = &resolved_info;
73 }
74
75 if (ctx->vs->pipe.stream_output.num_outputs) {
76 if (!ctx->vs->soFunc[info->mode]) {
77 STREAMOUT_COMPILE_STATE state = {0};
78 struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;
79
80 state.numVertsPerPrim = u_vertices_per_prim(info->mode);
81
82 uint32_t offsets[MAX_SO_STREAMS] = {0};
83 uint32_t num = 0;
84
85 for (uint32_t i = 0; i < so->num_outputs; i++) {
86 assert(so->output[i].stream == 0); // @todo
87 uint32_t output_buffer = so->output[i].output_buffer;
88 if (so->output[i].dst_offset != offsets[output_buffer]) {
89 // hole - need to fill
90 state.stream.decl[num].bufferIndex = output_buffer;
91 state.stream.decl[num].hole = true;
92 state.stream.decl[num].componentMask =
93 (1 << (so->output[i].dst_offset - offsets[output_buffer]))
94 - 1;
95 num++;
96 offsets[output_buffer] = so->output[i].dst_offset;
97 }
98
99 unsigned attrib_slot = so->output[i].register_index;
100 attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);
101
102 state.stream.decl[num].bufferIndex = output_buffer;
103 state.stream.decl[num].attribSlot = attrib_slot;
104 state.stream.decl[num].componentMask =
105 ((1 << so->output[i].num_components) - 1)
106 << so->output[i].start_component;
107 state.stream.decl[num].hole = false;
108 num++;
109
110 offsets[output_buffer] += so->output[i].num_components;
111 }
112
113 state.stream.numDecls = num;
114
115 HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
116 ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
117 debug_printf("so shader %p\n", ctx->vs->soFunc[info->mode]);
118 assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
119 }
120
121 ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
122 }
123
124 struct swr_vertex_element_state *velems = ctx->velems;
125 if (info->primitive_restart)
126 velems->fsState.cutIndex = info->restart_index;
127 else
128 velems->fsState.cutIndex = 0;
129 velems->fsState.bEnableCutIndex = info->primitive_restart;
130 velems->fsState.bPartialVertexBuffer = (info->min_index > 0);
131
132 swr_jit_fetch_key key;
133 swr_generate_fetch_key(key, velems);
134 auto search = velems->map.find(key);
135 if (search != velems->map.end()) {
136 velems->fsFunc = search->second;
137 } else {
138 HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
139 velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);
140
141 debug_printf("fetch shader %p\n", velems->fsFunc);
142 assert(velems->fsFunc && "Error: FetchShader = NULL");
143
144 velems->map.insert(std::make_pair(key, velems->fsFunc));
145 }
146
147 ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc);
148
149 /* Set up frontend state
150 * XXX setup provokingVertex & topologyProvokingVertex */
151 SWR_FRONTEND_STATE feState = {0};
152
153 // feState.vsVertexSize seeds the PA size that is used as an interface
154 // between all the shader stages, so it has to be large enough to
155 // incorporate all interfaces between stages
156
157 // max of gs and vs num_outputs
158 feState.vsVertexSize = ctx->vs->info.base.num_outputs;
159 if (ctx->gs &&
160 ctx->gs->info.base.num_outputs > feState.vsVertexSize) {
161 feState.vsVertexSize = ctx->gs->info.base.num_outputs;
162 }
163
164 if (ctx->vs->info.base.num_outputs) {
165 // gs does not adjust for position in SGV slot at input from vs
166 if (!ctx->gs)
167 feState.vsVertexSize--;
168 }
169
170 // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
171 feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;
172
173 // The PA in the clipper does not handle BE vertex sizes
174 // different from FE. Increase vertexsize only for the cases that needed it
175
176 // primid needs a slot
177 if (ctx->fs->info.base.uses_primid)
178 feState.vsVertexSize++;
179 // sprite coord enable
180 if (ctx->rasterizer->sprite_coord_enable)
181 feState.vsVertexSize++;
182
183
184 if (ctx->rasterizer->flatshade_first) {
185 feState.provokingVertex = {1, 0, 0};
186 } else {
187 feState.provokingVertex = {2, 1, 2};
188 }
189
190 enum pipe_prim_type topology;
191 if (ctx->gs)
192 topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
193 else
194 topology = info->mode;
195
196 switch (topology) {
197 case PIPE_PRIM_TRIANGLE_FAN:
198 feState.topologyProvokingVertex = feState.provokingVertex.triFan;
199 break;
200 case PIPE_PRIM_TRIANGLE_STRIP:
201 case PIPE_PRIM_TRIANGLES:
202 feState.topologyProvokingVertex = feState.provokingVertex.triStripList;
203 break;
204 case PIPE_PRIM_QUAD_STRIP:
205 case PIPE_PRIM_QUADS:
206 if (ctx->rasterizer->flatshade_first)
207 feState.topologyProvokingVertex = 0;
208 else
209 feState.topologyProvokingVertex = 3;
210 break;
211 case PIPE_PRIM_LINES:
212 case PIPE_PRIM_LINE_LOOP:
213 case PIPE_PRIM_LINE_STRIP:
214 feState.topologyProvokingVertex = feState.provokingVertex.lineStripList;
215 break;
216 default:
217 feState.topologyProvokingVertex = 0;
218 }
219
220 feState.bEnableCutIndex = info->primitive_restart;
221 ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState);
222
223 if (info->index_size)
224 ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext,
225 swr_convert_prim_topology(info->mode),
226 info->count,
227 info->instance_count,
228 info->start,
229 info->index_bias,
230 info->start_instance);
231 else
232 ctx->api.pfnSwrDrawInstanced(ctx->swrContext,
233 swr_convert_prim_topology(info->mode),
234 info->count,
235 info->instance_count,
236 info->start,
237 info->start_instance);
238
239 /* On client-buffer draw, we used client buffer directly, without
240 * copy. Block until draw is finished.
241 * VMD is an example application that benefits from this. */
242 if (ctx->dirty & SWR_BLOCK_CLIENT_DRAW) {
243 struct swr_screen *screen = swr_screen(pipe->screen);
244 swr_fence_submit(ctx, screen->flush_fence);
245 swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
246 }
247 }
248
249
250 static void
251 swr_flush(struct pipe_context *pipe,
252 struct pipe_fence_handle **fence,
253 unsigned flags)
254 {
255 struct swr_context *ctx = swr_context(pipe);
256 struct swr_screen *screen = swr_screen(pipe->screen);
257
258 for (int i=0; i < ctx->framebuffer.nr_cbufs; i++) {
259 struct pipe_surface *cb = ctx->framebuffer.cbufs[i];
260 if (cb) {
261 swr_store_dirty_resource(pipe, cb->texture, SWR_TILE_RESOLVED);
262 }
263 }
264 if (ctx->framebuffer.zsbuf) {
265 swr_store_dirty_resource(pipe, ctx->framebuffer.zsbuf->texture,
266 SWR_TILE_RESOLVED);
267 }
268
269 if (fence)
270 swr_fence_reference(pipe->screen, fence, screen->flush_fence);
271 }
272
273 void
274 swr_finish(struct pipe_context *pipe)
275 {
276 struct pipe_fence_handle *fence = nullptr;
277
278 swr_flush(pipe, &fence, 0);
279 swr_fence_finish(pipe->screen, NULL, fence, 0);
280 swr_fence_reference(pipe->screen, &fence, NULL);
281 }
282
283 /*
284 * Invalidate tiles so they can be reloaded back when needed
285 */
286 void
287 swr_invalidate_render_target(struct pipe_context *pipe,
288 uint32_t attachment,
289 uint16_t width, uint16_t height)
290 {
291 struct swr_context *ctx = swr_context(pipe);
292
293 /* grab the rect from the passed in arguments */
294 swr_update_draw_context(ctx);
295 SWR_RECT full_rect =
296 {0, 0, (int32_t)width, (int32_t)height};
297 ctx->api.pfnSwrInvalidateTiles(ctx->swrContext,
298 1 << attachment,
299 full_rect);
300 }
301
302
303 /*
304 * Store SWR HotTiles back to renderTarget surface.
305 */
306 void
307 swr_store_render_target(struct pipe_context *pipe,
308 uint32_t attachment,
309 enum SWR_TILE_STATE post_tile_state)
310 {
311 struct swr_context *ctx = swr_context(pipe);
312 struct swr_draw_context *pDC = &ctx->swrDC;
313 struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];
314
315 /* Only proceed if there's a valid surface to store to */
316 if (renderTarget->xpBaseAddress) {
317 swr_update_draw_context(ctx);
318 SWR_RECT full_rect =
319 {0, 0,
320 (int32_t)u_minify(renderTarget->width, renderTarget->lod),
321 (int32_t)u_minify(renderTarget->height, renderTarget->lod)};
322 ctx->api.pfnSwrStoreTiles(ctx->swrContext,
323 1 << attachment,
324 post_tile_state,
325 full_rect);
326 }
327 }
328
329 void
330 swr_store_dirty_resource(struct pipe_context *pipe,
331 struct pipe_resource *resource,
332 enum SWR_TILE_STATE post_tile_state)
333 {
334 /* Only store resource if it has been written to */
335 if (swr_resource(resource)->status & SWR_RESOURCE_WRITE) {
336 struct swr_context *ctx = swr_context(pipe);
337 struct swr_screen *screen = swr_screen(pipe->screen);
338 struct swr_resource *spr = swr_resource(resource);
339
340 swr_draw_context *pDC = &ctx->swrDC;
341 SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
342 for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
343 if (renderTargets[i].xpBaseAddress == spr->swr.xpBaseAddress ||
344 (spr->secondary.xpBaseAddress &&
345 renderTargets[i].xpBaseAddress == spr->secondary.xpBaseAddress)) {
346 swr_store_render_target(pipe, i, post_tile_state);
347
348 /* Mesa thinks depth/stencil are fused, so we'll never get an
349 * explicit resource for stencil. So, if checking depth, then
350 * also check for stencil. */
351 if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) {
352 swr_store_render_target(
353 pipe, SWR_ATTACHMENT_STENCIL, post_tile_state);
354 }
355
356 /* This fence signals StoreTiles completion */
357 swr_fence_submit(ctx, screen->flush_fence);
358
359 break;
360 }
361 }
362 }
363
364 void
365 swr_draw_init(struct pipe_context *pipe)
366 {
367 pipe->draw_vbo = swr_draw_vbo;
368 pipe->flush = swr_flush;
369 }