1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
24 #include "swr_screen.h"
25 #include "swr_context.h"
26 #include "swr_resource.h"
27 #include "swr_fence.h"
28 #include "swr_query.h"
31 #include "util/u_draw.h"
32 #include "util/u_prim.h"
37 /* Draw vertex arrays, with optional indexing, optional instancing. */
// swr_draw_vbo: draw entry point; swr_draw_init installs it as pipe->draw_vbo.
// Steps visible in this listing, in order: early-out checks, derived-state and
// draw-context update, optional override of the vertex count from the
// stream-output counter, stream-output shader lookup/compile, fetch shader
// lookup/compile, frontend state setup, then an SWR instanced draw call.
// NOTE(review): this listing carries embedded line numbers with gaps; the
// return type, braces and several else/return/switch/break lines are not
// visible here. Confirm exact control flow against the pristine file.
40 swr_draw_vbo(struct pipe_context
*pipe
, const struct pipe_draw_info
*info
)
42 struct swr_context
*ctx
= swr_context(pipe
);
// u_trim_pipe_prim is only reached for draws that are not indirect, do not
// use primitive restart and do not take their count from stream output. It is
// handed a writable pointer to info->count (the const is cast away).
44 if (!info
->count_from_stream_output
&& !info
->indirect
&&
45 !info
->primitive_restart
&&
46 !u_trim_pipe_prim(info
->mode
, (unsigned*)&info
->count
))
// Render-condition check; the statement it guards is not visible here.
49 if (!swr_check_render_cond(pipe
))
// Hands the draw to the util_draw_indirect helper; the condition guarding this
// call is not visible in this listing.
53 util_draw_indirect(pipe
, info
);
// NOTE(review): the block comment opened on the next line has lost its closing
// line in this listing, so the SWR_NEW_VERTEX statement that follows it
// currently sits inside that comment. Confirm against the pristine file.
57 /* If indexed draw, force vertex validation since index buffer comes
60 ctx
->dirty
|= SWR_NEW_VERTEX
;
62 /* Update derived state, pass draw info to update function. */
63 swr_update_derived(pipe
, info
);
65 swr_update_draw_context(ctx
);
// Local, writable copy of the draw info; only filled in when the count is
// taken from stream output below.
67 struct pipe_draw_info resolved_info
;
68 /* DrawTransformFeedback */
69 if (info
->count_from_stream_output
) {
70 // trick copied from softpipe to modify const struct *info
71 memcpy(&resolved_info
, (void*)info
, sizeof(struct pipe_draw_info
));
// count = stream-output primitive counter * vertices_per_patch, and
// max_index is derived from that count.
72 resolved_info
.count
= ctx
->so_primCounter
* resolved_info
.vertices_per_patch
;
73 resolved_info
.max_index
= resolved_info
.count
- 1;
// From here on, info points at the local copy.
74 info
= &resolved_info
;
// Stream-output functions are kept per primitive mode in ctx->vs->soFunc[] and
// compiled only when the slot for info->mode is still empty.
77 if (ctx
->vs
->pipe
.stream_output
.num_outputs
) {
78 if (!ctx
->vs
->soFunc
[info
->mode
]) {
79 STREAMOUT_COMPILE_STATE state
= {0};
80 struct pipe_stream_output_info
*so
= &ctx
->vs
->pipe
.stream_output
;
82 state
.numVertsPerPrim
= u_vertices_per_prim(info
->mode
);
// Running offset per output buffer: compared against each output's dst_offset
// and advanced by num_components after each output.
84 uint32_t offsets
[MAX_SO_STREAMS
] = {0};
// NOTE(review): `num` indexes state.stream.decl[] below, but its declaration
// and its increments are not visible in this listing.
87 for (uint32_t i
= 0; i
< so
->num_outputs
; i
++) {
88 assert(so
->output
[i
].stream
== 0); // @todo
89 uint32_t output_buffer
= so
->output
[i
].output_buffer
;
// A mismatch between dst_offset and the running offset produces an extra decl
// flagged as a hole for this buffer.
90 if (so
->output
[i
].dst_offset
!= offsets
[output_buffer
]) {
91 // hole - need to fill
92 state
.stream
.decl
[num
].bufferIndex
= output_buffer
;
93 state
.stream
.decl
[num
].hole
= true;
// NOTE(review): the mask expression below is cut off in this listing (the
// lines numbered 96-97 in the embedded numbering are not visible).
94 state
.stream
.decl
[num
].componentMask
=
95 (1 << (so
->output
[i
].dst_offset
- offsets
[output_buffer
]))
// Resynchronise the running offset with this output's dst_offset.
98 offsets
[output_buffer
] = so
->output
[i
].dst_offset
;
// Translate the output's register_index through swr_so_adjust_attrib to get
// the attribute slot stored in the decl.
101 unsigned attrib_slot
= so
->output
[i
].register_index
;
102 attrib_slot
= swr_so_adjust_attrib(attrib_slot
, ctx
->vs
);
104 state
.stream
.decl
[num
].bufferIndex
= output_buffer
;
105 state
.stream
.decl
[num
].attribSlot
= attrib_slot
;
// Mask with num_components low bits set, shifted left by start_component.
106 state
.stream
.decl
[num
].componentMask
=
107 ((1 << so
->output
[i
].num_components
) - 1)
108 << so
->output
[i
].start_component
;
109 state
.stream
.decl
[num
].hole
= false;
112 offsets
[output_buffer
] += so
->output
[i
].num_components
;
115 state
.stream
.numDecls
= num
;
// Compile the stream-output function with the screen's JIT manager and store
// it in the per-mode slot; a null result trips the assert below.
117 HANDLE hJitMgr
= swr_screen(pipe
->screen
)->hJitMgr
;
118 ctx
->vs
->soFunc
[info
->mode
] = JitCompileStreamout(hJitMgr
, state
);
119 debug_printf("so shader %p\n", ctx
->vs
->soFunc
[info
->mode
]);
120 assert(ctx
->vs
->soFunc
[info
->mode
] && "Error: SoShader = NULL");
// Install the stream-output function for this mode on the SWR context.
123 ctx
->api
.pfnSwrSetSoFunc(ctx
->swrContext
, ctx
->vs
->soFunc
[info
->mode
], 0);
// Fetch-shader state: cut index and its enable flag follow the draw's
// primitive-restart settings; bPartialVertexBuffer is set when min_index > 0.
// The cutIndex = 0 assignment is presumably the else branch - TODO confirm,
// the else line is not visible here.
126 struct swr_vertex_element_state
*velems
= ctx
->velems
;
127 if (info
->primitive_restart
)
128 velems
->fsState
.cutIndex
= info
->restart_index
;
130 velems
->fsState
.cutIndex
= 0;
131 velems
->fsState
.bEnableCutIndex
= info
->primitive_restart
;
132 velems
->fsState
.bPartialVertexBuffer
= (info
->min_index
> 0);
// Fetch shaders are looked up in velems->map by a key generated from velems.
// A hit reuses the stored function; the other path compiles one with
// JitCompileFetch and inserts it into the map.
134 swr_jit_fetch_key key
;
135 swr_generate_fetch_key(key
, velems
);
136 auto search
= velems
->map
.find(key
);
137 if (search
!= velems
->map
.end()) {
138 velems
->fsFunc
= search
->second
;
140 HANDLE hJitMgr
= swr_screen(ctx
->pipe
.screen
)->hJitMgr
;
141 velems
->fsFunc
= JitCompileFetch(hJitMgr
, velems
->fsState
);
143 debug_printf("fetch shader %p\n", velems
->fsFunc
);
144 assert(velems
->fsFunc
&& "Error: FetchShader = NULL");
146 velems
->map
.insert(std::make_pair(key
, velems
->fsFunc
));
// Install the selected fetch function on the SWR context.
149 ctx
->api
.pfnSwrSetFetchFunc(ctx
->swrContext
, velems
->fsFunc
);
151 /* Set up frontend state
152 * XXX setup provokingVertex & topologyProvokingVertex */
153 SWR_FRONTEND_STATE feState
= {0};
155 // feState.vsVertexSize seeds the PA size that is used as an interface
156 // between all the shader stages, so it has to be large enough to
157 // incorporate all interfaces between stages
159 // max of frontend shaders num_outputs
160 feState
.vsVertexSize
= ctx
->vs
->info
.base
.num_outputs
;
// NOTE(review): ctx->gs, ctx->tcs and ctx->tes are dereferenced in the three
// statements below, while the same pointers are null-tested further down. Any
// guards for these statements would be on lines not visible in this listing.
162 feState
.vsVertexSize
= std::max(feState
.vsVertexSize
, (uint32_t)ctx
->gs
->info
.base
.num_outputs
);
165 feState
.vsVertexSize
= std::max(feState
.vsVertexSize
, (uint32_t)ctx
->tcs
->info
.base
.num_outputs
);
168 feState
.vsVertexSize
= std::max(feState
.vsVertexSize
, (uint32_t)ctx
->tes
->info
.base
.num_outputs
);
172 if (ctx
->vs
->info
.base
.num_outputs
) {
173 // gs does not adjust for position in SGV slot at input from vs
174 if (!ctx
->gs
&& !ctx
->tcs
&& !ctx
->tes
)
175 feState
.vsVertexSize
--;
178 // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
179 feState
.vsVertexSize
+= VERTEX_ATTRIB_START_SLOT
;
181 // The PA in the clipper does not handle BE vertex sizes
182 // different from FE. Increase vertexsize only for the cases that needed it
184 // primid needs a slot
185 if (ctx
->fs
->info
.base
.uses_primid
)
186 feState
.vsVertexSize
++;
187 // sprite coord enable
188 if (ctx
->rasterizer
->sprite_coord_enable
)
189 feState
.vsVertexSize
++;
// Two provoking-vertex initialisers are visible: {1, 0, 0} under
// flatshade_first and {2, 1, 2} (presumably the else branch - TODO confirm,
// the else line is not visible here).
191 if (ctx
->rasterizer
->flatshade_first
) {
192 feState
.provokingVertex
= {1, 0, 0};
194 feState
.provokingVertex
= {2, 1, 2};
// Topology used for the selection below: either the geometry shader's
// TGSI_PROPERTY_GS_OUTPUT_PRIM property or info->mode. The condition choosing
// between the two is not visible in this listing.
197 enum pipe_prim_type topology
;
199 topology
= (pipe_prim_type
)ctx
->gs
->info
.base
.properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
];
201 topology
= info
->mode
;
// Per-topology choice of topologyProvokingVertex. The switch header, break
// statements and default label are not visible here, so the grouping of the
// case labels below should be confirmed against the pristine file.
204 case PIPE_PRIM_TRIANGLE_FAN
:
205 feState
.topologyProvokingVertex
= feState
.provokingVertex
.triFan
;
207 case PIPE_PRIM_TRIANGLE_STRIP
:
208 case PIPE_PRIM_TRIANGLES
:
209 feState
.topologyProvokingVertex
= feState
.provokingVertex
.triStripList
;
211 case PIPE_PRIM_QUAD_STRIP
:
212 case PIPE_PRIM_QUADS
:
213 if (ctx
->rasterizer
->flatshade_first
)
214 feState
.topologyProvokingVertex
= 0;
216 feState
.topologyProvokingVertex
= 3;
218 case PIPE_PRIM_LINES
:
219 case PIPE_PRIM_LINE_LOOP
:
220 case PIPE_PRIM_LINE_STRIP
:
221 feState
.topologyProvokingVertex
= feState
.provokingVertex
.lineStripList
;
224 feState
.topologyProvokingVertex
= 0;
// Frontend cut-index enable follows the draw's primitive_restart flag; the
// assembled state is then handed to the SWR context.
227 feState
.bEnableCutIndex
= info
->primitive_restart
;
228 ctx
->api
.pfnSwrSetFrontendState(ctx
->swrContext
, &feState
);
// A non-zero info->index_size selects the indexed instanced draw; the other
// call is the non-indexed variant (presumably the else branch - TODO confirm).
// Some arguments of both calls are on lines missing from this listing.
230 if (info
->index_size
)
231 ctx
->api
.pfnSwrDrawIndexedInstanced(ctx
->swrContext
,
232 swr_convert_prim_topology(info
->mode
, info
->vertices_per_patch
),
234 info
->instance_count
,
237 info
->start_instance
);
239 ctx
->api
.pfnSwrDrawInstanced(ctx
->swrContext
,
240 swr_convert_prim_topology(info
->mode
, info
->vertices_per_patch
),
242 info
->instance_count
,
244 info
->start_instance
);
246 /* On client-buffer draw, we used client buffer directly, without
247 * copy. Block until draw is finished.
248 * VMD is an example application that benefits from this. */
249 if (ctx
->dirty
& SWR_BLOCK_CLIENT_DRAW
) {
250 struct swr_screen
*screen
= swr_screen(pipe
->screen
);
// Submit the screen's flush fence, then call swr_fence_finish on it.
251 swr_fence_submit(ctx
, screen
->flush_fence
);
252 swr_fence_finish(pipe
->screen
, NULL
, screen
->flush_fence
, 0);
// swr_flush: flush hook; swr_draw_init installs it as pipe->flush.
// Calls swr_store_dirty_resource on the textures behind the framebuffer's
// color buffers and its depth/stencil buffer, then passes the screen's flush
// fence to swr_fence_reference together with the `fence` out-parameter.
// NOTE(review): the return type, a third parameter (swr_finish passes 0 for
// it) and the brace/guard lines are not visible in this listing.
258 swr_flush(struct pipe_context
*pipe
,
259 struct pipe_fence_handle
**fence
,
262 struct swr_context
*ctx
= swr_context(pipe
);
263 struct swr_screen
*screen
= swr_screen(pipe
->screen
);
// Walk the nr_cbufs color buffers of the bound framebuffer.
265 for (int i
=0; i
< ctx
->framebuffer
.nr_cbufs
; i
++) {
266 struct pipe_surface
*cb
= ctx
->framebuffer
.cbufs
[i
];
// Store the color buffer's texture with post-store state SWR_TILE_RESOLVED.
// Any null check on cb would be on a line not visible here - TODO confirm.
268 swr_store_dirty_resource(pipe
, cb
->texture
, SWR_TILE_RESOLVED
);
// Same store for the depth/stencil buffer when one is bound; the final
// argument of this call is on a line missing from this listing.
271 if (ctx
->framebuffer
.zsbuf
) {
272 swr_store_dirty_resource(pipe
, ctx
->framebuffer
.zsbuf
->texture
,
// Reference the screen's flush fence into *fence. Any guard on `fence` is not
// visible here.
277 swr_fence_reference(pipe
->screen
, fence
, screen
->flush_fence
);
// swr_finish: flushes the context, calls swr_fence_finish on the fence that
// swr_flush handed back, then drops the local reference by re-referencing the
// handle to NULL.
// NOTE(review): the return type and braces are not visible in this listing.
281 swr_finish(struct pipe_context
*pipe
)
283 struct pipe_fence_handle
*fence
= nullptr;
// swr_flush fills `fence` through its out-parameter; 0 is passed as the third
// argument.
285 swr_flush(pipe
, &fence
, 0);
286 swr_fence_finish(pipe
->screen
, NULL
, fence
, 0);
// Release the reference held in the local handle.
287 swr_fence_reference(pipe
->screen
, &fence
, NULL
);
291 /* Invalidate tiles so they can be reloaded back when needed. */
// swr_invalidate_render_target: updates the draw context, builds a rect
// initialiser {0, 0, width, height} from the arguments and calls
// pfnSwrInvalidateTiles on the SWR context.
// NOTE(review): the return type, one parameter line, the declaration that
// receives the rect initialiser and the remaining call arguments are on lines
// not visible in this listing.
294 swr_invalidate_render_target(struct pipe_context
*pipe
,
296 uint16_t width
, uint16_t height
)
298 struct swr_context
*ctx
= swr_context(pipe
);
300 /* grab the rect from the passed in arguments */
301 swr_update_draw_context(ctx
);
// width and height are widened from uint16_t to int32_t for the rect.
303 {0, 0, (int32_t)width
, (int32_t)height
};
304 ctx
->api
.pfnSwrInvalidateTiles(ctx
->swrContext
,
311 /* Store SWR HotTiles back to renderTarget surface. */
// swr_store_render_target: for the render target at index `attachment` in the
// draw context, calls pfnSwrStoreTiles when the target has a non-zero
// xpBaseAddress. The rect extents come from u_minify applied to the target's
// width and height at its lod.
// NOTE(review): the return type, the `attachment` parameter line, the rect
// declaration and the remaining pfnSwrStoreTiles arguments are on lines not
// visible in this listing.
314 swr_store_render_target(struct pipe_context
*pipe
,
316 enum SWR_TILE_STATE post_tile_state
)
318 struct swr_context
*ctx
= swr_context(pipe
);
319 struct swr_draw_context
*pDC
= &ctx
->swrDC
;
320 struct SWR_SURFACE_STATE
*renderTarget
= &pDC
->renderTargets
[attachment
];
322 /* Only proceed if there's a valid surface to store to */
323 if (renderTarget
->xpBaseAddress
) {
324 swr_update_draw_context(ctx
);
// Extents of the rect: the surface width/height passed through u_minify with
// the surface lod.
327 (int32_t)u_minify(renderTarget
->width
, renderTarget
->lod
),
328 (int32_t)u_minify(renderTarget
->height
, renderTarget
->lod
)};
329 ctx
->api
.pfnSwrStoreTiles(ctx
->swrContext
,
// swr_store_dirty_resource: when the resource's status has SWR_RESOURCE_WRITE
// set, scans all SWR_NUM_ATTACHMENTS render targets in the draw context and
// calls swr_store_render_target for each one whose base address matches the
// resource's primary or (non-zero) secondary base address. A fence submit on
// the screen's flush fence follows.
// NOTE(review): the return type and braces are not visible in this listing,
// so the exact nesting of the fence submit should be confirmed. It uses ctx
// and screen, which are declared after the status check.
337 swr_store_dirty_resource(struct pipe_context
*pipe
,
338 struct pipe_resource
*resource
,
339 enum SWR_TILE_STATE post_tile_state
)
341 /* Only store resource if it has been written to */
342 if (swr_resource(resource
)->status
& SWR_RESOURCE_WRITE
) {
343 struct swr_context
*ctx
= swr_context(pipe
);
344 struct swr_screen
*screen
= swr_screen(pipe
->screen
);
345 struct swr_resource
*spr
= swr_resource(resource
);
347 swr_draw_context
*pDC
= &ctx
->swrDC
;
348 SWR_SURFACE_STATE
*renderTargets
= pDC
->renderTargets
;
// Match each attachment by base address; the secondary address only takes
// part in the comparison when it is non-zero.
349 for (uint32_t i
= 0; i
< SWR_NUM_ATTACHMENTS
; i
++)
350 if (renderTargets
[i
].xpBaseAddress
== spr
->swr
.xpBaseAddress
||
351 (spr
->secondary
.xpBaseAddress
&&
352 renderTargets
[i
].xpBaseAddress
== spr
->secondary
.xpBaseAddress
)) {
353 swr_store_render_target(pipe
, i
, post_tile_state
);
355 /* Mesa thinks depth/stencil are fused, so we'll never get an
356 * explicit resource for stencil. So, if checking depth, then
357 * also check for stencil. */
358 if (spr
->has_stencil
&& (i
== SWR_ATTACHMENT_DEPTH
)) {
359 swr_store_render_target(
360 pipe
, SWR_ATTACHMENT_STENCIL
, post_tile_state
);
363 /* This fence signals StoreTiles completion */
364 swr_fence_submit(ctx
, screen
->flush_fence
);
// swr_draw_init: installs this file's draw and flush implementations on the
// pipe context by assigning swr_draw_vbo to pipe->draw_vbo and swr_flush to
// pipe->flush.
// NOTE(review): the return type and braces are not visible in this listing;
// any further assignments after the last visible line cannot be ruled out.
372 swr_draw_init(struct pipe_context
*pipe
)
374 pipe
->draw_vbo
= swr_draw_vbo
;
375 pipe
->flush
= swr_flush
;