/****************************************************************************
 * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 ***************************************************************************/
24 #include "swr_screen.h"
25 #include "swr_context.h"
26 #include "swr_resource.h"
27 #include "swr_fence.h"
28 #include "swr_query.h"
31 #include "util/u_draw.h"
32 #include "util/u_prim.h"
35 * Draw vertex arrays, with optional indexing, optional instancing.
38 swr_draw_vbo(struct pipe_context
*pipe
, const struct pipe_draw_info
*info
)
40 struct swr_context
*ctx
= swr_context(pipe
);
42 if (!info
->count_from_stream_output
&& !info
->indirect
&&
43 !info
->primitive_restart
&&
44 !u_trim_pipe_prim(info
->mode
, (unsigned*)&info
->count
))
47 if (!swr_check_render_cond(pipe
))
51 util_draw_indirect(pipe
, info
);
55 /* If indexed draw, force vertex validation since index buffer comes
58 ctx
->dirty
|= SWR_NEW_VERTEX
;
60 /* Update derived state, pass draw info to update function. */
61 swr_update_derived(pipe
, info
);
63 swr_update_draw_context(ctx
);
65 if (ctx
->vs
->pipe
.stream_output
.num_outputs
) {
66 if (!ctx
->vs
->soFunc
[info
->mode
]) {
67 STREAMOUT_COMPILE_STATE state
= {0};
68 struct pipe_stream_output_info
*so
= &ctx
->vs
->pipe
.stream_output
;
70 state
.numVertsPerPrim
= u_vertices_per_prim(info
->mode
);
72 uint32_t offsets
[MAX_SO_STREAMS
] = {0};
75 for (uint32_t i
= 0; i
< so
->num_outputs
; i
++) {
76 assert(so
->output
[i
].stream
== 0); // @todo
77 uint32_t output_buffer
= so
->output
[i
].output_buffer
;
78 if (so
->output
[i
].dst_offset
!= offsets
[output_buffer
]) {
79 // hole - need to fill
80 state
.stream
.decl
[num
].bufferIndex
= output_buffer
;
81 state
.stream
.decl
[num
].hole
= true;
82 state
.stream
.decl
[num
].componentMask
=
83 (1 << (so
->output
[i
].dst_offset
- offsets
[output_buffer
]))
86 offsets
[output_buffer
] = so
->output
[i
].dst_offset
;
89 unsigned attrib_slot
= so
->output
[i
].register_index
;
90 attrib_slot
= swr_so_adjust_attrib(attrib_slot
, ctx
->vs
);
92 state
.stream
.decl
[num
].bufferIndex
= output_buffer
;
93 state
.stream
.decl
[num
].attribSlot
= attrib_slot
;
94 state
.stream
.decl
[num
].componentMask
=
95 ((1 << so
->output
[i
].num_components
) - 1)
96 << so
->output
[i
].start_component
;
97 state
.stream
.decl
[num
].hole
= false;
100 offsets
[output_buffer
] += so
->output
[i
].num_components
;
103 state
.stream
.numDecls
= num
;
105 HANDLE hJitMgr
= swr_screen(pipe
->screen
)->hJitMgr
;
106 ctx
->vs
->soFunc
[info
->mode
] = JitCompileStreamout(hJitMgr
, state
);
107 debug_printf("so shader %p\n", ctx
->vs
->soFunc
[info
->mode
]);
108 assert(ctx
->vs
->soFunc
[info
->mode
] && "Error: SoShader = NULL");
111 ctx
->api
.pfnSwrSetSoFunc(ctx
->swrContext
, ctx
->vs
->soFunc
[info
->mode
], 0);
114 struct swr_vertex_element_state
*velems
= ctx
->velems
;
115 if (info
->primitive_restart
)
116 velems
->fsState
.cutIndex
= info
->restart_index
;
118 velems
->fsState
.cutIndex
= 0;
119 velems
->fsState
.bEnableCutIndex
= info
->primitive_restart
;
120 velems
->fsState
.bPartialVertexBuffer
= (info
->min_index
> 0);
122 swr_jit_fetch_key key
;
123 swr_generate_fetch_key(key
, velems
);
124 auto search
= velems
->map
.find(key
);
125 if (search
!= velems
->map
.end()) {
126 velems
->fsFunc
= search
->second
;
128 HANDLE hJitMgr
= swr_screen(ctx
->pipe
.screen
)->hJitMgr
;
129 velems
->fsFunc
= JitCompileFetch(hJitMgr
, velems
->fsState
);
131 debug_printf("fetch shader %p\n", velems
->fsFunc
);
132 assert(velems
->fsFunc
&& "Error: FetchShader = NULL");
134 velems
->map
.insert(std::make_pair(key
, velems
->fsFunc
));
137 ctx
->api
.pfnSwrSetFetchFunc(ctx
->swrContext
, velems
->fsFunc
);
139 /* Set up frontend state
140 * XXX setup provokingVertex & topologyProvokingVertex */
141 SWR_FRONTEND_STATE feState
= {0};
143 // feState.vsVertexSize seeds the PA size that is used as an interface
144 // between all the shader stages, so it has to be large enough to
145 // incorporate all interfaces between stages
147 // max of gs and vs num_outputs
148 feState
.vsVertexSize
= ctx
->vs
->info
.base
.num_outputs
;
150 ctx
->gs
->info
.base
.num_outputs
> feState
.vsVertexSize
) {
151 feState
.vsVertexSize
= ctx
->gs
->info
.base
.num_outputs
;
154 if (ctx
->vs
->info
.base
.num_outputs
) {
155 // gs does not adjust for position in SGV slot at input from vs
157 feState
.vsVertexSize
--;
160 // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
161 feState
.vsVertexSize
+= VERTEX_ATTRIB_START_SLOT
;
163 // The PA in the clipper does not handle BE vertex sizes
164 // different from FE. Increase vertexsize only for the cases that needed it
166 // primid needs a slot
167 if (ctx
->fs
->info
.base
.uses_primid
)
168 feState
.vsVertexSize
++;
169 // sprite coord enable
170 if (ctx
->rasterizer
->sprite_coord_enable
)
171 feState
.vsVertexSize
++;
174 if (ctx
->rasterizer
->flatshade_first
) {
175 feState
.provokingVertex
= {1, 0, 0};
177 feState
.provokingVertex
= {2, 1, 2};
180 enum pipe_prim_type topology
;
182 topology
= (pipe_prim_type
)ctx
->gs
->info
.base
.properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
];
184 topology
= info
->mode
;
187 case PIPE_PRIM_TRIANGLE_FAN
:
188 feState
.topologyProvokingVertex
= feState
.provokingVertex
.triFan
;
190 case PIPE_PRIM_TRIANGLE_STRIP
:
191 case PIPE_PRIM_TRIANGLES
:
192 feState
.topologyProvokingVertex
= feState
.provokingVertex
.triStripList
;
194 case PIPE_PRIM_QUAD_STRIP
:
195 case PIPE_PRIM_QUADS
:
196 if (ctx
->rasterizer
->flatshade_first
)
197 feState
.topologyProvokingVertex
= 0;
199 feState
.topologyProvokingVertex
= 3;
201 case PIPE_PRIM_LINES
:
202 case PIPE_PRIM_LINE_LOOP
:
203 case PIPE_PRIM_LINE_STRIP
:
204 feState
.topologyProvokingVertex
= feState
.provokingVertex
.lineStripList
;
207 feState
.topologyProvokingVertex
= 0;
210 feState
.bEnableCutIndex
= info
->primitive_restart
;
211 ctx
->api
.pfnSwrSetFrontendState(ctx
->swrContext
, &feState
);
213 if (info
->index_size
)
214 ctx
->api
.pfnSwrDrawIndexedInstanced(ctx
->swrContext
,
215 swr_convert_prim_topology(info
->mode
),
217 info
->instance_count
,
220 info
->start_instance
);
222 ctx
->api
.pfnSwrDrawInstanced(ctx
->swrContext
,
223 swr_convert_prim_topology(info
->mode
),
225 info
->instance_count
,
227 info
->start_instance
);
229 /* On large client-buffer draw, we used client buffer directly, without
230 * copy. Block until draw is finished.
231 * VMD is an example application that benefits from this. */
232 if (ctx
->dirty
& SWR_LARGE_CLIENT_DRAW
) {
233 struct swr_screen
*screen
= swr_screen(pipe
->screen
);
234 swr_fence_submit(ctx
, screen
->flush_fence
);
235 swr_fence_finish(pipe
->screen
, NULL
, screen
->flush_fence
, 0);
241 swr_flush(struct pipe_context
*pipe
,
242 struct pipe_fence_handle
**fence
,
245 struct swr_context
*ctx
= swr_context(pipe
);
246 struct swr_screen
*screen
= swr_screen(pipe
->screen
);
248 for (int i
=0; i
< ctx
->framebuffer
.nr_cbufs
; i
++) {
249 struct pipe_surface
*cb
= ctx
->framebuffer
.cbufs
[i
];
251 swr_store_dirty_resource(pipe
, cb
->texture
, SWR_TILE_RESOLVED
);
254 if (ctx
->framebuffer
.zsbuf
) {
255 swr_store_dirty_resource(pipe
, ctx
->framebuffer
.zsbuf
->texture
,
260 swr_fence_reference(pipe
->screen
, fence
, screen
->flush_fence
);
264 swr_finish(struct pipe_context
*pipe
)
266 struct pipe_fence_handle
*fence
= nullptr;
268 swr_flush(pipe
, &fence
, 0);
269 swr_fence_finish(pipe
->screen
, NULL
, fence
, 0);
270 swr_fence_reference(pipe
->screen
, &fence
, NULL
);
274 * Invalidate tiles so they can be reloaded back when needed
277 swr_invalidate_render_target(struct pipe_context
*pipe
,
279 uint16_t width
, uint16_t height
)
281 struct swr_context
*ctx
= swr_context(pipe
);
283 /* grab the rect from the passed in arguments */
284 swr_update_draw_context(ctx
);
286 {0, 0, (int32_t)width
, (int32_t)height
};
287 ctx
->api
.pfnSwrInvalidateTiles(ctx
->swrContext
,
294 * Store SWR HotTiles back to renderTarget surface.
297 swr_store_render_target(struct pipe_context
*pipe
,
299 enum SWR_TILE_STATE post_tile_state
)
301 struct swr_context
*ctx
= swr_context(pipe
);
302 struct swr_draw_context
*pDC
= &ctx
->swrDC
;
303 struct SWR_SURFACE_STATE
*renderTarget
= &pDC
->renderTargets
[attachment
];
305 /* Only proceed if there's a valid surface to store to */
306 if (renderTarget
->xpBaseAddress
) {
307 swr_update_draw_context(ctx
);
310 (int32_t)u_minify(renderTarget
->width
, renderTarget
->lod
),
311 (int32_t)u_minify(renderTarget
->height
, renderTarget
->lod
)};
312 ctx
->api
.pfnSwrStoreTiles(ctx
->swrContext
,
320 swr_store_dirty_resource(struct pipe_context
*pipe
,
321 struct pipe_resource
*resource
,
322 enum SWR_TILE_STATE post_tile_state
)
324 /* Only store resource if it has been written to */
325 if (swr_resource(resource
)->status
& SWR_RESOURCE_WRITE
) {
326 struct swr_context
*ctx
= swr_context(pipe
);
327 struct swr_screen
*screen
= swr_screen(pipe
->screen
);
328 struct swr_resource
*spr
= swr_resource(resource
);
330 swr_draw_context
*pDC
= &ctx
->swrDC
;
331 SWR_SURFACE_STATE
*renderTargets
= pDC
->renderTargets
;
332 for (uint32_t i
= 0; i
< SWR_NUM_ATTACHMENTS
; i
++)
333 if (renderTargets
[i
].xpBaseAddress
== spr
->swr
.xpBaseAddress
||
334 (spr
->secondary
.xpBaseAddress
&&
335 renderTargets
[i
].xpBaseAddress
== spr
->secondary
.xpBaseAddress
)) {
336 swr_store_render_target(pipe
, i
, post_tile_state
);
338 /* Mesa thinks depth/stencil are fused, so we'll never get an
339 * explicit resource for stencil. So, if checking depth, then
340 * also check for stencil. */
341 if (spr
->has_stencil
&& (i
== SWR_ATTACHMENT_DEPTH
)) {
342 swr_store_render_target(
343 pipe
, SWR_ATTACHMENT_STENCIL
, post_tile_state
);
346 /* This fence signals StoreTiles completion */
347 swr_fence_submit(ctx
, screen
->flush_fence
);
355 swr_draw_init(struct pipe_context
*pipe
)
357 pipe
->draw_vbo
= swr_draw_vbo
;
358 pipe
->flush
= swr_flush
;